diff --git a/lib/kokkos/.gitignore b/lib/kokkos/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..f9d16be1558495fb95e3f5c4b785eefd3b3aa854 --- /dev/null +++ b/lib/kokkos/.gitignore @@ -0,0 +1,8 @@ +# Standard ignores +*~ +*.pyc +\#*# +.#* +.*.swp +.cproject +.project diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..1219352f73dc47360555639b1f4c3ddde410e9a5 --- /dev/null +++ b/lib/kokkos/CMakeLists.txt @@ -0,0 +1,184 @@ + +IF(COMMAND TRIBITS_PACKAGE_DECL) + SET(KOKKOS_HAS_TRILINOS ON CACHE BOOL "") +ELSE() + SET(KOKKOS_HAS_TRILINOS OFF CACHE BOOL "") +ENDIF() + +IF(NOT KOKKOS_HAS_TRILINOS) + CMAKE_MINIMUM_REQUIRED(VERSION 2.8.11 FATAL_ERROR) + INCLUDE(cmake/tribits.cmake) +ENDIF() + +# +# A) Forward declare the package so that certain options are also defined for +# subpackages +# + +TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS) + +#------------------------------------------------------------------------------ +# +# B) Define the common options for Kokkos first so they can be used by +# subpackages as well. +# + + + +# mfh 01 Aug 2016: See Issue #61: +# +# https://github.com/kokkos/kokkos/issues/61 +# +# Don't use TRIBITS_ADD_DEBUG_OPTION() here, because that defines +# HAVE_KOKKOS_DEBUG. We define KOKKOS_HAVE_DEBUG here instead, +# for compatibility with Kokkos' Makefile build system. + +TRIBITS_ADD_OPTION_AND_DEFINE( + ${PACKAGE_NAME}_ENABLE_DEBUG + ${PACKAGE_NAME_UC}_HAVE_DEBUG + "Enable run-time debug checks. These checks may be expensive, so they are disabled by default in a release build." + ${${PROJECT_NAME}_ENABLE_DEBUG} +) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_SIERRA_BUILD + KOKKOS_FOR_SIERRA + "Configure Kokkos for building within the Sierra build system." + OFF + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Cuda + KOKKOS_HAVE_CUDA + "Enable CUDA support in Kokkos." 
+ "${TPL_ENABLE_CUDA}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Cuda_UVM + KOKKOS_USE_CUDA_UVM + "Enable CUDA Unified Virtual Memory support in Kokkos." + OFF + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Pthread + KOKKOS_HAVE_PTHREAD + "Enable Pthread support in Kokkos." + OFF + ) + +ASSERT_DEFINED(TPL_ENABLE_Pthread) +IF (Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread) + MESSAGE(FATAL_ERROR "You set Kokkos_ENABLE_Pthread=ON, but Trilinos' support for Pthread(s) is not enabled (TPL_ENABLE_Pthread=OFF). This is not allowed. Please enable Pthreads in Trilinos before attempting to enable Kokkos' support for Pthreads.") +ENDIF () + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_OpenMP + KOKKOS_HAVE_OPENMP + "Enable OpenMP support in Kokkos." + "${${PROJECT_NAME}_ENABLE_OpenMP}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_QTHREAD + KOKKOS_HAVE_QTHREAD + "Enable QTHREAD support in Kokkos." + "${TPL_ENABLE_QTHREAD}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_CXX11 + KOKKOS_HAVE_CXX11 + "Enable C++11 support in Kokkos." + "${${PROJECT_NAME}_ENABLE_CXX11}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_HWLOC + KOKKOS_HAVE_HWLOC + "Enable HWLOC support in Kokkos." + "${TPL_ENABLE_HWLOC}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_MPI + KOKKOS_HAVE_MPI + "Enable MPI support in Kokkos." + "${TPL_ENABLE_MPI}" + ) + +# Set default value of Kokkos_ENABLE_Debug_Bounds_Check option +# +# CMake is case sensitive. The Kokkos_ENABLE_Debug_Bounds_Check +# option (defined below) is annoyingly not all caps, but we need to +# keep it that way for backwards compatibility. If users forget and +# try using an all-caps variable, then make it count by using the +# all-caps version as the default value of the original, not-all-caps +# option. Otherwise, the default value of this option comes from +# Kokkos_ENABLE_DEBUG (see Issue #367). 
+ +ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_DEBUG) +IF(DEFINED Kokkos_ENABLE_DEBUG_BOUNDS_CHECK) + IF(Kokkos_ENABLE_DEBUG_BOUNDS_CHECK) + SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT ON) + ELSE() + SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT "${${PACKAGE_NAME}_ENABLE_DEBUG}") + ENDIF() +ELSE() + SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT "${${PACKAGE_NAME}_ENABLE_DEBUG}") +ENDIF() +ASSERT_DEFINED(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Debug_Bounds_Check + KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK + "Enable Kokkos::View run-time bounds checking." + "${Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Profiling + KOKKOS_ENABLE_PROFILING_INTERNAL + "Enable KokkosP profiling support for kernel data collections." + "${TPL_ENABLE_DLlib}" + ) + +# placeholder for future device... +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Winthread + KOKKOS_HAVE_WINTHREAD + "Enable Winthread support in Kokkos." + "${TPL_ENABLE_Winthread}" + ) + +# use new/old View +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_USING_DEPRECATED_VIEW + KOKKOS_USING_DEPRECATED_VIEW + "Choose whether to use the old, deprecated Kokkos::View" + OFF + ) + +#------------------------------------------------------------------------------ +# +# C) Process the subpackages for Kokkos +# + +TRIBITS_PROCESS_SUBPACKAGES() + +# +# D) If Kokkos itself is enabled, process the Kokkos package +# + +TRIBITS_PACKAGE_DEF() + +TRIBITS_EXCLUDE_AUTOTOOLS_FILES() + +TRIBITS_EXCLUDE_FILES( + classic/doc + classic/LinAlg/doc/CrsRefactorNotesMay2012 + ) + +TRIBITS_PACKAGE_POSTPROCESS() + diff --git a/lib/kokkos/Copyright.txt b/lib/kokkos/Copyright.txt new file mode 100644 index 0000000000000000000000000000000000000000..05980758fa8fe6317bb08fcc6eb70668b5fd1580 --- /dev/null +++ b/lib/kokkos/Copyright.txt @@ -0,0 +1,40 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER diff --git a/lib/kokkos/HOW_TO_SNAPSHOT b/lib/kokkos/HOW_TO_SNAPSHOT new file mode 100644 index 0000000000000000000000000000000000000000..46bfb4167f9023a8273ebc872ac450c626603bf0 --- /dev/null +++ b/lib/kokkos/HOW_TO_SNAPSHOT @@ -0,0 +1,73 @@ + +Developers of Kokkos (those who commit modifications to Kokkos) +must maintain the snapshot of Kokkos in the Trilinos repository. + +This file contains instructions for how to +snapshot Kokkos from github.com/kokkos to Trilinos. + +------------------------------------------------------------------------ +*** EVERYTHING GOES RIGHT WORKFLOW *** + +1) Given a 'git clone' of Kokkos and of Trilinos repositories. +1.1) Let ${KOKKOS} be the absolute path to the Kokkos clone. + This path *must* terminate with the directory name 'kokkos'; + e.g., ${HOME}/kokkos . +1.2) Let ${TRILINOS} be the absolute path to the Trilinos directory. + +2) Given that the Kokkos build & test is clean and + changes are committed to the Kokkos clone. + +3) Snapshot the current commit in the Kokkos clone into the Trilinos clone. + This overwrites ${TRILINOS}/packages/kokkos with the content of ${KOKKOS}: + ${KOKKOS}/config/snapshot.py --verbose ${KOKKOS} ${TRILINOS}/packages + +4) Verify the snapshot commit happened as expected + cd ${TRILINOS}/packages/kokkos + git log -1 --name-only + +5) Modify, build, and test Trilinos with the Kokkos snapshot. + +6) Given that the Trilinos build & test is clean and + changes are committed to the Trilinos clone. + +7) Attempt to push to the Kokkos repository. + If push fails then you must 'remove the Kokkos snapshot' + from your Trilinos clone. + See below. + +8) Attempt to push to the Trilinos repository. + If updating for a failed push requires you to change Kokkos you must + 'remove the Kokkos snapshot' from your Trilinos clone. + See below. 
+ +------------------------------------------------------------------------ +*** WHEN SOMETHING GOES WRONG AND YOU MUST *** +*** REMOVE THE KOKKOS SNAPSHOT FROM YOUR TRILINOS CLONE *** + +1) Query the Trilinos clone commit log. + git log --oneline + +2) Note the <SHA1> of the commit to the Trilinos clone + immediately BEFORE the Kokkos snapshot commit. + Copy this <SHA1> for use in the next command. + +3) IF there is more than one outstanding commit then you can remove just the + Kokkos snapshot commit with 'git rebase -i'. Edit the rebase file. + Remove or comment out the Kokkos snapshot commit entry. + git rebase -i <SHA1> + +4) IF the Kokkos snapshot commit is the one and only + outstanding commit then remove just that commit. + git reset --hard HEAD~1 + +------------------------------------------------------------------------ +*** REGARDING 'snapshot.py' TOOL *** + +The 'snapshot.py' tool is developed and maintained by the +Center for Computing Research (CCR) +Software Engineering, Maintenance, and Support (SEMS) team. + +Contact Brent Perschbacher <bmpersc@sandia.gov> for questions. + +------------------------------------------------------------------------ + diff --git a/lib/kokkos/LICENSE b/lib/kokkos/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..05980758fa8fe6317bb08fcc6eb70668b5fd1580 --- /dev/null +++ b/lib/kokkos/LICENSE @@ -0,0 +1,40 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos
new file mode 100644
index 0000000000000000000000000000000000000000..c9b6cc464ddd7df1d942a9e38275fa82e96742dc
--- /dev/null
+++ b/lib/kokkos/Makefile.kokkos
@@ -0,0 +1,480 @@
+# Default settings common options (?= only assigns when the variable is still unset, so callers may override any of them)
+
+#LAMMPS specific settings:
+KOKKOS_PATH=../../lib/kokkos
+CXXFLAGS=$(CCFLAGS)
+
+#Options: OpenMP,Serial,Pthreads,Cuda
+KOKKOS_DEVICES ?= "OpenMP"
+#KOKKOS_DEVICES ?= "Pthreads"
+#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv8,BGQ,Power7,Power8,KNL,BDW
+KOKKOS_ARCH ?= ""
+#Options: yes,no
+KOKKOS_DEBUG ?= "no"
+#Options: hwloc,librt,experimental_memkind
+KOKKOS_USE_TPLS ?= ""
+#Options: c++11
+KOKKOS_CXX_STANDARD ?= "c++11"
+#Options: aggressive_vectorization,disable_profiling
+KOKKOS_OPTIONS ?= ""
+
+#Default settings specific options
+#Options: force_uvm,use_ldg,rdc,enable_lambda
+KOKKOS_CUDA_OPTIONS ?= ""
+
+# Check for general settings: every KOKKOS_INTERNAL_* flag below is a 0/1 count from `grep <keyword> | wc -l` on the option string (substring match)
+
+KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l))
+KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l))
+
+# Check for external libraries
+KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l))
+KOKKOS_INTERNAL_USE_LIBRT := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "librt" | wc -l))
+KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "experimental_memkind" | wc -l))
+
+# Check for advanced settings
+KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l))
+KOKKOS_INTERNAL_DISABLE_PROFILING := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_profiling" | wc -l))
+KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l))
+KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "force_uvm" | wc -l))
+KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l))
+KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l))
+
+# Check for Kokkos Host Execution Spaces one of which must be on (Serial is forced on below when neither OpenMP nor Pthreads was requested)
+
+KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMP | wc -l))
+KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l))
+KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l))
+KOKKOS_INTERNAL_USE_QTHREAD := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthread | wc -l))
+
+ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
+ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
+  KOKKOS_INTERNAL_USE_SERIAL := 1
+endif
+endif
+# Compiler/OS probes: $(strip ...) drops any blank padding wc puts around the count, so the ifeq (..., 1) tests below compare cleanly
+KOKKOS_INTERNAL_COMPILER_INTEL := $(strip $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l))
+KOKKOS_INTERNAL_COMPILER_PGI := $(strip $(shell $(CXX) --version 2>&1 | grep PGI | wc -l))
+KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l))
+KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l))
+KOKKOS_INTERNAL_OS_CYGWIN := $(strip $(shell uname | grep CYGWIN | wc -l))
+
+ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
+  KOKKOS_INTERNAL_OPENMP_FLAG := -mp
+else
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
+    KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
+  else
+    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
+      # OpenMP is turned on by default in Cray compiler environment
+      KOKKOS_INTERNAL_OPENMP_FLAG :=
+    else
+      KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
+    endif
+  endif
+endif
+
+ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
+  KOKKOS_INTERNAL_CXX11_FLAG := --c++11
+else
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
+    KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11
+  else
+    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
+      KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11
+    else
+      KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
+    endif
+  endif
+endif
+
+# Check for other Execution Spaces
+KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
+
+# Check for Kokkos Architecture settings
+
+#Intel based
+KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l))
+
+#NVIDIA based (KOKKOS_INTERNAL_USE_ARCH_NVIDIA is the sum of the per-architecture flags, evaluated by bc)
+NVCC_WRAPPER := $(KOKKOS_PATH)/config/nvcc_wrapper
+KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler37 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell50 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal61 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+                                                  + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+                                                  + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+                                                  + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+                                                  + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
+                                                  + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+                                                  + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+                                                  + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
+# No versioned name matched: map a bare "Maxwell" to Maxwell50 and a bare "Kepler" to Kepler35, then recount
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
+KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+                                                  + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+                                                  + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+                                                  + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+                                                  + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
+                                                  + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+                                                  + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+                                                  + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
+endif
+
+#ARM based
+KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8 | wc -l))
+
+#IBM based
+KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc))
+
+#AMD based
+KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
+
+#Any AVX?
+KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
+KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc ))
+KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
+
+# Decide what ISA level we are able to support (each value is the bc sum of the architecture flags listed)
+KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
+KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
+KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc ))
+
+#Incompatible flags? bc prints 1 when the sum exceeds 1. AMDAVX is already inside ARCH_AVX (SNB+AMDAVX), so it must not be added again or AMDAVX alone would count as two hosts.
+KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)>1" | bc ))
+KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
+
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
+  $(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
+endif
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIGPU), 1)
+  $(error Defined Multiple GPU architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
+endif
+
+#Generating the list of Flags (start values; later sections append with +=)
+
+KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
+
+# No warnings (default; swap in one of the commented alternatives below to enable compiler warnings):
+KOKKOS_CXXFLAGS =
+# INTEL and CLANG warnings:
+#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
+# GCC warnings:
+#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized -Wignored-qualifiers -Wempty-body -Wclobbered
+
+KOKKOS_LIBS = -lkokkos -ldl
+KOKKOS_LDFLAGS = -L$(shell pwd)
+KOKKOS_SRC =
+KOKKOS_HEADERS = + +#Generating the KokkosCore_config.h file + +tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp) +tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp) +tmp := $(shell date >> KokkosCore_config.tmp) +tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp) + + +tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp) +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1) + tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1) + tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1) + tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) + KOKKOS_CPPFLAGS += -I$(QTHREAD_PATH)/include + KOKKOS_LDFLAGS += -L$(QTHREAD_PATH)/lib + tmp := $(shell echo "\#define KOKKOS_HAVE_QTHREAD 1" >> KokkosCore_config.tmp ) +endif + +tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp) +ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG) + tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + KOKKOS_CXXFLAGS += -G +endif + 
KOKKOS_CXXFLAGS += -g + KOKKOS_LDFLAGS += -g -ldl + tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) + KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include + KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib + KOKKOS_LIBS += -lhwloc + tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1) + tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define PREC_TIMER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOSP_ENABLE_RTLIB 1" >> KokkosCore_config.tmp ) + KOKKOS_LIBS += -lrt +endif + +ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) + KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include + KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib + KOKKOS_LIBS += -lmemkind + tmp := $(shell echo "\#define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 1) + tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING 0" >> KokkosCore_config.tmp ) +endif + +tmp := $(shell echo "/* Optimization Settings */" >> KokkosCore_config.tmp) + +ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1) + tmp := $(shell echo "\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION 1" >> KokkosCore_config.tmp ) +endif + +tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp) + +ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> 
KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += --relocatable-device-code=true + KOKKOS_LDFLAGS += --relocatable-device-code=true +endif + +ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -expt-extended-lambda +endif + +#Add Architecture flags + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp ) + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += + else + KOKKOS_CXXFLAGS += -mavx + KOKKOS_LDFLAGS += -mavx + endif +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8 + KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8 +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp ) + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xCORE-AVX2 + KOKKOS_LDFLAGS += -xCORE-AVX2 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + + else + # Assume that this is really a GNU compiler + KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 + KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2 + endif + endif + endif +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp ) + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xMIC-AVX512 + KOKKOS_LDFLAGS += -xMIC-AVX512 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + + else + # Assume that this is really a GNU compiler + KOKKOS_CXXFLAGS += -march=knl + KOKKOS_LDFLAGS += -march=knl + endif + endif + endif +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> 
KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -mmic + KOKKOS_LDFLAGS += -mmic +endif + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_30 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_32 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_35 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_37 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_50 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_52 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_53 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" 
>> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_61 +endif +endif + +KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h) +ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h) +KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l)) +else +KOKKOS_INTERNAL_NEW_CONFIG := 1 +endif + +ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) + tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h) +endif + +KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) +KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp) +KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp) +KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp) +KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp) + +KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.cpp) +KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp) + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) + KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 + KOKKOS_LIBS += -lcudart -lcuda +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + KOKKOS_LIBS += -lpthread + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) + KOKKOS_LIBS += -lqthread + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.hpp) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) + ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG) + else + 
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) + endif + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) +endif + +#With Cygwin functions such as fdopen and fileno are not defined +#when strict ansi is enabled. strict ansi gets enabled with --std=c++11 +#though. So we hard undefine it here. Not sure if that has any bad side effects +#This is needed for gtest actually, not for Kokkos itself! +ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1) + KOKKOS_CXXFLAGS += -U__STRICT_ANSI__ +endif + +# Setting up dependencies + +KokkosCore_config.h: + +KOKKOS_CPP_DEPENDS := KokkosCore_config.h $(KOKKOS_HEADERS) + +KOKKOS_OBJ = $(KOKKOS_SRC:.cpp=.o) +KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ)) + +include $(KOKKOS_PATH)/Makefile.targets + +kokkos-clean: + rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a + +libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS) + ar cr libkokkos.a $(KOKKOS_OBJ_LINK) + ranlib libkokkos.a + +KOKKOS_LINK_DEPENDS=libkokkos.a diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets new file mode 100644 index 0000000000000000000000000000000000000000..86929ea0fe6e9e2158923e6907c7b2a179e5af61 --- /dev/null +++ b/lib/kokkos/Makefile.targets @@ -0,0 +1,72 @@ +Kokkos_UnorderedMap_impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp +Kokkos_Core.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp +Kokkos_CPUDiscovery.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_CPUDiscovery.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_CPUDiscovery.cpp +Kokkos_Error.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp + $(CXX) $(KOKKOS_CPPFLAGS) 
$(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp +Kokkos_ExecPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp +Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp +Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp +Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp +Kokkos_Serial_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp +Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp +Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp +Kokkos_Shape.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp +Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp +Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp +KokkosExp_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp +Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) +Kokkos_Cuda_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp +Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp +Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp +Kokkos_Cuda_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) +Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp +Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp 
+Kokkos_Threads_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp +endif + +ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) +Kokkos_QthreadExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp +Kokkos_Qthread_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) +Kokkos_OpenMPexec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp +Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp +endif + +Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp +Kokkos_HBWAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp + diff --git a/lib/kokkos/README b/lib/kokkos/README new file mode 100644 index 0000000000000000000000000000000000000000..b094578af631b179e9744f744a823a1800bd885b --- /dev/null +++ b/lib/kokkos/README @@ -0,0 +1,152 @@ +Kokkos implements a programming model in C++ for writing performance portable +applications targeting all major HPC platforms. 
For that purpose it provides +abstractions for both parallel execution of code and data management. +Kokkos is designed to target complex node architectures with N-level memory +hierarchies and multiple types of execution resources. It currently can use +OpenMP, Pthreads and CUDA as backend programming models. + +The core developers of Kokkos are Carter Edwards and Christian Trott +at the Computer Science Research Institute of the Sandia National +Laboratories. + +The KokkosP interface and associated tools are developed by the Application +Performance Team and Kokkos core developers at Sandia National Laboratories. + +To learn more about Kokkos consider watching one of our presentations: +GTC 2015: + http://on-demand.gputechconf.com/gtc/2015/video/S5166.html + http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf + +A programming guide can be found under doc/Kokkos_PG.pdf. This is an initial version +and feedback is greatly appreciated. + +A separate repository with extensive tutorial material can be found under +https://github.com/kokkos/kokkos-tutorials. + +If you have a patch to contribute please feel free to issue a pull request against +the develop branch. For major contributions it is better to contact us first +for guidance. 
+ +For questions please send an email to +kokkos-users@software.sandia.gov + +For non-public questions send an email to +hcedwar(at)sandia.gov and crtrott(at)sandia.gov + +============================================================================ +====Requirements============================================================ +============================================================================ + +Primary tested compilers on X86 are: + GCC 4.7.2 + GCC 4.8.4 + GCC 4.9.2 + GCC 5.1.0 + Intel 14.0.4 + Intel 15.0.2 + Intel 16.0.1 + Clang 3.5.2 + Clang 3.6.1 + +Primary tested compilers on Power 8 are: + IBM XL 13.1.3 (OpenMP,Serial) + GCC 4.9.2 (OpenMP,Serial) + GCC 5.3.0 (OpenMP,Serial) + +Secondary tested compilers are: + CUDA 6.5 (with gcc 4.7.2) + CUDA 7.0 (with gcc 4.7.2) + CUDA 7.5 (with gcc 4.8.4) + +Other compilers working: + X86: + Intel 17.0.042 (the FENL example causes internal compiler error) + PGI 15.4 + Cygwin 2.1.0 64bit with gcc 4.9.3 + KNL: + Intel 16.2.181 (the FENL example causes internal compiler error) + Intel 17.0.042 (the FENL example causes internal compiler error) + +Known non-working combinations: + Power8: + GCC 6.1.0 + Pthreads backend + + +Primary tested compilers are passing in release mode +with warnings as errors. They are also tested with a comprehensive set of +backend combinations (e.g. OpenMP, Pthreads, Serial, OpenMP+Serial, ...). +We are using the following set of flags: +GCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits + -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized +Intel: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized +Clang: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized + +Secondary compilers are passing without -Werror. +Other compilers are tested occasionally, in particular when pushing from develop to +master branch, without -Werror and only for a select set of backends.
+ +============================================================================ +====Getting started========================================================= +============================================================================ + +In the 'example/tutorial' directory you will find step by step tutorial +examples which explain many of the features of Kokkos. They work with +simple Makefiles. To build with g++ and OpenMP simply type 'make openmp' +in the 'example/tutorial' directory. This will build all examples in the +subfolders. + +============================================================================ +====Running Unit Tests====================================================== +============================================================================ + +To run the unit tests create a build directory and run the following commands + +KOKKOS_PATH/generate_makefile.bash +make build-test +make test + +Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as +changing the device type for which to build. + +============================================================================ +====Install the library===================================================== +============================================================================ + +To install Kokkos as a library create a build directory and run the following + +KOKKOS_PATH/generate_makefile.bash --prefix=INSTALL_PATH +make lib +make install + +Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as +changing the device type for which to build. + +============================================================================ +====CMakeFiles============================================================== +============================================================================ + +The CMake files contained in this repository require Tribits and are used +for integration with Trilinos. They do not currently support a standalone +CMake build.
+ +=========================================================================== +====Kokkos and CUDA UVM==================================================== +=========================================================================== + +Kokkos does support UVM as a specific memory space called CudaUVMSpace. +Allocations made with that space are accessible from host and device. +You can tell Kokkos to use that as the default space for Cuda allocations. +In either case UVM comes with a number of restrictions: +(i) You can't access allocations on the host while a kernel is potentially +running. This will lead to segfaults. To avoid that you either need to +call Kokkos::Cuda::fence() (or just Kokkos::fence()), after kernels, or +you can set the environment variable CUDA_LAUNCH_BLOCKING=1. +Furthermore in multi socket multi GPU machines, UVM defaults to using +zero copy allocations for technical reasons related to using multiple +GPUs from the same process. If an executable doesn't do that (e.g. each +MPI rank of an application uses a single GPU [can be the same GPU for +multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1. +This will enforce proper UVM allocations, but can lead to errors if +more than a single GPU is used by a single process. 
+ diff --git a/lib/kokkos/algorithms/CMakeLists.txt b/lib/kokkos/algorithms/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7853184a5418a5a4d3247cc1b64190719c251635 --- /dev/null +++ b/lib/kokkos/algorithms/CMakeLists.txt @@ -0,0 +1,10 @@ + + +TRIBITS_SUBPACKAGE(Algorithms) + +ADD_SUBDIRECTORY(src) + +TRIBITS_ADD_TEST_DIRECTORIES(unit_tests) +#TRIBITS_ADD_TEST_DIRECTORIES(performance_tests) + +TRIBITS_SUBPACKAGE_POSTPROCESS() diff --git a/lib/kokkos/algorithms/cmake/Dependencies.cmake b/lib/kokkos/algorithms/cmake/Dependencies.cmake new file mode 100644 index 0000000000000000000000000000000000000000..1d71d8af341181f689a6a8bf63036b67584cb138 --- /dev/null +++ b/lib/kokkos/algorithms/cmake/Dependencies.cmake @@ -0,0 +1,5 @@ +TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( + LIB_REQUIRED_PACKAGES KokkosCore + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC + TEST_OPTIONAL_TPLS CUSPARSE + ) diff --git a/lib/kokkos/algorithms/cmake/KokkosAlgorithms_config.h.in b/lib/kokkos/algorithms/cmake/KokkosAlgorithms_config.h.in new file mode 100644 index 0000000000000000000000000000000000000000..67334b70f36b6db55b225f25c91d8a8c4cb3aaab --- /dev/null +++ b/lib/kokkos/algorithms/cmake/KokkosAlgorithms_config.h.in @@ -0,0 +1,4 @@ +#ifndef KOKKOS_ALGORITHMS_CONFIG_H +#define KOKKOS_ALGORITHMS_CONFIG_H + +#endif diff --git a/lib/kokkos/algorithms/src/CMakeLists.txt b/lib/kokkos/algorithms/src/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..dfbf3323c2d51953a12d8e82371d9f971aaa1e13 --- /dev/null +++ b/lib/kokkos/algorithms/src/CMakeLists.txt @@ -0,0 +1,21 @@ + +TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +#----------------------------------------------------------------------------- + +FILE(GLOB HEADERS *.hpp) +FILE(GLOB SOURCES *.cpp) +LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h) + 
+#----------------------------------------------------------------------------- + +TRIBITS_ADD_LIBRARY( + kokkosalgorithms + HEADERS ${HEADERS} + SOURCES ${SOURCES} + DEPLIBS + ) + diff --git a/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp b/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d7c06dc14be99bc63b8f0170843d81067577771e --- /dev/null +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -0,0 +1,1751 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_RANDOM_HPP +#define KOKKOS_RANDOM_HPP + +#include <Kokkos_Core.hpp> +#include <Kokkos_Complex.hpp> +#include <cstdio> +#include <cstdlib> +#include <cmath> + +/// \file Kokkos_Random.hpp +/// \brief Pseudorandom number generators +/// +/// These generators are based on Vigna, Sebastiano (2014). "An +/// experimental exploration of Marsaglia's xorshift generators, +/// scrambled." 
See: http://arxiv.org/abs/1402.6246 + +namespace Kokkos { + + /*Template functions to get equidistributed random numbers from a generator for a specific Scalar type + + template<class Generator, class Scalar> + struct rand{ + + //Max value returned by draw(Generator& gen) + KOKKOS_INLINE_FUNCTION + static Scalar max(); + + //Returns a value between zero and max() + KOKKOS_INLINE_FUNCTION + static Scalar draw(Generator& gen); + + //Returns a value between zero and range + //Note: for floating point values range can be larger than max() + KOKKOS_INLINE_FUNCTION + static Scalar draw(Generator& gen, const Scalar& range){} + + //Return value between start and end + KOKKOS_INLINE_FUNCTION + static Scalar draw(Generator& gen, const Scalar& start, const Scalar& end); + }; + + The Random number generators themselves have two components: a state-pool and the actual generator. + A state-pool manages a number of generators, so that each active thread is able to grab its own. + This allows the generation of random numbers which are independent between threads. Note that + in contrast to CuRand none of the functions of the pool (or the generator) are collectives, + i.e. all functions can be called inside conditionals. + + template<class Device> + class Pool { + public: + //The Kokkos device type + typedef Device device_type; + //The actual generator type + typedef Generator<Device> generator_type; + + //Default constructor: does not initialize a pool + Pool(); + + //Initializing constructor: calls init(seed,Device_Specific_Number); + Pool(unsigned int seed); + + //Initialize Pool with seed as a starting seed with a pool_size of num_states + //The Random_XorShift64 generator is used in serial to initialize all states, + //thus the initialization process is platform independent and deterministic. + void init(unsigned int seed, int num_states); + + //Get a generator. This will lock one of the states, guaranteeing that each thread + //will have its private generator.
Note: on Cuda getting a state involves atomics, + //and is thus not deterministic! + generator_type get_state(); + + //Give a state back to the pool. This unlocks the state, and writes the modified + //state of the generator back to the pool. + void free_state(generator_type gen); + + } + + template<class Device> + class Generator { + public: + //The Kokkos device type + typedef DeviceType device_type; + + //Max return values of respective [X]rand[S]() functions + enum {MAX_URAND = 0xffffffffU}; + enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; + enum {MAX_RAND = static_cast<int>(0xffffffffU/2)}; + enum {MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL/2-1)}; + + + //Init with a state and the idx with respect to pool. Note: in serial the + //Generator can be used by just giving it the necessary state arguments + KOKKOS_INLINE_FUNCTION + Generator (STATE_ARGUMENTS, int state_idx = 0); + + //Draw a equidistributed uint32_t in the range (0,MAX_URAND] + KOKKOS_INLINE_FUNCTION + uint32_t urand(); + + //Draw a equidistributed uint64_t in the range (0,MAX_URAND64] + KOKKOS_INLINE_FUNCTION + uint64_t urand64(); + + //Draw a equidistributed uint32_t in the range (0,range] + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range); + + //Draw a equidistributed uint32_t in the range (start,end] + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end ); + + //Draw a equidistributed uint64_t in the range (0,range] + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range); + + //Draw a equidistributed uint64_t in the range (start,end] + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end ); + + //Draw a equidistributed int in the range (0,MAX_RAND] + KOKKOS_INLINE_FUNCTION + int rand(); + + //Draw a equidistributed int in the range (0,range] + KOKKOS_INLINE_FUNCTION + int rand(const int& range); + + //Draw a equidistributed int in the range (start,end] + KOKKOS_INLINE_FUNCTION + int rand(const int& 
start, const int& end ); + + //Draw a equidistributed int64_t in the range (0,MAX_RAND64] + KOKKOS_INLINE_FUNCTION + int64_t rand64(); + + //Draw a equidistributed int64_t in the range (0,range] + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range); + + //Draw a equidistributed int64_t in the range (start,end] + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end ); + + //Draw a equidistributed float in the range (0,1.0] + KOKKOS_INLINE_FUNCTION + float frand(); + + //Draw a equidistributed float in the range (0,range] + KOKKOS_INLINE_FUNCTION + float frand(const float& range); + + //Draw a equidistributed float in the range (start,end] + KOKKOS_INLINE_FUNCTION + float frand(const float& start, const float& end ); + + //Draw a equidistributed double in the range (0,1.0] + KOKKOS_INLINE_FUNCTION + double drand(); + + //Draw a equidistributed double in the range (0,range] + KOKKOS_INLINE_FUNCTION + double drand(const double& range); + + //Draw a equidistributed double in the range (start,end] + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end ); + + //Draw a standard normal distributed double + KOKKOS_INLINE_FUNCTION + double normal() ; + + //Draw a normal distributed double with given mean and standard deviation + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev=1.0); + } + + //Additional Functions: + + //Fills view with random numbers in the range (0,range] + template<class ViewType, class PoolType> + void fill_random(ViewType view, PoolType pool, ViewType::value_type range); + + //Fills view with random numbers in the range (start,end] + template<class ViewType, class PoolType> + void fill_random(ViewType view, PoolType pool, + ViewType::value_type start, ViewType::value_type end); + +*/ + + template<class Generator, class Scalar> + struct rand; + + + template<class Generator> + struct rand<Generator,char> { + + KOKKOS_INLINE_FUNCTION + static short max(){return 
127;} + // Mask to 7 bits so every draw lies in [0, max()]. The former "&0xff+256" parsed as "&(0xff+256)" and could return values up to 255. + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen) + {return short(gen.rand()&0x7f);} + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const char& range) + {return char(gen.rand(range));} + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const char& start, const char& end) + {return char(gen.rand(start,end));} + + }; + + template<class Generator> + struct rand<Generator,short> { + KOKKOS_INLINE_FUNCTION + static short max(){return 32767;} + // Keeps the low 15 bits, i.e. a value in [0, max()]. Equivalent to the former "(gen.rand()&0xffff+65536)%32768" but without the operator-precedence trap. + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen) + {return short(gen.rand()&0x7fff);} + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const short& range) + {return short(gen.rand(range));} + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const short& start, const short& end) + {return short(gen.rand(start,end));} + + }; + + template<class Generator> + struct rand<Generator,int> { + KOKKOS_INLINE_FUNCTION + static int max(){return Generator::MAX_RAND;} + KOKKOS_INLINE_FUNCTION + static int draw(Generator& gen) + {return gen.rand();} + KOKKOS_INLINE_FUNCTION + static int draw(Generator& gen, const int& range) + {return gen.rand(range);} + KOKKOS_INLINE_FUNCTION + static int draw(Generator& gen, const int& start, const int& end) + {return gen.rand(start,end);} + + }; + + template<class Generator> + struct rand<Generator,unsigned int> { + KOKKOS_INLINE_FUNCTION + static unsigned int max () { + return Generator::MAX_URAND; + } + KOKKOS_INLINE_FUNCTION + static unsigned int draw (Generator& gen) { + return gen.urand (); + } + KOKKOS_INLINE_FUNCTION + static unsigned int draw(Generator& gen, const unsigned int& range) { + return gen.urand (range); + } + KOKKOS_INLINE_FUNCTION + static unsigned int + draw (Generator& gen, const unsigned int& start, const unsigned int& end) { + return gen.urand (start, end); + } + }; + + template<class Generator> + struct rand<Generator,long> { + KOKKOS_INLINE_FUNCTION + static long max () { + // FIXME (mfh
26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (long) == 4 ? + static_cast<long> (Generator::MAX_RAND) : + static_cast<long> (Generator::MAX_RAND64); + } + KOKKOS_INLINE_FUNCTION + static long draw (Generator& gen) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (long) == 4 ? + static_cast<long> (gen.rand ()) : + static_cast<long> (gen.rand64 ()); + } + KOKKOS_INLINE_FUNCTION + static long draw (Generator& gen, const long& range) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (long) == 4 ? + static_cast<long> (gen.rand (static_cast<int> (range))) : + static_cast<long> (gen.rand64 (range)); + } + KOKKOS_INLINE_FUNCTION + static long draw (Generator& gen, const long& start, const long& end) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (long) == 4 ? + static_cast<long> (gen.rand (static_cast<int> (start), + static_cast<int> (end))) : + static_cast<long> (gen.rand64 (start, end)); + } + }; + + template<class Generator> + struct rand<Generator,unsigned long> { + KOKKOS_INLINE_FUNCTION + static unsigned long max () { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (unsigned long) == 4 ? + static_cast<unsigned long> (Generator::MAX_URAND) : + static_cast<unsigned long> (Generator::MAX_URAND64); + } + KOKKOS_INLINE_FUNCTION + static unsigned long draw (Generator& gen) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (unsigned long) == 4 ? 
+ static_cast<unsigned long> (gen.urand ()) : + static_cast<unsigned long> (gen.urand64 ()); + } + KOKKOS_INLINE_FUNCTION + static unsigned long draw(Generator& gen, const unsigned long& range) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (unsigned long) == 4 ? + static_cast<unsigned long> (gen.urand (static_cast<unsigned int> (range))) : + static_cast<unsigned long> (gen.urand64 (range)); + } + KOKKOS_INLINE_FUNCTION + static unsigned long + draw (Generator& gen, const unsigned long& start, const unsigned long& end) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (unsigned long) == 4 ? + static_cast<unsigned long> (gen.urand (static_cast<unsigned int> (start), + static_cast<unsigned int> (end))) : + static_cast<unsigned long> (gen.urand64 (start, end)); + } + }; + + // NOTE (mfh 26 oct 2014) This is a partial specialization for long + // long, a C99 / C++11 signed type which is guaranteed to be at + // least 64 bits. Do NOT write a partial specialization for + // int64_t!!! This is just a typedef! It could be either long or + // long long. We don't know which a priori, and I've seen both. + // The types long and long long are guaranteed to differ, so it's + // always safe to specialize for both. + template<class Generator> + struct rand<Generator, long long> { + KOKKOS_INLINE_FUNCTION + static long long max () { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return Generator::MAX_RAND64; + } + KOKKOS_INLINE_FUNCTION + static long long draw (Generator& gen) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.rand64 (); + } + KOKKOS_INLINE_FUNCTION + static long long draw (Generator& gen, const long long& range) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. 
+ return gen.rand64 (range); + } + KOKKOS_INLINE_FUNCTION + static long long draw (Generator& gen, const long long& start, const long long& end) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.rand64 (start, end); + } + }; + + // NOTE (mfh 26 oct 2014) This is a partial specialization for + // unsigned long long, a C99 / C++11 unsigned type which is + // guaranteed to be at least 64 bits. Do NOT write a partial + // specialization for uint64_t!!! This is just a typedef! It could + // be either unsigned long or unsigned long long. We don't know + // which a priori, and I've seen both. The types unsigned long and + // unsigned long long are guaranteed to differ, so it's always safe + // to specialize for both. + template<class Generator> + struct rand<Generator,unsigned long long> { + KOKKOS_INLINE_FUNCTION + static unsigned long long max () { + // FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 bits. + return Generator::MAX_URAND64; + } + KOKKOS_INLINE_FUNCTION + static unsigned long long draw (Generator& gen) { + // FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 bits. + return gen.urand64 (); + } + KOKKOS_INLINE_FUNCTION + static unsigned long long draw (Generator& gen, const unsigned long long& range) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.urand64 (range); + } + KOKKOS_INLINE_FUNCTION + static unsigned long long + draw (Generator& gen, const unsigned long long& start, const unsigned long long& end) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. 
+ return gen.urand64 (start, end); + } + }; + + template<class Generator> + struct rand<Generator,float> { + KOKKOS_INLINE_FUNCTION + static float max(){return 1.0f;} + KOKKOS_INLINE_FUNCTION + static float draw(Generator& gen) + {return gen.frand();} + KOKKOS_INLINE_FUNCTION + static float draw(Generator& gen, const float& range) + {return gen.frand(range);} + KOKKOS_INLINE_FUNCTION + static float draw(Generator& gen, const float& start, const float& end) + {return gen.frand(start,end);} + + }; + + template<class Generator> + struct rand<Generator,double> { + KOKKOS_INLINE_FUNCTION + static double max(){return 1.0;} + KOKKOS_INLINE_FUNCTION + static double draw(Generator& gen) + {return gen.drand();} + KOKKOS_INLINE_FUNCTION + static double draw(Generator& gen, const double& range) + {return gen.drand(range);} + KOKKOS_INLINE_FUNCTION + static double draw(Generator& gen, const double& start, const double& end) + {return gen.drand(start,end);} + + }; + + template<class Generator> + struct rand<Generator, ::Kokkos::complex<float> > { + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex<float> max () { + return ::Kokkos::complex<float> (1.0, 1.0); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex<float> draw (Generator& gen) { + const float re = gen.frand (); + const float im = gen.frand (); + return ::Kokkos::complex<float> (re, im); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex<float> draw (Generator& gen, const ::Kokkos::complex<float>& range) { + const float re = gen.frand (real (range)); + const float im = gen.frand (imag (range)); + return ::Kokkos::complex<float> (re, im); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex<float> draw (Generator& gen, const ::Kokkos::complex<float>& start, const ::Kokkos::complex<float>& end) { + const float re = gen.frand (real (start), real (end)); + const float im = gen.frand (imag (start), imag (end)); + return ::Kokkos::complex<float> (re, im); + } + }; + + template<class Generator> + struct 
rand<Generator, ::Kokkos::complex<double> > { + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex<double> max () { + return ::Kokkos::complex<double> (1.0, 1.0); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex<double> draw (Generator& gen) { + const double re = gen.drand (); + const double im = gen.drand (); + return ::Kokkos::complex<double> (re, im); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex<double> draw (Generator& gen, const ::Kokkos::complex<double>& range) { + const double re = gen.drand (real (range)); + const double im = gen.drand (imag (range)); + return ::Kokkos::complex<double> (re, im); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex<double> draw (Generator& gen, const ::Kokkos::complex<double>& start, const ::Kokkos::complex<double>& end) { + const double re = gen.drand (real (start), real (end)); + const double im = gen.drand (imag (start), imag (end)); + return ::Kokkos::complex<double> (re, im); + } + }; + + template<class DeviceType> + class Random_XorShift64_Pool; + + template<class DeviceType> + class Random_XorShift64 { + private: + uint64_t state_; + const int state_idx_; + friend class Random_XorShift64_Pool<DeviceType>; + public: + + typedef DeviceType device_type; + + enum {MAX_URAND = 0xffffffffU}; + enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; + enum {MAX_RAND = static_cast<int>(0xffffffff/2)}; + enum {MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffLL/2-1)}; + + KOKKOS_INLINE_FUNCTION + Random_XorShift64 (uint64_t state, int state_idx = 0) + : state_(state),state_idx_(state_idx){} + + KOKKOS_INLINE_FUNCTION + uint32_t urand() { + state_ ^= state_ >> 12; + state_ ^= state_ << 25; + state_ ^= state_ >> 27; + + uint64_t tmp = state_ * 2685821657736338717ULL; + tmp = tmp>>16; + return static_cast<uint32_t>(tmp&MAX_URAND); + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64() { + state_ ^= state_ >> 12; + state_ ^= state_ << 25; + state_ ^= state_ >> 27; + return (state_ * 2685821657736338717ULL) - 1; + } + + 
KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range) { + const uint32_t max_val = (MAX_URAND/range)*range; + uint32_t tmp = urand(); + while(tmp>=max_val) + tmp = urand(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end ) { + return urand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range) { + const uint64_t max_val = (MAX_URAND64/range)*range; + uint64_t tmp = urand64(); + while(tmp>=max_val) + tmp = urand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end ) { + return urand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int rand() { + return static_cast<int>(urand()/2); + } + + KOKKOS_INLINE_FUNCTION + int rand(const int& range) { + const int max_val = (MAX_RAND/range)*range; + int tmp = rand(); + while(tmp>=max_val) + tmp = rand(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end ) { + return rand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64() { + return static_cast<int64_t>(urand64()/2); + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range) { + const int64_t max_val = (MAX_RAND64/range)*range; + int64_t tmp = rand64(); + while(tmp>=max_val) + tmp = rand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end ) { + return rand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + float frand() { + return 1.0f * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + float frand(const float& range) { + return range * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + float frand(const float& start, const float& end ) { + return frand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + double drand() { + return 1.0 * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& range) { + return range * 
urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end ) { + return drand(end-start)+start; + } + + //Marsaglia polar method for drawing a standard normal distributed random number + KOKKOS_INLINE_FUNCTION + double normal() { + double S = 2.0; + double U; + while(S>=1.0) { + U = drand(); + const double V = drand(); + S = U*U+V*V; + } + return U*sqrt(-2.0*log(S)/S); + } + + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev=1.0) { + return mean + normal()*std_dev; + } + + }; + + template<class DeviceType = Kokkos::DefaultExecutionSpace> + class Random_XorShift64_Pool { + private: + typedef View<int*,DeviceType> lock_type; + typedef View<uint64_t*,DeviceType> state_data_type; + lock_type locks_; + state_data_type state_; + int num_states_; + + public: + typedef Random_XorShift64<DeviceType> generator_type; + typedef DeviceType device_type; + + Random_XorShift64_Pool() { + num_states_ = 0; + } + Random_XorShift64_Pool(uint64_t seed) { + num_states_ = 0; + init(seed,DeviceType::max_hardware_threads()); + } + + Random_XorShift64_Pool(const Random_XorShift64_Pool& src): + locks_(src.locks_), + state_(src.state_), + num_states_(src.num_states_) + {} + + Random_XorShift64_Pool operator = (const Random_XorShift64_Pool& src) { + locks_ = src.locks_; + state_ = src.state_; + num_states_ = src.num_states_; + return *this; + } + + void init(uint64_t seed, int num_states) { + num_states_ = num_states; + + locks_ = lock_type("Kokkos::Random_XorShift64::locks",num_states_); + state_ = state_data_type("Kokkos::Random_XorShift64::state",num_states_); + + typename state_data_type::HostMirror h_state = create_mirror_view(state_); + typename lock_type::HostMirror h_lock = create_mirror_view(locks_); + + // Execute on the HostMirror's default execution space. 
+ Random_XorShift64<typename state_data_type::HostMirror::execution_space> gen(seed,0); + for(int i = 0; i < 17; i++) + gen.rand(); + for(int i = 0; i < num_states_; i++) { + int n1 = gen.rand(); + int n2 = gen.rand(); + int n3 = gen.rand(); + int n4 = gen.rand(); + h_state(i) = (((static_cast<uint64_t>(n1)) & 0xffff)<<00) | + (((static_cast<uint64_t>(n2)) & 0xffff)<<16) | + (((static_cast<uint64_t>(n3)) & 0xffff)<<32) | + (((static_cast<uint64_t>(n4)) & 0xffff)<<48); + h_lock(i) = 0; + } + deep_copy(state_,h_state); + deep_copy(locks_,h_lock); + } + + KOKKOS_INLINE_FUNCTION + Random_XorShift64<DeviceType> get_state() const { + const int i = DeviceType::hardware_thread_id();; + return Random_XorShift64<DeviceType>(state_(i),i); + } + + KOKKOS_INLINE_FUNCTION + void free_state(const Random_XorShift64<DeviceType>& state) const { + state_(state.state_idx_) = state.state_; + } + }; + + + template<class DeviceType> + class Random_XorShift1024_Pool; + + template<class DeviceType> + class Random_XorShift1024 { + private: + int p_; + const int state_idx_; + uint64_t state_[16]; + friend class Random_XorShift1024_Pool<DeviceType>; + public: + + typedef Random_XorShift1024_Pool<DeviceType> pool_type; + typedef DeviceType device_type; + + enum {MAX_URAND = 0xffffffffU}; + enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; + enum {MAX_RAND = static_cast<int>(0xffffffffU/2)}; + enum {MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL/2-1)}; + + KOKKOS_INLINE_FUNCTION + Random_XorShift1024 (const typename pool_type::state_data_type& state, int p, int state_idx = 0): + p_(p),state_idx_(state_idx){ + for(int i=0 ; i<16; i++) + state_[i] = state(state_idx,i); + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand() { + uint64_t state_0 = state_[ p_ ]; + uint64_t state_1 = state_[ p_ = ( p_ + 1 ) & 15 ]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + uint64_t tmp = ( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL; + tmp = tmp>>16; + 
return static_cast<uint32_t>(tmp&MAX_URAND); + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64() { + uint64_t state_0 = state_[ p_ ]; + uint64_t state_1 = state_[ p_ = ( p_ + 1 ) & 15 ]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + return (( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981LL) - 1; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range) { + const uint32_t max_val = (MAX_URAND/range)*range; + uint32_t tmp = urand(); + while(tmp>=max_val) + tmp = urand(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end ) { + return urand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range) { + const uint64_t max_val = (MAX_URAND64/range)*range; + uint64_t tmp = urand64(); + while(tmp>=max_val) + tmp = urand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end ) { + return urand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int rand() { + return static_cast<int>(urand()/2); + } + + KOKKOS_INLINE_FUNCTION + int rand(const int& range) { + const int max_val = (MAX_RAND/range)*range; + int tmp = rand(); + while(tmp>=max_val) + tmp = rand(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end ) { + return rand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64() { + return static_cast<int64_t>(urand64()/2); + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range) { + const int64_t max_val = (MAX_RAND64/range)*range; + int64_t tmp = rand64(); + while(tmp>=max_val) + tmp = rand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end ) { + return rand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + float frand() { + return 1.0f * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + float frand(const float& 
range) { + return range * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + float frand(const float& start, const float& end ) { + return frand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + double drand() { + return 1.0 * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& range) { + return range * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end ) { + return frand(end-start)+start; + } + + //Marsaglia polar method for drawing a standard normal distributed random number + KOKKOS_INLINE_FUNCTION + double normal() { + double S = 2.0; + double U; + while(S>=1.0) { + U = drand(); + const double V = drand(); + S = U*U+V*V; + } + return U*sqrt(-2.0*log(S)/S); + } + + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev=1.0) { + return mean + normal()*std_dev; + } + }; + + + template<class DeviceType = Kokkos::DefaultExecutionSpace> + class Random_XorShift1024_Pool { + private: + typedef View<int*,DeviceType> int_view_type; + typedef View<uint64_t*[16],DeviceType> state_data_type; + + int_view_type locks_; + state_data_type state_; + int_view_type p_; + int num_states_; + friend class Random_XorShift1024<DeviceType>; + + public: + typedef Random_XorShift1024<DeviceType> generator_type; + + typedef DeviceType device_type; + + Random_XorShift1024_Pool() { + num_states_ = 0; + } + + inline + Random_XorShift1024_Pool(uint64_t seed){ + num_states_ = 0; + init(seed,DeviceType::max_hardware_threads()); + } + + Random_XorShift1024_Pool(const Random_XorShift1024_Pool& src): + locks_(src.locks_), + state_(src.state_), + p_(src.p_), + num_states_(src.num_states_) + {} + + Random_XorShift1024_Pool operator = (const Random_XorShift1024_Pool& src) { + locks_ = src.locks_; + state_ = src.state_; + p_ = src.p_; + num_states_ = src.num_states_; + return *this; + } + + inline + void init(uint64_t seed, int num_states) { + num_states_ = num_states; + + locks_ = 
int_view_type("Kokkos::Random_XorShift1024::locks",num_states_); + state_ = state_data_type("Kokkos::Random_XorShift1024::state",num_states_); + p_ = int_view_type("Kokkos::Random_XorShift1024::p",num_states_); + + typename state_data_type::HostMirror h_state = create_mirror_view(state_); + typename int_view_type::HostMirror h_lock = create_mirror_view(locks_); + typename int_view_type::HostMirror h_p = create_mirror_view(p_); + + // Execute on the HostMirror's default execution space. + Random_XorShift64<typename state_data_type::HostMirror::execution_space> gen(seed,0); + for(int i = 0; i < 17; i++) + gen.rand(); + for(int i = 0; i < num_states_; i++) { + for(int j = 0; j < 16 ; j++) { + int n1 = gen.rand(); + int n2 = gen.rand(); + int n3 = gen.rand(); + int n4 = gen.rand(); + h_state(i,j) = (((static_cast<uint64_t>(n1)) & 0xffff)<<00) | + (((static_cast<uint64_t>(n2)) & 0xffff)<<16) | + (((static_cast<uint64_t>(n3)) & 0xffff)<<32) | + (((static_cast<uint64_t>(n4)) & 0xffff)<<48); + } + h_p(i) = 0; + h_lock(i) = 0; + } + deep_copy(state_,h_state); + deep_copy(locks_,h_lock); + } + + KOKKOS_INLINE_FUNCTION + Random_XorShift1024<DeviceType> get_state() const { + const int i = DeviceType::hardware_thread_id(); + return Random_XorShift1024<DeviceType>(state_,p_(i),i); + }; + + KOKKOS_INLINE_FUNCTION + void free_state(const Random_XorShift1024<DeviceType>& state) const { + for(int i = 0; i<16; i++) + state_(state.state_idx_,i) = state.state_[i]; + p_(state.state_idx_) = state.p_; + } + }; + +#if defined(KOKKOS_HAVE_CUDA) && defined(__CUDACC__) + + template<> + class Random_XorShift1024<Kokkos::Cuda> { + private: + int p_; + const int state_idx_; + uint64_t* state_; + const int stride_; + friend class Random_XorShift1024_Pool<Kokkos::Cuda>; + public: + + typedef Kokkos::Cuda device_type; + typedef Random_XorShift1024_Pool<device_type> pool_type; + + enum {MAX_URAND = 0xffffffffU}; + enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; + enum {MAX_RAND = 
static_cast<int>(0xffffffffU/2)}; + enum {MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL/2-1)}; + + KOKKOS_INLINE_FUNCTION + Random_XorShift1024 (const typename pool_type::state_data_type& state, int p, int state_idx = 0): + p_(p),state_idx_(state_idx),state_(&state(state_idx,0)),stride_(state.stride_1()){ + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand() { + uint64_t state_0 = state_[ p_ * stride_ ]; + uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + uint64_t tmp = ( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL; + tmp = tmp>>16; + return static_cast<uint32_t>(tmp&MAX_URAND); + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64() { + uint64_t state_0 = state_[ p_ * stride_ ]; + uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + return (( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981LL) - 1; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range) { + const uint32_t max_val = (MAX_URAND/range)*range; + uint32_t tmp = urand(); + while(tmp>=max_val) + urand(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end ) { + return urand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range) { + const uint64_t max_val = (MAX_URAND64/range)*range; + uint64_t tmp = urand64(); + while(tmp>=max_val) + urand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end ) { + return urand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int rand() { + return static_cast<int>(urand()/2); + } + + KOKKOS_INLINE_FUNCTION + int rand(const int& range) { + const int max_val = (MAX_RAND/range)*range; + int tmp = rand(); + while(tmp>=max_val) + rand(); + return tmp%range; + } + + 
KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end ) { + return rand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64() { + return static_cast<int64_t>(urand64()/2); + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range) { + const int64_t max_val = (MAX_RAND64/range)*range; + int64_t tmp = rand64(); + while(tmp>=max_val) + rand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end ) { + return rand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + float frand() { + return 1.0f * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + float frand(const float& range) { + return range * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + float frand(const float& start, const float& end ) { + return frand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + double drand() { + return 1.0 * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& range) { + return range * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end ) { + return frand(end-start)+start; + } + + //Marsaglia polar method for drawing a standard normal distributed random number + KOKKOS_INLINE_FUNCTION + double normal() { + double S = 2.0; + double U; + while(S>=1.0) { + U = drand(); + const double V = drand(); + S = U*U+V*V; + } + return U*sqrt(-2.0*log(S)/S); + } + + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev=1.0) { + return mean + normal()*std_dev; + } + }; + +template<> +inline +Random_XorShift64_Pool<Kokkos::Cuda>::Random_XorShift64_Pool(uint64_t seed) { + num_states_ = 0; + init(seed,4*32768); +} + +template<> +KOKKOS_INLINE_FUNCTION +Random_XorShift64<Kokkos::Cuda> Random_XorShift64_Pool<Kokkos::Cuda>::get_state() const { +#ifdef __CUDA_ARCH__ + const int i_offset = (threadIdx.x*blockDim.y + threadIdx.y)*blockDim.z+threadIdx.z; + int i = 
(((blockIdx.x*gridDim.y+blockIdx.y)*gridDim.z + blockIdx.z) * + blockDim.x*blockDim.y*blockDim.z + i_offset)%num_states_; + while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) { + i+=blockDim.x*blockDim.y*blockDim.z; + if(i>=num_states_) {i = i_offset;} + } + + return Random_XorShift64<Kokkos::Cuda>(state_(i),i); +#else + return Random_XorShift64<Kokkos::Cuda>(state_(0),0); +#endif +} + +template<> +KOKKOS_INLINE_FUNCTION +void Random_XorShift64_Pool<Kokkos::Cuda>::free_state(const Random_XorShift64<Kokkos::Cuda> &state) const { +#ifdef __CUDA_ARCH__ + state_(state.state_idx_) = state.state_; + locks_(state.state_idx_) = 0; + return; +#endif +} + + +template<> +inline +Random_XorShift1024_Pool<Kokkos::Cuda>::Random_XorShift1024_Pool(uint64_t seed) { + num_states_ = 0; + init(seed,4*32768); +} + +template<> +KOKKOS_INLINE_FUNCTION +Random_XorShift1024<Kokkos::Cuda> Random_XorShift1024_Pool<Kokkos::Cuda>::get_state() const { +#ifdef __CUDA_ARCH__ + const int i_offset = (threadIdx.x*blockDim.y + threadIdx.y)*blockDim.z+threadIdx.z; + int i = (((blockIdx.x*gridDim.y+blockIdx.y)*gridDim.z + blockIdx.z) * + blockDim.x*blockDim.y*blockDim.z + i_offset)%num_states_; + while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) { + i+=blockDim.x*blockDim.y*blockDim.z; + if(i>=num_states_) {i = i_offset;} + } + + return Random_XorShift1024<Kokkos::Cuda>(state_, p_(i), i); +#else + return Random_XorShift1024<Kokkos::Cuda>(state_, p_(0), 0); +#endif +} + +template<> +KOKKOS_INLINE_FUNCTION +void Random_XorShift1024_Pool<Kokkos::Cuda>::free_state(const Random_XorShift1024<Kokkos::Cuda> &state) const { +#ifdef __CUDA_ARCH__ + for(int i=0; i<16; i++) + state_(state.state_idx_,i) = state.state_[i]; + locks_(state.state_idx_) = 0; + return; +#endif +} + + +#endif + + +namespace Impl { + +template<class ViewType, class RandomPool, int loops, int rank, class IndexType> +struct fill_random_functor_range; +template<class ViewType, class RandomPool, int loops, int rank, class IndexType> 
+struct fill_random_functor_begin_end; + +template<class ViewType, class RandomPool, int loops, class IndexType> +struct fill_random_functor_range<ViewType,RandomPool,loops,1,IndexType>{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const IndexType& i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j<loops;j++) { + const IndexType idx = i*loops+j; + if(idx<static_cast<IndexType>(a.dimension_0())) + a(idx) = Rand::draw(gen,range); + } + rand_pool.free_state(gen); + } +}; + +template<class ViewType, class RandomPool, int loops, class IndexType> +struct fill_random_functor_range<ViewType,RandomPool,loops,2,IndexType>{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j<loops;j++) { + const IndexType idx = i*loops+j; + if(idx<static_cast<IndexType>(a.dimension_0())) { + for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++) + a(idx,k) = Rand::draw(gen,range); + } + } + rand_pool.free_state(gen); + } +}; + + +template<class ViewType, class RandomPool, int loops, class IndexType> +struct 
fill_random_functor_range<ViewType,RandomPool,loops,3,IndexType>{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j<loops;j++) { + const IndexType idx = i*loops+j; + if(idx<static_cast<IndexType>(a.dimension_0())) { + for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++) + for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++) + a(idx,k,l) = Rand::draw(gen,range); + } + } + rand_pool.free_state(gen); + } +}; + +template<class ViewType, class RandomPool, int loops, class IndexType> +struct fill_random_functor_range<ViewType,RandomPool,loops,4, IndexType>{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j<loops;j++) { + const IndexType idx = i*loops+j; + if(idx<static_cast<IndexType>(a.dimension_0())) { + for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++) + for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++) + for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++) + a(idx,k,l,m) = Rand::draw(gen,range); + 
} + } + rand_pool.free_state(gen); + } +}; + +template<class ViewType, class RandomPool, int loops, class IndexType> +struct fill_random_functor_range<ViewType,RandomPool,loops,5,IndexType>{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j<loops;j++) { + const IndexType idx = i*loops+j; + if(idx<static_cast<IndexType>(a.dimension_0())) { + for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++) + for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++) + for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++) + for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++) + a(idx,k,l,m,n) = Rand::draw(gen,range); + } + } + rand_pool.free_state(gen); + } +}; + +template<class ViewType, class RandomPool, int loops, class IndexType> +struct fill_random_functor_range<ViewType,RandomPool,loops,6,IndexType>{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j<loops;j++) { + const IndexType idx = i*loops+j; + 
if(idx<static_cast<IndexType>(a.dimension_0())) { + for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++) + for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++) + for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++) + for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++) + for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++) + a(idx,k,l,m,n,o) = Rand::draw(gen,range); + } + } + rand_pool.free_state(gen); + } +}; + +template<class ViewType, class RandomPool, int loops, class IndexType> +struct fill_random_functor_range<ViewType,RandomPool,loops,7,IndexType>{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j<loops;j++) { + const IndexType idx = i*loops+j; + if(idx<static_cast<IndexType>(a.dimension_0())) { + for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++) + for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++) + for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++) + for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++) + for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++) + for(IndexType p=0;p<static_cast<IndexType>(a.dimension_6());p++) + a(idx,k,l,m,n,o,p) = Rand::draw(gen,range); + } + } + rand_pool.free_state(gen); + } +}; + +template<class ViewType, class RandomPool, int loops, class IndexType> +struct fill_random_functor_range<ViewType,RandomPool,loops,8,IndexType>{ + typedef typename ViewType::execution_space execution_space; + ViewType 
a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j<loops;j++) { + const IndexType idx = i*loops+j; + if(idx<static_cast<IndexType>(a.dimension_0())) { + for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++) + for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++) + for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++) + for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++) + for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++) + for(IndexType p=0;p<static_cast<IndexType>(a.dimension_6());p++) + for(IndexType q=0;q<static_cast<IndexType>(a.dimension_7());q++) + a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,range); + } + } + rand_pool.free_state(gen); + } +}; +template<class ViewType, class RandomPool, int loops, class IndexType> +struct fill_random_functor_begin_end<ViewType,RandomPool,loops,1,IndexType>{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j<loops;j++) { + const IndexType idx = i*loops+j; + 
if(idx<static_cast<IndexType>(a.dimension_0())) + a(idx) = Rand::draw(gen,begin,end); + } + rand_pool.free_state(gen); + } +}; + +template<class ViewType, class RandomPool, int loops, class IndexType> +struct fill_random_functor_begin_end<ViewType,RandomPool,loops,2,IndexType>{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j<loops;j++) { + const IndexType idx = i*loops+j; + if(idx<static_cast<IndexType>(a.dimension_0())) { + for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++) + a(idx,k) = Rand::draw(gen,begin,end); + } + } + rand_pool.free_state(gen); + } +}; + + +template<class ViewType, class RandomPool, int loops, class IndexType> +struct fill_random_functor_begin_end<ViewType,RandomPool,loops,3,IndexType>{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j<loops;j++) { + const IndexType idx = i*loops+j; + 
if(idx<static_cast<IndexType>(a.dimension_0())) { + for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++) + for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++) + a(idx,k,l) = Rand::draw(gen,begin,end); + } + } + rand_pool.free_state(gen); + } +}; + +template<class ViewType, class RandomPool, int loops, class IndexType> +struct fill_random_functor_begin_end<ViewType,RandomPool,loops,4,IndexType>{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j<loops;j++) { + const IndexType idx = i*loops+j; + if(idx<static_cast<IndexType>(a.dimension_0())) { + for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++) + for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++) + for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++) + a(idx,k,l,m) = Rand::draw(gen,begin,end); + } + } + rand_pool.free_state(gen); + } +}; + +template<class ViewType, class RandomPool, int loops, class IndexType> +struct fill_random_functor_begin_end<ViewType,RandomPool,loops,5,IndexType>{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + 
a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j<loops;j++) { + const IndexType idx = i*loops+j; + if(idx<static_cast<IndexType>(a.dimension_0())){ + for(IndexType l=0;l<static_cast<IndexType>(a.dimension_1());l++) + for(IndexType m=0;m<static_cast<IndexType>(a.dimension_2());m++) + for(IndexType n=0;n<static_cast<IndexType>(a.dimension_3());n++) + for(IndexType o=0;o<static_cast<IndexType>(a.dimension_4());o++) + a(idx,l,m,n,o) = Rand::draw(gen,begin,end); + } + } + rand_pool.free_state(gen); + } +}; + +template<class ViewType, class RandomPool, int loops, class IndexType> +struct fill_random_functor_begin_end<ViewType,RandomPool,loops,6,IndexType>{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j<loops;j++) { + const IndexType idx = i*loops+j; + if(idx<static_cast<IndexType>(a.dimension_0())) { + for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++) + for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++) + for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++) + for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++) + for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++) + a(idx,k,l,m,n,o) = Rand::draw(gen,begin,end); + } + } + rand_pool.free_state(gen); + } +}; + + +template<class 
ViewType, class RandomPool, int loops, class IndexType> +struct fill_random_functor_begin_end<ViewType,RandomPool,loops,7,IndexType>{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j<loops;j++) { + const IndexType idx = i*loops+j; + if(idx<static_cast<IndexType>(a.dimension_0())) { + for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++) + for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++) + for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++) + for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++) + for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++) + for(IndexType p=0;p<static_cast<IndexType>(a.dimension_6());p++) + a(idx,k,l,m,n,o,p) = Rand::draw(gen,begin,end); + } + } + rand_pool.free_state(gen); + } +}; + +template<class ViewType, class RandomPool, int loops, class IndexType> +struct fill_random_functor_begin_end<ViewType,RandomPool,loops,8,IndexType>{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + 
void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j<loops;j++) { + const IndexType idx = i*loops+j; + if(idx<static_cast<IndexType>(a.dimension_0())) { + for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++) + for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++) + for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++) + for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++) + for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++) + for(IndexType p=0;p<static_cast<IndexType>(a.dimension_6());p++) + for(IndexType q=0;q<static_cast<IndexType>(a.dimension_7());q++) + a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,begin,end); + } + } + rand_pool.free_state(gen); + } +}; + +} + +template<class ViewType, class RandomPool, class IndexType = int64_t> +void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type range) { + int64_t LDA = a.dimension_0(); + if(LDA>0) + parallel_for((LDA+127)/128,Impl::fill_random_functor_range<ViewType,RandomPool,128,ViewType::Rank,IndexType>(a,g,range)); +} + +template<class ViewType, class RandomPool, class IndexType = int64_t> +void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type begin,typename ViewType::const_value_type end ) { + int64_t LDA = a.dimension_0(); + if(LDA>0) + parallel_for((LDA+127)/128,Impl::fill_random_functor_begin_end<ViewType,RandomPool,128,ViewType::Rank,IndexType>(a,g,begin,end)); +} +} + +#endif diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6123ce978c8a385a87ac57bdca45a9ff8517757f --- /dev/null +++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -0,0 +1,496 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#ifndef KOKKOS_SORT_HPP_ +#define KOKKOS_SORT_HPP_ + +#include <Kokkos_Core.hpp> + +#include <algorithm> + +namespace Kokkos { + + namespace SortImpl { + + template<class ValuesViewType, int Rank=ValuesViewType::Rank> + struct CopyOp; + + template<class ValuesViewType> + struct CopyOp<ValuesViewType,1> { + template<class DstType, class SrcType> + KOKKOS_INLINE_FUNCTION + static void copy(DstType& dst, size_t i_dst, + SrcType& src, size_t i_src ) { + dst(i_dst) = src(i_src); + } + }; + + template<class ValuesViewType> + struct CopyOp<ValuesViewType,2> { + template<class DstType, class SrcType> + KOKKOS_INLINE_FUNCTION + static void copy(DstType& dst, size_t i_dst, + SrcType& src, size_t i_src ) { + for(int j = 0;j< (int) dst.dimension_1(); j++) + dst(i_dst,j) = src(i_src,j); + } + }; + + template<class ValuesViewType> + struct CopyOp<ValuesViewType,3> { + template<class DstType, class SrcType> + KOKKOS_INLINE_FUNCTION + static void copy(DstType& dst, size_t i_dst, + SrcType& src, size_t i_src ) { + for(int j = 0; j<dst.dimension_1(); j++) + for(int k = 0; k<dst.dimension_2(); k++) + dst(i_dst,j,k) = src(i_src,j,k); + } + }; + } + +template<class KeyViewType, class BinSortOp, class ExecutionSpace = typename KeyViewType::execution_space, + class SizeType = typename KeyViewType::memory_space::size_type> +class BinSort { + + +public: + template<class ValuesViewType, class PermuteViewType, class CopyOp> + struct bin_sort_sort_functor { + typedef ExecutionSpace execution_space; + typedef typename ValuesViewType::non_const_type values_view_type; + typedef typename ValuesViewType::const_type const_values_view_type; + Kokkos::View<typename values_view_type::const_data_type,typename values_view_type::array_layout, + typename values_view_type::memory_space,Kokkos::MemoryTraits<Kokkos::RandomAccess> > values; + values_view_type sorted_values; 
+ typename PermuteViewType::const_type sort_order; + bin_sort_sort_functor(const_values_view_type values_, values_view_type sorted_values_, PermuteViewType sort_order_): + values(values_),sorted_values(sorted_values_),sort_order(sort_order_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + //printf("Sort: %i %i\n",i,sort_order(i)); + CopyOp::copy(sorted_values,i,values,sort_order(i)); + } + }; + + typedef ExecutionSpace execution_space; + typedef BinSortOp bin_op_type; + + struct bin_count_tag {}; + struct bin_offset_tag {}; + struct bin_binning_tag {}; + struct bin_sort_bins_tag {}; + +public: + typedef SizeType size_type; + typedef size_type value_type; + + typedef Kokkos::View<size_type*, execution_space> offset_type; + typedef Kokkos::View<const int*, execution_space> bin_count_type; + + + typedef Kokkos::View<typename KeyViewType::const_data_type, + typename KeyViewType::array_layout, + typename KeyViewType::memory_space> const_key_view_type; + typedef Kokkos::View<typename KeyViewType::const_data_type, + typename KeyViewType::array_layout, + typename KeyViewType::memory_space, + Kokkos::MemoryTraits<Kokkos::RandomAccess> > const_rnd_key_view_type; + + typedef typename KeyViewType::non_const_value_type non_const_key_scalar; + typedef typename KeyViewType::const_value_type const_key_scalar; + +private: + const_key_view_type keys; + const_rnd_key_view_type keys_rnd; + +public: + BinSortOp bin_op; + + offset_type bin_offsets; + + Kokkos::View<int*, ExecutionSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > bin_count_atomic; + bin_count_type bin_count_const; + + offset_type sort_order; + + bool sort_within_bins; + +public: + + // Constructor: takes the keys, the binning_operator and optionally whether to sort within bins (default false) + BinSort(const_key_view_type keys_, BinSortOp bin_op_, + bool sort_within_bins_ = false) + :keys(keys_),keys_rnd(keys_), bin_op(bin_op_) { + + bin_count_atomic = Kokkos::View<int*, ExecutionSpace 
>("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins()); + bin_count_const = bin_count_atomic; + bin_offsets = offset_type("Kokkos::SortImpl::BinSortFunctor::bin_offsets",bin_op.max_bins()); + sort_order = offset_type("PermutationVector",keys.dimension_0()); + sort_within_bins = sort_within_bins_; + } + + // Create the permutation vector, the bin_offset array and the bin_count array. Can be called again if keys changed + void create_permute_vector() { + Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_count_tag> (0,keys.dimension_0()),*this); + Kokkos::parallel_scan(Kokkos::RangePolicy<ExecutionSpace,bin_offset_tag> (0,bin_op.max_bins()) ,*this); + + Kokkos::deep_copy(bin_count_atomic,0); + Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_binning_tag> (0,keys.dimension_0()),*this); + + if(sort_within_bins) + Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_sort_bins_tag>(0,bin_op.max_bins()) ,*this); + } + + // Sort a view with respect ot the first dimension using the permutation array + template<class ValuesViewType> + void sort(ValuesViewType values) { + ValuesViewType sorted_values = ValuesViewType("Copy", + values.dimension_0(), + values.dimension_1(), + values.dimension_2(), + values.dimension_3(), + values.dimension_4(), + values.dimension_5(), + values.dimension_6(), + values.dimension_7()); + + parallel_for(values.dimension_0(), + bin_sort_sort_functor<ValuesViewType, offset_type, + SortImpl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order)); + + deep_copy(values,sorted_values); + } + + // Get the permutation vector + KOKKOS_INLINE_FUNCTION + offset_type get_permute_vector() const { return sort_order;} + + // Get the start offsets for each bin + KOKKOS_INLINE_FUNCTION + offset_type get_bin_offsets() const { return bin_offsets;} + + // Get the count for each bin + KOKKOS_INLINE_FUNCTION + bin_count_type get_bin_count() const {return bin_count_const;} + +public: + KOKKOS_INLINE_FUNCTION + void operator() 
(const bin_count_tag& tag, const int& i) const { + bin_count_atomic(bin_op.bin(keys,i))++; + } + + KOKKOS_INLINE_FUNCTION + void operator() (const bin_offset_tag& tag, const int& i, value_type& offset, const bool& final) const { + if(final) { + bin_offsets(i) = offset; + } + offset+=bin_count_const(i); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const bin_binning_tag& tag, const int& i) const { + const int bin = bin_op.bin(keys,i); + const int count = bin_count_atomic(bin)++; + + sort_order(bin_offsets(bin) + count) = i; + } + + KOKKOS_INLINE_FUNCTION + void operator() (const bin_sort_bins_tag& tag, const int&i ) const { + bool sorted = (bin_count_const(i) <= 1); /* bubble-sorts bin i's slice of sort_order; a bin with 0 or 1 entries is already ordered, and skipping it avoids reading sort_order(bin_offsets(i)), which for an empty bin belongs to a later bin or lies past the end */ + int upper_bound = bin_offsets(i)+bin_count_const(i); + while(!sorted) { + sorted = true; + int old_idx = sort_order(bin_offsets(i)); + int new_idx; + for(int k=bin_offsets(i)+1; k<upper_bound; k++) { + new_idx = sort_order(k); + + if(!bin_op(keys_rnd,old_idx,new_idx)) { + sort_order(k-1) = new_idx; + sort_order(k) = old_idx; + sorted = false; + } else { + old_idx = new_idx; + } + } + upper_bound--; + } + } +}; + +namespace SortImpl { + +template<class KeyViewType> +struct DefaultBinOp1D { + const int max_bins_; + const double mul_; + typename KeyViewType::const_value_type range_; + typename KeyViewType::const_value_type min_; + + //Construct BinOp with number of bins, minimum value and maximum value + DefaultBinOp1D(int max_bins__, typename KeyViewType::const_value_type min, + typename KeyViewType::const_value_type max ) + :max_bins_(max_bins__+1),mul_(1.0*max_bins__/(max-min)),range_(max-min),min_(min) {} + + //Determine bin index from key value + template<class ViewType> + KOKKOS_INLINE_FUNCTION + int bin(ViewType& keys, const int& i) const { + return int(mul_*(keys(i)-min_)); + } + + //Return maximum bin index + 1 + KOKKOS_INLINE_FUNCTION + int max_bins() const { + return max_bins_; + } + + //Compare two keys within a bin: if true i1 stays before i2, otherwise the two entries are swapped + template<class ViewType, typename iType1,
typename iType2> + KOKKOS_INLINE_FUNCTION + bool operator()(ViewType& keys, iType1& i1, iType2& i2) const { + return keys(i1)<keys(i2); + } +}; + +template<class KeyViewType> +struct DefaultBinOp3D { + int max_bins_[3]; + double mul_[3]; + typename KeyViewType::non_const_value_type range_[3]; + typename KeyViewType::non_const_value_type min_[3]; + + DefaultBinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], + typename KeyViewType::const_value_type max[] ) + { + max_bins_[0] = max_bins__[0]+1; + max_bins_[1] = max_bins__[1]+1; + max_bins_[2] = max_bins__[2]+1; + mul_[0] = 1.0*max_bins__[0]/(max[0]-min[0]); + mul_[1] = 1.0*max_bins__[1]/(max[1]-min[1]); + mul_[2] = 1.0*max_bins__[2]/(max[2]-min[2]); + range_[0] = max[0]-min[0]; + range_[1] = max[1]-min[1]; + range_[2] = max[2]-min[2]; + min_[0] = min[0]; + min_[1] = min[1]; + min_[2] = min[2]; + } + + template<class ViewType> + KOKKOS_INLINE_FUNCTION + int bin(ViewType& keys, const int& i) const { + return int( (((int(mul_[0]*(keys(i,0)-min_[0]))*max_bins_[1]) + + int(mul_[1]*(keys(i,1)-min_[1])))*max_bins_[2]) + + int(mul_[2]*(keys(i,2)-min_[2]))); + } + + KOKKOS_INLINE_FUNCTION + int max_bins() const { + return max_bins_[0]*max_bins_[1]*max_bins_[2]; + } + + template<class ViewType, typename iType1, typename iType2> + KOKKOS_INLINE_FUNCTION + bool operator()(ViewType& keys, iType1& i1 , iType2& i2) const { /* lexicographic compare over coordinates 0,1,2: true when key i1 is strictly greater than key i2 */ + if (keys(i1,0)>keys(i2,0)) return true; + else if (keys(i1,0)==keys(i2,0)) { + if (keys(i1,1)>keys(i2,1)) return true; + else if (keys(i1,1)==keys(i2,1)) { /* tie on coordinate 1 (was compared against coordinate 2 of i2): decide on coordinate 2 */ + if (keys(i1,2)>keys(i2,2)) return true; + } + } + return false; + } +}; + +template<typename Scalar> +struct min_max { + Scalar min; + Scalar max; + bool init; + + KOKKOS_INLINE_FUNCTION + min_max() { + min = 0; + max = 0; + init = 0; + } + + KOKKOS_INLINE_FUNCTION + min_max (const min_max& val) { + min = val.min; + max = val.max; + init = val.init; + } + + KOKKOS_INLINE_FUNCTION + min_max& operator = (const min_max& val) { + min = 
val.min; + max = val.max; + init = val.init; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator+= (const Scalar& val) { + if(init) { + min = min<val?min:val; + max = max>val?max:val; + } else { + min = val; + max = val; + init = 1; + } + } + + KOKKOS_INLINE_FUNCTION + void operator+= (const min_max& val) { + if(init && val.init) { + min = min<val.min?min:val.min; + max = max>val.max?max:val.max; + } else { + if(val.init) { + min = val.min; + max = val.max; + init = 1; + } + } + } + + KOKKOS_INLINE_FUNCTION + void operator+= (volatile const Scalar& val) volatile { + if(init) { + min = min<val?min:val; + max = max>val?max:val; + } else { + min = val; + max = val; + init = 1; + } + } + + KOKKOS_INLINE_FUNCTION + void operator+= (volatile const min_max& val) volatile { + if(init && val.init) { + min = min<val.min?min:val.min; + max = max>val.max?max:val.max; + } else { + if(val.init) { + min = val.min; + max = val.max; + init = 1; + } + } + } +}; + + +template<class ViewType> +struct min_max_functor { + typedef typename ViewType::execution_space execution_space; + ViewType view; + typedef min_max<typename ViewType::non_const_value_type> value_type; + min_max_functor (const ViewType view_):view(view_) { + } + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t& i, value_type& val) const { + val += view(i); + } +}; + +template<class ViewType> +bool try_std_sort(ViewType view) { + bool possible = true; +#if ! 
KOKKOS_USING_EXP_VIEW + size_t stride[8]; + view.stride(stride); +#else + size_t stride[8] = { view.stride_0() + , view.stride_1() + , view.stride_2() + , view.stride_3() + , view.stride_4() + , view.stride_5() + , view.stride_6() + , view.stride_7() + }; +#endif + possible = possible && Impl::is_same<typename ViewType::memory_space, HostSpace>::value; + possible = possible && (ViewType::Rank == 1); + possible = possible && (stride[0] == 1); + if(possible) { + std::sort(view.ptr_on_device(),view.ptr_on_device()+view.dimension_0()); + } + return possible; +} + +} + +template<class ViewType> +void sort(ViewType view, bool always_use_kokkos_sort = false) { /* sorts a view in place: tries std::sort first unless always_use_kokkos_sort is set, otherwise bins the keys by value with BinSort and sorts within bins */ + if(!always_use_kokkos_sort) { + if(SortImpl::try_std_sort(view)) return; + } + + typedef SortImpl::DefaultBinOp1D<ViewType> CompType; + SortImpl::min_max<typename ViewType::non_const_value_type> val; + parallel_reduce(view.dimension_0(),SortImpl::min_max_functor<ViewType>(view),val); if(val.min==val.max) return; /* empty view or all keys equal: nothing to reorder, and max-min==0 would make DefaultBinOp1D compute 1.0*bins/(max-min) with a zero divisor */ + BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,val.min,val.max),true); + bin_sort.create_permute_vector(); + bin_sort.sort(view); +} + +/*template<class ViewType, class Comparator> +void sort(ViewType view, Comparator comp, bool always_use_kokkos_sort = false) { + +}*/ + +} + +#endif diff --git a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..654104b44e7b395c6937f4c1dc35b4933018268e --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt @@ -0,0 +1,38 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) + +SET(SOURCES + UnitTestMain.cpp + TestCuda.cpp + ) + +SET(LIBRARIES kokkoscore) + +IF(Kokkos_ENABLE_OpenMP) + LIST( APPEND SOURCES + TestOpenMP.cpp + ) +ENDIF() + +IF(Kokkos_ENABLE_Serial) + LIST( APPEND SOURCES + TestSerial.cpp + ) +ENDIF() + +IF(Kokkos_ENABLE_Pthread) 
+ LIST( APPEND SOURCES + TestThreads.cpp + ) +ENDIF() + +TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest + SOURCES ${SOURCES} + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest + ) diff --git a/lib/kokkos/algorithms/unit_tests/Makefile b/lib/kokkos/algorithms/unit_tests/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..5d79364c52abc7a8a61769d187fc06e5612e203b --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/Makefile @@ -0,0 +1,92 @@ +KOKKOS_PATH = ../.. + +GTEST_PATH = ../../TPL/gtest + +vpath %.cpp ${KOKKOS_PATH}/algorithms/unit_tests + +default: build_all + echo "End Build" + + +include $(KOKKOS_PATH)/Makefile.kokkos + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + CXX = $(NVCC_WRAPPER) + CXXFLAGS ?= -O3 + LINK = $(CXX) + LDFLAGS ?= -lpthread +else + CXX ?= g++ + CXXFLAGS ?= -O3 + LINK ?= $(CXX) + LDFLAGS ?= -lpthread +endif + +KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests + +TEST_TARGETS = +TARGETS = + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o + TARGETS += KokkosAlgorithms_UnitTest_Cuda + TEST_TARGETS += test-cuda +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o + TARGETS += KokkosAlgorithms_UnitTest_Threads + TEST_TARGETS += test-threads +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o + TARGETS += KokkosAlgorithms_UnitTest_OpenMP + TEST_TARGETS += test-openmp +endif + +ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o + TARGETS += KokkosAlgorithms_UnitTest_Serial + TEST_TARGETS += test-serial +endif + +KokkosAlgorithms_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Cuda + +KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) 
$(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Threads + +KokkosAlgorithms_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_OpenMP + +KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Serial + +test-cuda: KokkosAlgorithms_UnitTest_Cuda + ./KokkosAlgorithms_UnitTest_Cuda + +test-threads: KokkosAlgorithms_UnitTest_Threads + ./KokkosAlgorithms_UnitTest_Threads + +test-openmp: KokkosAlgorithms_UnitTest_OpenMP + ./KokkosAlgorithms_UnitTest_OpenMP + +test-serial: KokkosAlgorithms_UnitTest_Serial + ./KokkosAlgorithms_UnitTest_Serial + +build_all: $(TARGETS) + +test: $(TEST_TARGETS) + +clean: kokkos-clean + rm -f *.o $(TARGETS) + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + +gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc + diff --git a/lib/kokkos/algorithms/unit_tests/TestCuda.cpp b/lib/kokkos/algorithms/unit_tests/TestCuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d19c778c4663bff82e50037d2d1b6ffaeeff103d --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestCuda.cpp @@ -0,0 +1,110 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <stdint.h> +#include <iostream> +#include <iomanip> + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +#ifdef KOKKOS_HAVE_CUDA + +#include <TestRandom.hpp> +#include <TestSort.hpp> + +namespace Test { + +class cuda : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + Kokkos::HostSpace::execution_space::initialize(); + Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) ); + } + static void TearDownTestCase() + { + Kokkos::Cuda::finalize(); + Kokkos::HostSpace::execution_space::finalize(); + } +}; + +void cuda_test_random_xorshift64( int num_draws ) +{ + Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Cuda> >(num_draws); +} + +void cuda_test_random_xorshift1024( int num_draws ) +{ + Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Cuda> >(num_draws); +} + + +#define CUDA_RANDOM_XORSHIFT64( num_draws ) \ + TEST_F( cuda, Random_XorShift64 ) { \ + cuda_test_random_xorshift64(num_draws); \ + } + +#define CUDA_RANDOM_XORSHIFT1024( num_draws ) \ + TEST_F( cuda, Random_XorShift1024 ) { \ + cuda_test_random_xorshift1024(num_draws); \ + } + +#define CUDA_SORT_UNSIGNED( size ) \ + TEST_F( cuda, SortUnsigned ) { \ + Impl::test_sort< Kokkos::Cuda, unsigned >(size); \ + } + +CUDA_RANDOM_XORSHIFT64( 132141141 ) +CUDA_RANDOM_XORSHIFT1024( 52428813 ) +CUDA_SORT_UNSIGNED(171) + +#undef CUDA_RANDOM_XORSHIFT64 +#undef CUDA_RANDOM_XORSHIFT1024 +#undef CUDA_SORT_UNSIGNED +} + +#endif /* #ifdef KOKKOS_HAVE_CUDA */ + diff --git a/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp b/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4b06dffcb6a068503770229091ab15330bf6af89 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp @@ -0,0 +1,102 @@ +/* +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +//---------------------------------------------------------------------------- +#include <TestRandom.hpp> +#include <TestSort.hpp> +#include <iomanip> + +namespace Test { + +#ifdef KOKKOS_HAVE_OPENMP +class openmp : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + + unsigned threads_count = omp_get_max_threads(); + + if ( Kokkos::hwloc::available() ) { + threads_count = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa(); + } + + Kokkos::OpenMP::initialize( threads_count ); + } + + static void TearDownTestCase() + { + Kokkos::OpenMP::finalize(); + } +}; + +#define OPENMP_RANDOM_XORSHIFT64( num_draws ) \ + TEST_F( openmp, Random_XorShift64 ) { \ + Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::OpenMP> >(num_draws); \ + } + +#define OPENMP_RANDOM_XORSHIFT1024( num_draws ) \ + TEST_F( openmp, Random_XorShift1024 ) { \ + Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::OpenMP> >(num_draws); \ + } + +#define OPENMP_SORT_UNSIGNED( size ) \ + TEST_F( openmp, SortUnsigned ) { \ + Impl::test_sort< Kokkos::OpenMP, unsigned >(size); \ + } + +OPENMP_RANDOM_XORSHIFT64( 10240000 ) +OPENMP_RANDOM_XORSHIFT1024( 10130144 ) +OPENMP_SORT_UNSIGNED(171) + +#undef OPENMP_RANDOM_XORSHIFT64 +#undef OPENMP_RANDOM_XORSHIFT1024 +#undef OPENMP_SORT_UNSIGNED +#endif +} // namespace test + diff --git a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c906b9f2cdc69735a225698c2bb5dc0e152160cb --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp @@ -0,0 +1,481 @@ +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_TEST_DUALVIEW_HPP +#define KOKKOS_TEST_DUALVIEW_HPP + +#include <gtest/gtest.h> +#include <iostream> +#include <cstdlib> +#include <cstdio> +#include <impl/Kokkos_Timer.hpp> +#include <Kokkos_Core.hpp> +#include <Kokkos_Random.hpp> +#include <cmath> +#include <chrono> + +namespace Test { + +namespace Impl{ + +// This test runs the random number generators and uses some statistic tests to +// check the 'goodness' of the random numbers: +// (i) mean: the mean is expected to be 0.5*RAND_MAX +// (ii) variance: the variance is 1/3*mean*mean +// (iii) covariance: the covariance is 0 +// (iv) 1-tupledistr: the mean, variance and covariance of a 1D Histrogram of random numbers +// (v) 3-tupledistr: the mean, variance and covariance of a 3D Histrogram of random numbers + +#define HIST_DIM3D 24 +#define HIST_DIM1D (HIST_DIM3D*HIST_DIM3D*HIST_DIM3D) + +struct RandomProperties { + uint64_t count; + double mean; + double variance; + double covariance; + double min; + double max; + + KOKKOS_INLINE_FUNCTION + RandomProperties() { + count = 0; + mean = 0.0; + variance = 0.0; + covariance = 0.0; + min = 1e64; + max = -1e64; + } + + KOKKOS_INLINE_FUNCTION + RandomProperties& operator+=(const RandomProperties& add) { + count += add.count; + mean += add.mean; + variance += add.variance; + covariance += add.covariance; + min = add.min<min?add.min:min; + max = add.max>max?add.max:max; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator+=(const volatile RandomProperties& add) volatile { + count += add.count; + mean += add.mean; + variance += add.variance; + covariance += add.covariance; + min = add.min<min?add.min:min; + max = add.max>max?add.max:max; + } +}; + +template<class GeneratorPool, class Scalar> +struct test_random_functor { + typedef typename GeneratorPool::generator_type rnd_type; + + typedef RandomProperties value_type; 
+ typedef typename GeneratorPool::device_type device_type; + + GeneratorPool rand_pool; + const double mean; + + // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define + // an exclusive upper bound on the range of random numbers that + // draw() can generate. However, for the float specialization, some + // implementations might violate this upper bound, due to rounding + // error. Just in case, we leave an extra space at the end of each + // dimension, in the View types below. + typedef Kokkos::View<int[HIST_DIM1D+1],typename GeneratorPool::device_type> type_1d; + type_1d density_1d; + typedef Kokkos::View<int[HIST_DIM3D+1][HIST_DIM3D+1][HIST_DIM3D+1],typename GeneratorPool::device_type> type_3d; + type_3d density_3d; + + test_random_functor (GeneratorPool rand_pool_, type_1d d1d, type_3d d3d) : + rand_pool (rand_pool_), + mean (0.5*Kokkos::rand<rnd_type,Scalar>::max ()), + density_1d (d1d), + density_3d (d3d) + {} + + KOKKOS_INLINE_FUNCTION + void operator() (int i, RandomProperties& prop) const { + using Kokkos::atomic_fetch_add; + + rnd_type rand_gen = rand_pool.get_state(); + for (int k = 0; k < 1024; ++k) { + const Scalar tmp = Kokkos::rand<rnd_type,Scalar>::draw(rand_gen); + prop.count++; + prop.mean += tmp; + prop.variance += (tmp-mean)*(tmp-mean); + const Scalar tmp2 = Kokkos::rand<rnd_type,Scalar>::draw(rand_gen); + prop.count++; + prop.mean += tmp2; + prop.variance += (tmp2-mean)*(tmp2-mean); + prop.covariance += (tmp-mean)*(tmp2-mean); + const Scalar tmp3 = Kokkos::rand<rnd_type,Scalar>::draw(rand_gen); + prop.count++; + prop.mean += tmp3; + prop.variance += (tmp3-mean)*(tmp3-mean); + prop.covariance += (tmp2-mean)*(tmp3-mean); + + // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to + // define an exclusive upper bound on the range of random + // numbers that draw() can generate. However, for the float + // specialization, some implementations might violate this upper + // bound, due to rounding error. 
Just in case, we have left an + // extra space at the end of each dimension of density_1d and + // density_3d. + // + // Please note that those extra entries might not get counted in + // the histograms. However, if Kokkos::rand is broken and only + // returns values of max(), the histograms will still catch this + // indirectly, since none of the other values will be filled in. + + const Scalar theMax = Kokkos::rand<rnd_type, Scalar>::max (); + + const uint64_t ind1_1d = static_cast<uint64_t> (1.0 * HIST_DIM1D * tmp / theMax); + const uint64_t ind2_1d = static_cast<uint64_t> (1.0 * HIST_DIM1D * tmp2 / theMax); + const uint64_t ind3_1d = static_cast<uint64_t> (1.0 * HIST_DIM1D * tmp3 / theMax); + + const uint64_t ind1_3d = static_cast<uint64_t> (1.0 * HIST_DIM3D * tmp / theMax); + const uint64_t ind2_3d = static_cast<uint64_t> (1.0 * HIST_DIM3D * tmp2 / theMax); + const uint64_t ind3_3d = static_cast<uint64_t> (1.0 * HIST_DIM3D * tmp3 / theMax); + + atomic_fetch_add (&density_1d(ind1_1d), 1); + atomic_fetch_add (&density_1d(ind2_1d), 1); + atomic_fetch_add (&density_1d(ind3_1d), 1); + atomic_fetch_add (&density_3d(ind1_3d, ind2_3d, ind3_3d), 1); + } + rand_pool.free_state(rand_gen); + } +}; + +template<class DeviceType> +struct test_histogram1d_functor { + typedef RandomProperties value_type; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + + // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define + // an exclusive upper bound on the range of random numbers that + // draw() can generate. However, for the float specialization, some + // implementations might violate this upper bound, due to rounding + // error. Just in case, we leave an extra space at the end of each + // dimension, in the View type below. 
+ typedef Kokkos::View<int[HIST_DIM1D+1], memory_space> type_1d; + type_1d density_1d; + double mean; + + test_histogram1d_functor (type_1d d1d, int num_draws) : + density_1d (d1d), + mean (1.0*num_draws/HIST_DIM1D*3) + { + } + + KOKKOS_INLINE_FUNCTION void + operator() (const typename memory_space::size_type i, + RandomProperties& prop) const + { + typedef typename memory_space::size_type size_type; + const double count = density_1d(i); + prop.mean += count; + prop.variance += 1.0 * (count - mean) * (count - mean); + //prop.covariance += 1.0*count*count; + prop.min = count < prop.min ? count : prop.min; + prop.max = count > prop.max ? count : prop.max; + if (i < static_cast<size_type> (HIST_DIM1D-1)) { + prop.covariance += (count - mean) * (density_1d(i+1) - mean); + } + } +}; + +template<class DeviceType> +struct test_histogram3d_functor { + typedef RandomProperties value_type; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + + // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define + // an exclusive upper bound on the range of random numbers that + // draw() can generate. However, for the float specialization, some + // implementations might violate this upper bound, due to rounding + // error. Just in case, we leave an extra space at the end of each + // dimension, in the View type below. 
+ typedef Kokkos::View<int[HIST_DIM3D+1][HIST_DIM3D+1][HIST_DIM3D+1], memory_space> type_3d; + type_3d density_3d; + double mean; + + test_histogram3d_functor (type_3d d3d, int num_draws) : + density_3d (d3d), + mean (1.0*num_draws/HIST_DIM1D) + {} + + KOKKOS_INLINE_FUNCTION void + operator() (const typename memory_space::size_type i, + RandomProperties& prop) const + { + typedef typename memory_space::size_type size_type; + const double count = density_3d(i/(HIST_DIM3D*HIST_DIM3D), + (i % (HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D, + i % HIST_DIM3D); + prop.mean += count; + prop.variance += (count - mean) * (count - mean); + if (i < static_cast<size_type> (HIST_DIM1D-1)) { + const double count_next = density_3d((i+1)/(HIST_DIM3D*HIST_DIM3D), + ((i+1)%(HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D, + (i+1)%HIST_DIM3D); + prop.covariance += (count - mean) * (count_next - mean); + } + } +}; + +// +// Templated test that uses the above functors. +// +template <class RandomGenerator,class Scalar> +struct test_random_scalar { + typedef typename RandomGenerator::generator_type rnd_type; + + int pass_mean,pass_var,pass_covar; + int pass_hist1d_mean,pass_hist1d_var,pass_hist1d_covar; + int pass_hist3d_mean,pass_hist3d_var,pass_hist3d_covar; + + test_random_scalar (typename test_random_functor<RandomGenerator,int>::type_1d& density_1d, + typename test_random_functor<RandomGenerator,int>::type_3d& density_3d, + RandomGenerator& pool, + unsigned int num_draws) + { + using std::cerr; + using std::endl; + using Kokkos::parallel_reduce; + + { + cerr << " -- Testing randomness properties" << endl; + + RandomProperties result; + typedef test_random_functor<RandomGenerator, Scalar> functor_type; + parallel_reduce (num_draws/1024, functor_type (pool, density_1d, density_3d), result); + + //printf("Result: %lf %lf %lf\n",result.mean/num_draws/3,result.variance/num_draws/3,result.covariance/num_draws/2); + double tolerance = 1.6*sqrt(1.0/num_draws); + double mean_expect = 
0.5*Kokkos::rand<rnd_type,Scalar>::max(); + double variance_expect = 1.0/3.0*mean_expect*mean_expect; + double mean_eps = mean_expect/(result.mean/num_draws/3)-1.0; + double variance_eps = variance_expect/(result.variance/num_draws/3)-1.0; + double covariance_eps = result.covariance/num_draws/2/variance_expect; + pass_mean = ((-tolerance < mean_eps) && + ( tolerance > mean_eps)) ? 1:0; + pass_var = ((-1.5*tolerance < variance_eps) && + ( 1.5*tolerance > variance_eps)) ? 1:0; + pass_covar = ((-2.0*tolerance < covariance_eps) && + ( 2.0*tolerance > covariance_eps)) ? 1:0; + cerr << "Pass: " << pass_mean + << " " << pass_var + << " " << mean_eps + << " " << variance_eps + << " " << covariance_eps + << " || " << tolerance << endl; + } + { + cerr << " -- Testing 1-D histogram" << endl; + + RandomProperties result; + typedef test_histogram1d_functor<typename RandomGenerator::device_type> functor_type; + parallel_reduce (HIST_DIM1D, functor_type (density_1d, num_draws), result); + + double tolerance = 6*sqrt(1.0/HIST_DIM1D); + double mean_expect = 1.0*num_draws*3/HIST_DIM1D; + double variance_expect = 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D); + double covariance_expect = -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D; + double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0; + double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0; + double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect; + pass_hist1d_mean = ((-0.0001 < mean_eps) && + ( 0.0001 > mean_eps)) ? 1:0; + pass_hist1d_var = ((-0.07 < variance_eps) && + ( 0.07 > variance_eps)) ? 1:0; + pass_hist1d_covar = ((-0.06 < covariance_eps) && + ( 0.06 > covariance_eps)) ? 
1:0; + + cerr << "Density 1D: " << mean_eps + << " " << variance_eps + << " " << (result.covariance/HIST_DIM1D/HIST_DIM1D) + << " || " << tolerance + << " " << result.min + << " " << result.max + << " || " << result.variance/HIST_DIM1D + << " " << 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D) + << " || " << result.covariance/HIST_DIM1D + << " " << -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D + << endl; + } + { + cerr << " -- Testing 3-D histogram" << endl; + + RandomProperties result; + typedef test_histogram3d_functor<typename RandomGenerator::device_type> functor_type; + parallel_reduce (HIST_DIM1D, functor_type (density_3d, num_draws), result); + + double tolerance = 6*sqrt(1.0/HIST_DIM1D); + double mean_expect = 1.0*num_draws/HIST_DIM1D; + double variance_expect = 1.0*num_draws/HIST_DIM1D*(1.0-1.0/HIST_DIM1D); + double covariance_expect = -1.0*num_draws/HIST_DIM1D/HIST_DIM1D; + double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0; + double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0; + double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect; + pass_hist3d_mean = ((-tolerance < mean_eps) && + ( tolerance > mean_eps)) ? 1:0; + pass_hist3d_var = ((-1.2*tolerance < variance_eps) && + ( 1.2*tolerance > variance_eps)) ? 1:0; + pass_hist3d_covar = ((-tolerance < covariance_eps) && + ( tolerance > covariance_eps)) ? 
1:0; + + cerr << "Density 3D: " << mean_eps + << " " << variance_eps + << " " << result.covariance/HIST_DIM1D/HIST_DIM1D + << " || " << tolerance + << " " << result.min + << " " << result.max << endl; + } + } +}; + +template <class RandomGenerator> +void test_random(unsigned int num_draws) +{ + using std::cerr; + using std::endl; + typename test_random_functor<RandomGenerator,int>::type_1d density_1d("D1d"); + typename test_random_functor<RandomGenerator,int>::type_3d density_3d("D3d"); + + + uint64_t ticks = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + cerr << "Test Seed:" << ticks << endl; + + RandomGenerator pool(ticks); + + cerr << "Test Scalar=int" << endl; + test_random_scalar<RandomGenerator,int> test_int(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_int.pass_mean,1); + ASSERT_EQ( test_int.pass_var,1); + ASSERT_EQ( test_int.pass_covar,1); + ASSERT_EQ( test_int.pass_hist1d_mean,1); + ASSERT_EQ( test_int.pass_hist1d_var,1); + ASSERT_EQ( test_int.pass_hist1d_covar,1); + ASSERT_EQ( test_int.pass_hist3d_mean,1); + ASSERT_EQ( test_int.pass_hist3d_var,1); + ASSERT_EQ( test_int.pass_hist3d_covar,1); + deep_copy(density_1d,0); + deep_copy(density_3d,0); + + cerr << "Test Scalar=unsigned int" << endl; + test_random_scalar<RandomGenerator,unsigned int> test_uint(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_uint.pass_mean,1); + ASSERT_EQ( test_uint.pass_var,1); + ASSERT_EQ( test_uint.pass_covar,1); + ASSERT_EQ( test_uint.pass_hist1d_mean,1); + ASSERT_EQ( test_uint.pass_hist1d_var,1); + ASSERT_EQ( test_uint.pass_hist1d_covar,1); + ASSERT_EQ( test_uint.pass_hist3d_mean,1); + ASSERT_EQ( test_uint.pass_hist3d_var,1); + ASSERT_EQ( test_uint.pass_hist3d_covar,1); + deep_copy(density_1d,0); + deep_copy(density_3d,0); + + cerr << "Test Scalar=int64_t" << endl; + test_random_scalar<RandomGenerator,int64_t> test_int64(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_int64.pass_mean,1); + ASSERT_EQ( test_int64.pass_var,1); 
+ ASSERT_EQ( test_int64.pass_covar,1); + ASSERT_EQ( test_int64.pass_hist1d_mean,1); + ASSERT_EQ( test_int64.pass_hist1d_var,1); + ASSERT_EQ( test_int64.pass_hist1d_covar,1); + ASSERT_EQ( test_int64.pass_hist3d_mean,1); + ASSERT_EQ( test_int64.pass_hist3d_var,1); + ASSERT_EQ( test_int64.pass_hist3d_covar,1); + deep_copy(density_1d,0); + deep_copy(density_3d,0); + + cerr << "Test Scalar=uint64_t" << endl; + test_random_scalar<RandomGenerator,uint64_t> test_uint64(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_uint64.pass_mean,1); + ASSERT_EQ( test_uint64.pass_var,1); + ASSERT_EQ( test_uint64.pass_covar,1); + ASSERT_EQ( test_uint64.pass_hist1d_mean,1); + ASSERT_EQ( test_uint64.pass_hist1d_var,1); + ASSERT_EQ( test_uint64.pass_hist1d_covar,1); + ASSERT_EQ( test_uint64.pass_hist3d_mean,1); + ASSERT_EQ( test_uint64.pass_hist3d_var,1); + ASSERT_EQ( test_uint64.pass_hist3d_covar,1); + deep_copy(density_1d,0); + deep_copy(density_3d,0); + + cerr << "Test Scalar=float" << endl; + test_random_scalar<RandomGenerator,float> test_float(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_float.pass_mean,1); + ASSERT_EQ( test_float.pass_var,1); + ASSERT_EQ( test_float.pass_covar,1); + ASSERT_EQ( test_float.pass_hist1d_mean,1); + ASSERT_EQ( test_float.pass_hist1d_var,1); + ASSERT_EQ( test_float.pass_hist1d_covar,1); + ASSERT_EQ( test_float.pass_hist3d_mean,1); + ASSERT_EQ( test_float.pass_hist3d_var,1); + ASSERT_EQ( test_float.pass_hist3d_covar,1); + deep_copy(density_1d,0); + deep_copy(density_3d,0); + + cerr << "Test Scalar=double" << endl; + test_random_scalar<RandomGenerator,double> test_double(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_double.pass_mean,1); + ASSERT_EQ( test_double.pass_var,1); + ASSERT_EQ( test_double.pass_covar,1); + ASSERT_EQ( test_double.pass_hist1d_mean,1); + ASSERT_EQ( test_double.pass_hist1d_var,1); + ASSERT_EQ( test_double.pass_hist1d_covar,1); + ASSERT_EQ( test_double.pass_hist3d_mean,1); + ASSERT_EQ( 
test_double.pass_hist3d_var,1); + ASSERT_EQ( test_double.pass_hist3d_covar,1); +} +} + +} // namespace Test + +#endif //KOKKOS_TEST_UNORDERED_MAP_HPP diff --git a/lib/kokkos/algorithms/unit_tests/TestSerial.cpp b/lib/kokkos/algorithms/unit_tests/TestSerial.cpp new file mode 100644 index 0000000000000000000000000000000000000000..741cf97ae13f245fafeb95078222943afda8ed1d --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestSerial.cpp @@ -0,0 +1,99 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +#include <TestRandom.hpp> +#include <TestSort.hpp> +#include <iomanip> + + +//---------------------------------------------------------------------------- + + +namespace Test { + +#ifdef KOKKOS_HAVE_SERIAL +class serial : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision (5) << std::scientific; + Kokkos::Serial::initialize (); + } + + static void TearDownTestCase () + { + Kokkos::Serial::finalize (); + } +}; + +#define SERIAL_RANDOM_XORSHIFT64( num_draws ) \ + TEST_F( serial, Random_XorShift64 ) { \ + Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Serial> >(num_draws); \ + } + +#define SERIAL_RANDOM_XORSHIFT1024( num_draws ) \ + TEST_F( serial, Random_XorShift1024 ) { \ + Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Serial> >(num_draws); \ + } + +#define SERIAL_SORT_UNSIGNED( size ) \ + TEST_F( serial, SortUnsigned ) { \ + Impl::test_sort< Kokkos::Serial, unsigned >(size); \ + } + +SERIAL_RANDOM_XORSHIFT64( 10240000 ) +SERIAL_RANDOM_XORSHIFT1024( 10130144 ) +SERIAL_SORT_UNSIGNED(171) + +#undef SERIAL_RANDOM_XORSHIFT64 +#undef SERIAL_RANDOM_XORSHIFT1024 +#undef SERIAL_SORT_UNSIGNED + +#endif // KOKKOS_HAVE_SERIAL +} // namespace Test + 
+ diff --git a/lib/kokkos/algorithms/unit_tests/TestSort.hpp b/lib/kokkos/algorithms/unit_tests/TestSort.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ccbcbdd0011bbc577ac8c39b2f593ed35f2546ac --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp @@ -0,0 +1,206 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef TESTSORT_HPP_ +#define TESTSORT_HPP_ + +#include <gtest/gtest.h> +#include<Kokkos_Core.hpp> +#include<Kokkos_Random.hpp> +#include<Kokkos_Sort.hpp> + +namespace Test { + +namespace Impl{ + +template<class ExecutionSpace, class Scalar> +struct is_sorted_struct { + typedef unsigned int value_type; + typedef ExecutionSpace execution_space; + + Kokkos::View<Scalar*,ExecutionSpace> keys; + + is_sorted_struct(Kokkos::View<Scalar*,ExecutionSpace> keys_):keys(keys_) {} + KOKKOS_INLINE_FUNCTION + void operator() (int i, unsigned int& count) const { + if(keys(i)>keys(i+1)) count++; + } +}; + +template<class ExecutionSpace, class Scalar> +struct sum { + typedef double value_type; + typedef ExecutionSpace execution_space; + + Kokkos::View<Scalar*,ExecutionSpace> keys; + + sum(Kokkos::View<Scalar*,ExecutionSpace> keys_):keys(keys_) {} + KOKKOS_INLINE_FUNCTION + void operator() (int i, double& count) const { + count+=keys(i); + } +}; + +template<class ExecutionSpace, class Scalar> +struct bin3d_is_sorted_struct { + typedef unsigned int value_type; + typedef ExecutionSpace execution_space; + + Kokkos::View<Scalar*[3],ExecutionSpace> keys; + + int max_bins; + Scalar min; + Scalar max; + + bin3d_is_sorted_struct(Kokkos::View<Scalar*[3],ExecutionSpace> keys_,int max_bins_,Scalar 
min_,Scalar max_): + keys(keys_),max_bins(max_bins_),min(min_),max(max_) { + } + KOKKOS_INLINE_FUNCTION + void operator() (int i, unsigned int& count) const { + int ix1 = int ((keys(i,0)-min)/max * max_bins); + int iy1 = int ((keys(i,1)-min)/max * max_bins); + int iz1 = int ((keys(i,2)-min)/max * max_bins); + int ix2 = int ((keys(i+1,0)-min)/max * max_bins); + int iy2 = int ((keys(i+1,1)-min)/max * max_bins); + int iz2 = int ((keys(i+1,2)-min)/max * max_bins); + + if (ix1>ix2) count++; + else if(ix1==ix2) { + if (iy1>iy2) count++; + else if ((iy1==iy2) && (iz1>iz2)) count++; + } + } +}; + +template<class ExecutionSpace, class Scalar> +struct sum3D { + typedef double value_type; + typedef ExecutionSpace execution_space; + + Kokkos::View<Scalar*[3],ExecutionSpace> keys; + + sum3D(Kokkos::View<Scalar*[3],ExecutionSpace> keys_):keys(keys_) {} + KOKKOS_INLINE_FUNCTION + void operator() (int i, double& count) const { + count+=keys(i,0); + count+=keys(i,1); + count+=keys(i,2); + } +}; + +template<class ExecutionSpace, typename KeyType> +void test_1D_sort(unsigned int n,bool force_kokkos) { + typedef Kokkos::View<KeyType*,ExecutionSpace> KeyViewType; + KeyViewType keys("Keys",n); + + Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931); + Kokkos::fill_random(keys,g,Kokkos::Random_XorShift64_Pool<ExecutionSpace>::generator_type::MAX_URAND); + + double sum_before = 0.0; + double sum_after = 0.0; + unsigned int sort_fails = 0; + + Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys),sum_before); + + Kokkos::sort(keys,force_kokkos); + + Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys),sum_after); + Kokkos::parallel_reduce(n-1,is_sorted_struct<ExecutionSpace, KeyType>(keys),sort_fails); + + double ratio = sum_before/sum_after; + double epsilon = 1e-10; + unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 
1 : 0; + + ASSERT_EQ(sort_fails,0); + ASSERT_EQ(equal_sum,1); +} + +template<class ExecutionSpace, typename KeyType> +void test_3D_sort(unsigned int n) { + typedef Kokkos::View<KeyType*[3],ExecutionSpace > KeyViewType; + + KeyViewType keys("Keys",n*n*n); + + Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931); + Kokkos::fill_random(keys,g,100.0); + + double sum_before = 0.0; + double sum_after = 0.0; + unsigned int sort_fails = 0; + + Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_before); + + int bin_1d = 1; + while( bin_1d*bin_1d*bin_1d*4< (int) keys.dimension_0() ) bin_1d*=2; + int bin_max[3] = {bin_1d,bin_1d,bin_1d}; + typename KeyViewType::value_type min[3] = {0,0,0}; + typename KeyViewType::value_type max[3] = {100,100,100}; + + typedef Kokkos::SortImpl::DefaultBinOp3D< KeyViewType > BinOp; + BinOp bin_op(bin_max,min,max); + Kokkos::BinSort< KeyViewType , BinOp > + Sorter(keys,bin_op,false); + Sorter.create_permute_vector(); + Sorter.template sort< KeyViewType >(keys); + + Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_after); + Kokkos::parallel_reduce(keys.dimension_0()-1,bin3d_is_sorted_struct<ExecutionSpace, KeyType>(keys,bin_1d,min[0],max[0]),sort_fails); + + double ratio = sum_before/sum_after; + double epsilon = 1e-10; + unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 
1 : 0; + + printf("3D Sort Sum: %f %f Fails: %u\n",sum_before,sum_after,sort_fails); + ASSERT_EQ(sort_fails,0); + ASSERT_EQ(equal_sum,1); +} + +template<class ExecutionSpace, typename KeyType> +void test_sort(unsigned int N) +{ + test_1D_sort<ExecutionSpace,KeyType>(N*N*N, true); + test_1D_sort<ExecutionSpace,KeyType>(N*N*N, false); + test_3D_sort<ExecutionSpace,KeyType>(N); +} + +} +} +#endif /* TESTSORT_HPP_ */ diff --git a/lib/kokkos/algorithms/unit_tests/TestThreads.cpp b/lib/kokkos/algorithms/unit_tests/TestThreads.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a61d6c8bd59bb9758f7ff30124b048150ac0cb92 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestThreads.cpp @@ -0,0 +1,113 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +#include <TestRandom.hpp> +#include <TestSort.hpp> +#include <iomanip> + + +//---------------------------------------------------------------------------- + + +namespace Test { + +#ifdef KOKKOS_HAVE_PTHREAD +class threads : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + + unsigned num_threads = 4; + + if (Kokkos::hwloc::available()) { + num_threads = Kokkos::hwloc::get_available_numa_count() + * Kokkos::hwloc::get_available_cores_per_numa() + // * Kokkos::hwloc::get_available_threads_per_core() + ; + + } + + std::cout << "Threads: " << num_threads << std::endl; + + Kokkos::Threads::initialize( num_threads ); + } + + static void TearDownTestCase() + { + Kokkos::Threads::finalize(); + } +}; + +#define THREADS_RANDOM_XORSHIFT64( num_draws ) \ + TEST_F( threads, Random_XorShift64 ) { \ + Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Threads> >(num_draws); \ + } + +#define THREADS_RANDOM_XORSHIFT1024( num_draws ) \ + TEST_F( threads, Random_XorShift1024 ) { \ + Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Threads> >(num_draws); \ + } + +#define THREADS_SORT_UNSIGNED( size ) \ + TEST_F( threads, SortUnsigned ) { 
\ + Impl::test_sort< Kokkos::Threads, double >(size); \ + } + + +THREADS_RANDOM_XORSHIFT64( 10240000 ) +THREADS_RANDOM_XORSHIFT1024( 10130144 ) +THREADS_SORT_UNSIGNED(171) + +#undef THREADS_RANDOM_XORSHIFT64 +#undef THREADS_RANDOM_XORSHIFT1024 +#undef THREADS_SORT_UNSIGNED + +#endif +} // namespace Test + + diff --git a/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp b/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f952ab3db51028aff0a0ebfe313b2639e353ab87 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp @@ -0,0 +1,50 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +int main(int argc, char *argv[]) { + ::testing::InitGoogleTest(&argc,argv); + return RUN_ALL_TESTS(); +} + diff --git a/lib/kokkos/cmake/Dependencies.cmake b/lib/kokkos/cmake/Dependencies.cmake new file mode 100644 index 0000000000000000000000000000000000000000..8c51eab4d78b68f9c01e64f63352a22cf8f2086d --- /dev/null +++ b/lib/kokkos/cmake/Dependencies.cmake @@ -0,0 +1,10 @@ +TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( + SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS + #SubPackageName Directory Class Req/Opt + # + # New Kokkos subpackages: + Core core PS REQUIRED + Containers containers PS OPTIONAL + Algorithms algorithms PS OPTIONAL + Example example EX OPTIONAL + ) diff --git a/lib/kokkos/cmake/deps/CUDA.cmake b/lib/kokkos/cmake/deps/CUDA.cmake new file mode 100644 index 0000000000000000000000000000000000000000..801c20067b9195db5ba5e6cd6fdd62a426e6e294 --- /dev/null +++ b/lib/kokkos/cmake/deps/CUDA.cmake @@ -0,0 +1,79 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. 
Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. 
Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + +# Check for CUDA support + +SET(_CUDA_FAILURE OFF) + +# Have CMake find CUDA +IF(NOT _CUDA_FAILURE) + FIND_PACKAGE(CUDA 3.2) + IF (NOT CUDA_FOUND) + SET(_CUDA_FAILURE ON) + ENDIF() +ENDIF() + +IF(NOT _CUDA_FAILURE) + # if we haven't met failure + macro(PACKAGE_ADD_CUDA_LIBRARY cuda_target) + TRIBITS_ADD_LIBRARY(${cuda_target} ${ARGN} CUDALIBRARY) + endmacro() + GLOBAL_SET(TPL_CUDA_LIBRARY_DIRS) + GLOBAL_SET(TPL_CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE}) + GLOBAL_SET(TPL_CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY} ${CUDA_cublas_LIBRARY} ${CUDA_cufft_LIBRARY}) + TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) +ELSE() + SET(TPL_ENABLE_CUDA OFF) +ENDIF() diff --git a/lib/kokkos/cmake/deps/CUSPARSE.cmake b/lib/kokkos/cmake/deps/CUSPARSE.cmake new file mode 100644 index 0000000000000000000000000000000000000000..205f5e2a98898b8247b0f199afcc2e3ac4bc97b4 --- /dev/null +++ b/lib/kokkos/cmake/deps/CUSPARSE.cmake @@ -0,0 +1,64 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) 
Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. 
Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + +include(${TRIBITS_DEPS_DIR}/CUDA.cmake) + +IF (TPL_ENABLE_CUDA) + GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS) + GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS}) + GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY}) + TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) +ENDIF() + diff --git a/lib/kokkos/cmake/deps/HWLOC.cmake b/lib/kokkos/cmake/deps/HWLOC.cmake new file mode 100644 index 0000000000000000000000000000000000000000..275abd3a5d4ecfb3ce3b207f978959f6f9019061 --- /dev/null +++ b/lib/kokkos/cmake/deps/HWLOC.cmake @@ -0,0 +1,70 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. 
+# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + + +#----------------------------------------------------------------------------- +# Hardware locality detection and control library. +# +# Acquisition information: +# Date checked: November 2011 +# Checked by: H. Carter Edwards <hcedwar AT sandia.gov> +# Source: http://www.open-mpi.org/projects/hwloc/ +# Version: 1.3 +# + +TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( HWLOC + REQUIRED_HEADERS hwloc.h + REQUIRED_LIBS_NAMES "hwloc" + ) diff --git a/lib/kokkos/cmake/deps/Pthread.cmake b/lib/kokkos/cmake/deps/Pthread.cmake new file mode 100644 index 0000000000000000000000000000000000000000..46d0a939cad0e6c5479cb20da1d37ba5ca509b8c --- /dev/null +++ b/lib/kokkos/cmake/deps/Pthread.cmake @@ -0,0 +1,83 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. 
+# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + + +SET(USE_THREADS FALSE) + +IF(NOT TPL_Pthread_INCLUDE_DIRS AND NOT TPL_Pthread_LIBRARY_DIRS AND NOT TPL_Pthread_LIBRARIES) + # Use CMake's Thread finder since it is a bit smarter in determining + # whether pthreads is already built into the compiler and doesn't need + # a library to link. + FIND_PACKAGE(Threads) + #If Threads found a copy of pthreads make sure it is one of the cases the tribits + #tpl system cannot handle. + IF(Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) + IF(CMAKE_THREAD_LIBS_INIT STREQUAL "" OR CMAKE_THREAD_LIBS_INIT STREQUAL "-pthread") + SET(USE_THREADS TRUE) + ENDIF() + ENDIF() +ENDIF() + +IF(USE_THREADS) + SET(TPL_Pthread_INCLUDE_DIRS "") + SET(TPL_Pthread_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}") + SET(TPL_Pthread_LIBRARY_DIRS "") + TIBITS_CREATE_IMPORTED_TPL_LIBRARY(Pthread) +ELSE() + TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( Pthread + REQUIRED_HEADERS pthread.h + REQUIRED_LIBS_NAMES pthread + ) +ENDIF() diff --git a/lib/kokkos/cmake/deps/QTHREAD.cmake b/lib/kokkos/cmake/deps/QTHREAD.cmake new file mode 100644 index 0000000000000000000000000000000000000000..994b72b20096f4462beab51d19e4410cd73bf05b --- /dev/null +++ b/lib/kokkos/cmake/deps/QTHREAD.cmake @@ -0,0 +1,70 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. 
Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. 
Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + + +#----------------------------------------------------------------------------- +# Qthreads threading library (qthread.h / libqthread). +# +# Acquisition information: +# Date checked: July 2014 +# Checked by: H. Carter Edwards <hcedwar AT sandia.gov> +# Source: https://code.google.com/p/qthreads +# + +TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREAD + REQUIRED_HEADERS qthread.h + REQUIRED_LIBS_NAMES "qthread" + ) + diff --git a/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake b/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake new file mode 100644 index 0000000000000000000000000000000000000000..aad1e2bad7629f3f43ca91135752253a20ac9523 --- /dev/null +++ b/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake @@ -0,0 +1,75 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government.
Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. 
Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + +# Check for CUDA support: CUSPARSE requires TPL_ENABLE_CUDA and CUDA 4.1 or greater. + +IF (NOT TPL_ENABLE_CUDA OR CUDA_VERSION VERSION_LESS "4.1") + MESSAGE(FATAL_ERROR "\nCUSPARSE: did not find acceptable version of CUDA libraries (4.1 or greater)") +ELSE() + IF(CMAKE_VERSION VERSION_LESS "2.8.8") + # FindCUDA before CMake 2.8.8 does not find the cusparse library; therefore, we must locate it ourselves. + find_library(CUDA_cusparse_LIBRARY + cusparse + HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib + ) + IF(CUDA_cusparse_LIBRARY STREQUAL "CUDA_cusparse_LIBRARY-NOTFOUND") + MESSAGE(FATAL_ERROR "\nCUSPARSE: could not find cusparse library.") + ENDIF() + ENDIF(CMAKE_VERSION VERSION_LESS "2.8.8") + GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS) + GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS}) + GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY}) +ENDIF() + diff --git a/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake b/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake new file mode 100644 index 0000000000000000000000000000000000000000..715b3e9bde59379c632fbec7926b425e6189e74d --- /dev/null +++ b/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake @@ -0,0 +1,71 @@ +# @HEADER +#
************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + + +#----------------------------------------------------------------------------- +# Hardware locality detection and control library. +# +# Acquisition information: +# Date checked: November 2011 +# Checked by: H. 
Carter Edwards <hcedwar AT sandia.gov> +# Source: http://www.open-mpi.org/projects/hwloc/ +# Version: 1.3 +# + +TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( HWLOC + REQUIRED_HEADERS hwloc.h + REQUIRED_LIBS_NAMES "hwloc" + ) + diff --git a/lib/kokkos/cmake/tpls/FindTPLPthread.cmake b/lib/kokkos/cmake/tpls/FindTPLPthread.cmake new file mode 100644 index 0000000000000000000000000000000000000000..fc401d7543357f18d225a33efe0cf3bb489170d7 --- /dev/null +++ b/lib/kokkos/cmake/tpls/FindTPLPthread.cmake @@ -0,0 +1,82 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + + +SET(USE_THREADS FALSE) + +IF(NOT TPL_Pthread_INCLUDE_DIRS AND NOT TPL_Pthread_LIBRARY_DIRS AND NOT TPL_Pthread_LIBRARIES) + # Use CMake's Thread finder since it is a bit smarter in determining + # whether pthreads is already built into the compiler and doesn't need + # a library to link. 
+ FIND_PACKAGE(Threads) + #If Threads found a copy of pthreads make sure it is one of the cases the tribits + #tpl system cannot handle. + IF(Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) + IF(CMAKE_THREAD_LIBS_INIT STREQUAL "" OR CMAKE_THREAD_LIBS_INIT STREQUAL "-pthread") + SET(USE_THREADS TRUE) + ENDIF() + ENDIF() +ENDIF() + +IF(USE_THREADS) + SET(TPL_Pthread_INCLUDE_DIRS "") + SET(TPL_Pthread_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}") + SET(TPL_Pthread_LIBRARY_DIRS "") +ELSE() + TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( Pthread + REQUIRED_HEADERS pthread.h + REQUIRED_LIBS_NAMES pthread + ) +ENDIF() diff --git a/lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake b/lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake new file mode 100644 index 0000000000000000000000000000000000000000..994b72b20096f4462beab51d19e4410cd73bf05b --- /dev/null +++ b/lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake @@ -0,0 +1,70 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
+# +# ************************************************************************ +# @HEADER + + +#----------------------------------------------------------------------------- +# Hardware locality detection and control library. +# +# Acquisition information: +# Date checked: July 2014 +# Checked by: H. Carter Edwards <hcedwar AT sandia.gov> +# Source: https://code.google.com/p/qthreads +# + +TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREAD + REQUIRED_HEADERS qthread.h + REQUIRED_LIBS_NAMES "qthread" + ) + diff --git a/lib/kokkos/cmake/tribits.cmake b/lib/kokkos/cmake/tribits.cmake new file mode 100644 index 0000000000000000000000000000000000000000..34cd216f810c9a829dbcdc13ed5e9c3be81752ac --- /dev/null +++ b/lib/kokkos/cmake/tribits.cmake @@ -0,0 +1,485 @@ +INCLUDE(CMakeParseArguments) +INCLUDE(CTest) + +FUNCTION(ASSERT_DEFINED VARS) + FOREACH(VAR ${VARS}) + IF(NOT DEFINED ${VAR}) + MESSAGE(SEND_ERROR "Error, the variable ${VAR} is not defined!") + ENDIF() + ENDFOREACH() +ENDFUNCTION() + +MACRO(GLOBAL_SET VARNAME) + SET(${VARNAME} ${ARGN} CACHE INTERNAL "") +ENDMACRO() + +MACRO(PREPEND_GLOBAL_SET VARNAME) + ASSERT_DEFINED(${VARNAME}) + GLOBAL_SET(${VARNAME} ${ARGN} ${${VARNAME}}) +ENDMACRO() + +FUNCTION(REMOVE_GLOBAL_DUPLICATES VARNAME) + ASSERT_DEFINED(${VARNAME}) + IF (${VARNAME}) + SET(TMP ${${VARNAME}}) + LIST(REMOVE_DUPLICATES TMP) + GLOBAL_SET(${VARNAME} ${TMP}) + ENDIF() +ENDFUNCTION() + +MACRO(TRIBITS_ADD_OPTION_AND_DEFINE USER_OPTION_NAME MACRO_DEFINE_NAME DOCSTRING DEFAULT_VALUE) + MESSAGE(STATUS "TRIBITS_ADD_OPTION_AND_DEFINE: '${USER_OPTION_NAME}' '${MACRO_DEFINE_NAME}' '${DEFAULT_VALUE}'") + SET( ${USER_OPTION_NAME} "${DEFAULT_VALUE}" CACHE BOOL "${DOCSTRING}" ) + IF(NOT ${MACRO_DEFINE_NAME} STREQUAL "") + IF(${USER_OPTION_NAME}) + GLOBAL_SET(${MACRO_DEFINE_NAME} ON) + ELSE() + GLOBAL_SET(${MACRO_DEFINE_NAME} OFF) + ENDIF() + ENDIF() +ENDMACRO() + +FUNCTION(TRIBITS_CONFIGURE_FILE PACKAGE_NAME_CONFIG_FILE) + + # Configure the file + CONFIGURE_FILE( + 
${PACKAGE_SOURCE_DIR}/cmake/${PACKAGE_NAME_CONFIG_FILE}.in + ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME_CONFIG_FILE} + ) + +ENDFUNCTION() + +MACRO(TRIBITS_ADD_DEBUG_OPTION) + TRIBITS_ADD_OPTION_AND_DEFINE( + ${PROJECT_NAME}_ENABLE_DEBUG + HAVE_${PROJECT_NAME_UC}_DEBUG + "Enable a host of runtime debug checking." + OFF + ) +ENDMACRO() + + +MACRO(TRIBITS_ADD_TEST_DIRECTORIES) + FOREACH(TEST_DIR ${ARGN}) + ADD_SUBDIRECTORY(${TEST_DIR}) + ENDFOREACH() +ENDMACRO() + +MACRO(TRIBITS_ADD_EXAMPLE_DIRECTORIES) + + IF(${PACKAGE_NAME}_ENABLE_EXAMPLES OR ${PARENT_PACKAGE_NAME}_ENABLE_EXAMPLES) + FOREACH(EXAMPLE_DIR ${ARGN}) + ADD_SUBDIRECTORY(${EXAMPLE_DIR}) + ENDFOREACH() + ENDIF() + +ENDMACRO() + +MACRO(TARGET_TRANSFER_PROPERTY TARGET_NAME PROP_IN PROP_OUT) + SET(PROP_VALUES) + FOREACH(TARGET_X ${ARGN}) + LIST(APPEND PROP_VALUES "$<TARGET_PROPERTY:${TARGET_X},${PROP_IN}>") + ENDFOREACH() + SET_TARGET_PROPERTIES(${TARGET_NAME} PROPERTIES ${PROP_OUT} "${PROP_VALUES}") +ENDMACRO() + +MACRO(ADD_INTERFACE_LIBRARY LIB_NAME) + FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp "") + ADD_LIBRARY(${LIB_NAME} STATIC ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp) + SET_TARGET_PROPERTIES(${LIB_NAME} PROPERTIES INTERFACE TRUE) +ENDMACRO() + +# Older versions of cmake do not make include directories transitive +MACRO(TARGET_LINK_AND_INCLUDE_LIBRARIES TARGET_NAME) + TARGET_LINK_LIBRARIES(${TARGET_NAME} LINK_PUBLIC ${ARGN}) + FOREACH(DEP_LIB ${ARGN}) + TARGET_INCLUDE_DIRECTORIES(${TARGET_NAME} PUBLIC $<TARGET_PROPERTY:${DEP_LIB},INTERFACE_INCLUDE_DIRECTORIES>) + TARGET_INCLUDE_DIRECTORIES(${TARGET_NAME} PUBLIC $<TARGET_PROPERTY:${DEP_LIB},INCLUDE_DIRECTORIES>) + ENDFOREACH() +ENDMACRO() + +FUNCTION(TRIBITS_ADD_LIBRARY LIBRARY_NAME) + + SET(options STATIC SHARED TESTONLY NO_INSTALL_LIB_OR_HEADERS CUDALIBRARY) + SET(oneValueArgs) + SET(multiValueArgs HEADERS HEADERS_INSTALL_SUBDIR NOINSTALLHEADERS SOURCES DEPLIBS IMPORTEDLIBS DEFINES ADDED_LIB_TARGET_NAME_OUT) + + CMAKE_PARSE_ARGUMENTS(PARSE
"${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + IF(PARSE_HEADERS) + LIST(REMOVE_DUPLICATES PARSE_HEADERS) + ENDIF() + IF(PARSE_SOURCES) + LIST(REMOVE_DUPLICATES PARSE_SOURCES) + ENDIF() + + # Local variable to hold all of the libraries that will be directly linked + # to this library. + SET(LINK_LIBS ${${PACKAGE_NAME}_DEPS}) + + # Add dependent libraries passed directly in + + IF (PARSE_IMPORTEDLIBS) + LIST(APPEND LINK_LIBS ${PARSE_IMPORTEDLIBS}) + ENDIF() + + IF (PARSE_DEPLIBS) + LIST(APPEND LINK_LIBS ${PARSE_DEPLIBS}) + ENDIF() + + # Add the library and all the dependencies + + IF (PARSE_DEFINES) + ADD_DEFINITIONS(${PARSE_DEFINES}) + ENDIF() + + IF (PARSE_STATIC) + SET(STATIC_KEYWORD "STATIC") + ELSE() + SET(STATIC_KEYWORD) + ENDIF() + + IF (PARSE_SHARED) + SET(SHARED_KEYWORD "SHARED") + ELSE() + SET(SHARED_KEYWORD) + ENDIF() + + IF (PARSE_TESTONLY) + SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL") + ELSE() + SET(EXCLUDE_FROM_ALL_KEYWORD) + ENDIF() + IF (NOT PARSE_CUDALIBRARY) + ADD_LIBRARY( + ${LIBRARY_NAME} + ${STATIC_KEYWORD} + ${SHARED_KEYWORD} + ${EXCLUDE_FROM_ALL_KEYWORD} + ${PARSE_HEADERS} + ${PARSE_NOINSTALLHEADERS} + ${PARSE_SOURCES} + ) + ELSE() + CUDA_ADD_LIBRARY( + ${LIBRARY_NAME} + ${PARSE_HEADERS} + ${PARSE_NOINSTALLHEADERS} + ${PARSE_SOURCES} + ) + ENDIF() + + TARGET_LINK_AND_INCLUDE_LIBRARIES(${LIBRARY_NAME} ${LINK_LIBS}) + # Install only non-test libraries, and never when NO_INSTALL_LIB_OR_HEADERS was requested. + IF (NOT PARSE_TESTONLY AND NOT PARSE_NO_INSTALL_LIB_OR_HEADERS) + + INSTALL( + TARGETS ${LIBRARY_NAME} + EXPORT ${PROJECT_NAME} + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + COMPONENT ${PACKAGE_NAME} + ) + + INSTALL( + FILES ${PARSE_HEADERS} + EXPORT ${PROJECT_NAME} + DESTINATION include + COMPONENT ${PACKAGE_NAME} + ) + + INSTALL( + DIRECTORY ${PARSE_HEADERS_INSTALL_SUBDIR} + EXPORT ${PROJECT_NAME} + DESTINATION include + COMPONENT ${PACKAGE_NAME} + ) + + ENDIF() + + IF (NOT PARSE_TESTONLY) + PREPEND_GLOBAL_SET(${PACKAGE_NAME}_LIBS ${LIBRARY_NAME}) +
REMOVE_GLOBAL_DUPLICATES(${PACKAGE_NAME}_LIBS) + ENDIF() + +ENDFUNCTION() + +FUNCTION(TRIBITS_ADD_EXECUTABLE EXE_NAME) + + SET(options NOEXEPREFIX NOEXESUFFIX ADD_DIR_TO_NAME INSTALLABLE TESTONLY) + SET(oneValueArgs ADDED_EXE_TARGET_NAME_OUT) + SET(multiValueArgs SOURCES CATEGORIES HOST XHOST HOSTTYPE XHOSTTYPE DIRECTORY TESTONLYLIBS IMPORTEDLIBS DEPLIBS COMM LINKER_LANGUAGE TARGET_DEFINES DEFINES) + + CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + SET(LINK_LIBS PACKAGE_${PACKAGE_NAME}) + + IF (PARSE_TESTONLYLIBS) + LIST(APPEND LINK_LIBS ${PARSE_TESTONLYLIBS}) + ENDIF() + + IF (PARSE_IMPORTEDLIBS) + LIST(APPEND LINK_LIBS ${PARSE_IMPORTEDLIBS}) + ENDIF() + + SET (EXE_SOURCES) + IF(PARSE_DIRECTORY) + FOREACH( SOURCE_FILE ${PARSE_SOURCES} ) + IF(IS_ABSOLUTE ${SOURCE_FILE}) + SET (EXE_SOURCES ${EXE_SOURCES} ${SOURCE_FILE}) + ELSE() + SET (EXE_SOURCES ${EXE_SOURCES} ${PARSE_DIRECTORY}/${SOURCE_FILE}) + ENDIF() + ENDFOREACH( ) + ELSE() + FOREACH( SOURCE_FILE ${PARSE_SOURCES} ) + SET (EXE_SOURCES ${EXE_SOURCES} ${SOURCE_FILE}) + ENDFOREACH( ) + ENDIF() + + SET(EXE_BINARY_NAME ${EXE_NAME}) + IF(DEFINED PACKAGE_NAME AND NOT PARSE_NOEXEPREFIX) + SET(EXE_BINARY_NAME ${PACKAGE_NAME}_${EXE_BINARY_NAME}) + ENDIF() + + IF (PARSE_TESTONLY) + SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL") + ELSE() + SET(EXCLUDE_FROM_ALL_KEYWORD) + ENDIF() + ADD_EXECUTABLE(${EXE_BINARY_NAME} ${EXCLUDE_FROM_ALL_KEYWORD} ${EXE_SOURCES}) + # TARGET_DEFINES can only be applied once the target exists, under its final (possibly prefixed) name. + IF (PARSE_TARGET_DEFINES) + TARGET_COMPILE_DEFINITIONS(${EXE_BINARY_NAME} PUBLIC ${PARSE_TARGET_DEFINES}) + ENDIF() + + TARGET_LINK_AND_INCLUDE_LIBRARIES(${EXE_BINARY_NAME} ${LINK_LIBS}) + + IF(PARSE_ADDED_EXE_TARGET_NAME_OUT) + SET(${PARSE_ADDED_EXE_TARGET_NAME_OUT} ${EXE_BINARY_NAME} PARENT_SCOPE) + ENDIF() + + IF(PARSE_INSTALLABLE) + INSTALL( + TARGETS ${EXE_BINARY_NAME} + EXPORT ${PROJECT_NAME} + DESTINATION bin + ) + ENDIF() +ENDFUNCTION() + +ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C
${CMAKE_CFG_INTDIR}) + +FUNCTION(TRIBITS_ADD_EXECUTABLE_AND_TEST EXE_NAME) + + SET(options STANDARD_PASS_OUTPUT WILL_FAIL) + SET(oneValueArgs PASS_REGULAR_EXPRESSION FAIL_REGULAR_EXPRESSION ENVIRONMENT TIMEOUT CATEGORIES ADDED_TESTS_NAMES_OUT ADDED_EXE_TARGET_NAME_OUT) + SET(multiValueArgs) + + CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + TRIBITS_ADD_EXECUTABLE(${EXE_NAME} TESTONLY ADDED_EXE_TARGET_NAME_OUT TEST_NAME ${PARSE_UNPARSED_ARGUMENTS}) + + IF(WIN32) + ADD_TEST(NAME ${TEST_NAME} WORKING_DIRECTORY ${LIBRARY_OUTPUT_PATH} COMMAND ${TEST_NAME}${CMAKE_EXECUTABLE_SUFFIX}) + ELSE() + ADD_TEST(NAME ${TEST_NAME} COMMAND ${TEST_NAME}) + ENDIF() + ADD_DEPENDENCIES(check ${TEST_NAME}) + + IF(PARSE_FAIL_REGULAR_EXPRESSION) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES FAIL_REGULAR_EXPRESSION ${PARSE_FAIL_REGULAR_EXPRESSION}) + ENDIF() + + IF(PARSE_PASS_REGULAR_EXPRESSION) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES PASS_REGULAR_EXPRESSION ${PARSE_PASS_REGULAR_EXPRESSION}) + ENDIF() + + IF(PARSE_WILL_FAIL) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES WILL_FAIL ${PARSE_WILL_FAIL}) + ENDIF() + + IF(PARSE_ADDED_TESTS_NAMES_OUT) + SET(${PARSE_ADDED_TESTS_NAMES_OUT} ${TEST_NAME} PARENT_SCOPE) + ENDIF() + + IF(PARSE_ADDED_EXE_TARGET_NAME_OUT) + SET(${PARSE_ADDED_EXE_TARGET_NAME_OUT} ${TEST_NAME} PARENT_SCOPE) + ENDIF() + +ENDFUNCTION() + +MACRO(TIBITS_CREATE_IMPORTED_TPL_LIBRARY TPL_NAME) + ADD_INTERFACE_LIBRARY(TPL_LIB_${TPL_NAME}) + TARGET_LINK_LIBRARIES(TPL_LIB_${TPL_NAME} LINK_PUBLIC ${TPL_${TPL_NAME}_LIBRARIES}) + TARGET_INCLUDE_DIRECTORIES(TPL_LIB_${TPL_NAME} INTERFACE ${TPL_${TPL_NAME}_INCLUDE_DIRS}) +ENDMACRO() + +FUNCTION(TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES TPL_NAME) + + SET(options MUST_FIND_ALL_LIBS MUST_FIND_ALL_HEADERS NO_PRINT_ENABLE_SUCCESS_FAIL) + SET(oneValueArgs) + SET(multiValueArgs REQUIRED_HEADERS REQUIRED_LIBS_NAMES) + + CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN}) + + SET(_${TPL_NAME}_ENABLE_SUCCESS TRUE) + IF (PARSE_REQUIRED_LIBS_NAMES) + FIND_LIBRARY(TPL_${TPL_NAME}_LIBRARIES NAMES ${PARSE_REQUIRED_LIBS_NAMES}) + IF(NOT TPL_${TPL_NAME}_LIBRARIES) + SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) + ENDIF() + ENDIF() + IF (PARSE_REQUIRED_HEADERS) + FIND_PATH(TPL_${TPL_NAME}_INCLUDE_DIRS NAMES ${PARSE_REQUIRED_HEADERS}) + IF(NOT TPL_${TPL_NAME}_INCLUDE_DIRS) + SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) + ENDIF() + ENDIF() + + + IF (_${TPL_NAME}_ENABLE_SUCCESS) + TIBITS_CREATE_IMPORTED_TPL_LIBRARY(${TPL_NAME}) + ENDIF() + +ENDFUNCTION() + +MACRO(TRIBITS_PROCESS_TPL_DEP_FILE TPL_FILE) + GET_FILENAME_COMPONENT(TPL_NAME ${TPL_FILE} NAME_WE) + INCLUDE("${TPL_FILE}") + IF(TARGET TPL_LIB_${TPL_NAME}) + MESSAGE(STATUS "Found tpl library: ${TPL_NAME}") + SET(TPL_ENABLE_${TPL_NAME} TRUE) + ELSE() + MESSAGE(STATUS "Tpl library not found: ${TPL_NAME}") + SET(TPL_ENABLE_${TPL_NAME} FALSE) + ENDIF() +ENDMACRO() +# TYPE is a macro argument (text substitution, not a variable), so it is expanded as ${TYPE}; the "x" prefix blocks implicit dereferencing in old CMake. +MACRO(PREPEND_TARGET_SET VARNAME TARGET_NAME TYPE) + IF("x${TYPE}" STREQUAL "xREQUIRED") + SET(REQUIRED TRUE) + ELSE() + SET(REQUIRED FALSE) + ENDIF() + IF(TARGET ${TARGET_NAME}) + PREPEND_GLOBAL_SET(${VARNAME} ${TARGET_NAME}) + ELSE() + IF(REQUIRED) + MESSAGE(FATAL_ERROR "Missing dependency ${TARGET_NAME}") + ENDIF() + ENDIF() +ENDMACRO() + +MACRO(TRIBITS_APPEND_PACKAGE_DEPS DEP_LIST TYPE) + FOREACH(DEP ${ARGN}) + PREPEND_GLOBAL_SET(${DEP_LIST} PACKAGE_${DEP}) + ENDFOREACH() +ENDMACRO() + +MACRO(TRIBITS_APPEND_TPLS_DEPS DEP_LIST TYPE) + FOREACH(DEP ${ARGN}) + PREPEND_TARGET_SET(${DEP_LIST} TPL_LIB_${DEP} ${TYPE}) + ENDFOREACH() +ENDMACRO() +# TPL targets are created as TPL_LIB_<name>, so that is the target name to test for. +MACRO(TRIBITS_ENABLE_TPLS) + FOREACH(TPL ${ARGN}) + IF(TARGET TPL_LIB_${TPL}) + GLOBAL_SET(${PACKAGE_NAME}_ENABLE_${TPL} TRUE) + ELSE() + GLOBAL_SET(${PACKAGE_NAME}_ENABLE_${TPL} FALSE) + ENDIF() + ENDFOREACH() +ENDMACRO() + +MACRO(TRIBITS_PACKAGE_DEFINE_DEPENDENCIES) + + SET(options) + SET(oneValueArgs) + SET(multiValueArgs + LIB_REQUIRED_PACKAGES + LIB_OPTIONAL_PACKAGES +
TEST_REQUIRED_PACKAGES + TEST_OPTIONAL_PACKAGES + LIB_REQUIRED_TPLS + LIB_OPTIONAL_TPLS + TEST_REQUIRED_TPLS + TEST_OPTIONAL_TPLS + REGRESSION_EMAIL_LIST + SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS + ) + CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + GLOBAL_SET(${PACKAGE_NAME}_DEPS "") + TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_DEPS REQUIRED ${PARSE_LIB_REQUIRED_PACKAGES}) + TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_DEPS OPTIONAL ${PARSE_LIB_OPTIONAL_PACKAGES}) + TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_DEPS REQUIRED ${PARSE_LIB_REQUIRED_TPLS}) + TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_DEPS OPTIONAL ${PARSE_LIB_OPTIONAL_TPLS}) + + GLOBAL_SET(${PACKAGE_NAME}_TEST_DEPS "") + TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_TEST_DEPS REQUIRED ${PARSE_TEST_REQUIRED_PACKAGES}) + TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_TEST_DEPS OPTIONAL ${PARSE_TEST_OPTIONAL_PACKAGES}) + TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_TEST_DEPS REQUIRED ${PARSE_TEST_REQUIRED_TPLS}) + TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_TEST_DEPS OPTIONAL ${PARSE_TEST_OPTIONAL_TPLS}) + + TRIBITS_ENABLE_TPLS(${PARSE_LIB_REQUIRED_TPLS} ${PARSE_LIB_OPTIONAL_TPLS} ${PARSE_TEST_REQUIRED_TPLS} ${PARSE_TEST_OPTIONAL_TPLS}) + +ENDMACRO() + +MACRO(TRIBITS_SUBPACKAGE NAME) + SET(PACKAGE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + SET(PARENT_PACKAGE_NAME ${PACKAGE_NAME}) + SET(PACKAGE_NAME ${PACKAGE_NAME}${NAME}) + STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) + + ADD_INTERFACE_LIBRARY(PACKAGE_${PACKAGE_NAME}) + + GLOBAL_SET(${PACKAGE_NAME}_LIBS "") + + INCLUDE(${PACKAGE_SOURCE_DIR}/cmake/Dependencies.cmake) + +ENDMACRO(TRIBITS_SUBPACKAGE) + +MACRO(TRIBITS_SUBPACKAGE_POSTPROCESS) + TARGET_LINK_AND_INCLUDE_LIBRARIES(PACKAGE_${PACKAGE_NAME} ${${PACKAGE_NAME}_LIBS}) +ENDMACRO(TRIBITS_SUBPACKAGE_POSTPROCESS) + +MACRO(TRIBITS_PACKAGE_DECL NAME) + + PROJECT(${NAME}) + STRING(TOUPPER ${PROJECT_NAME} PROJECT_NAME_UC) + SET(PACKAGE_NAME ${PROJECT_NAME}) + STRING(TOUPPER 
${PACKAGE_NAME} PACKAGE_NAME_UC) + + SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps") + FILE(GLOB TPLS_FILES "${TRIBITS_DEPS_DIR}/*.cmake") + FOREACH(TPL_FILE ${TPLS_FILES}) + TRIBITS_PROCESS_TPL_DEP_FILE(${TPL_FILE}) + ENDFOREACH() + +ENDMACRO() + + +MACRO(TRIBITS_PROCESS_SUBPACKAGES) + FILE(GLOB SUBPACKAGES RELATIVE ${CMAKE_SOURCE_DIR} */cmake/Dependencies.cmake) + FOREACH(SUBPACKAGE ${SUBPACKAGES}) + GET_FILENAME_COMPONENT(SUBPACKAGE_CMAKE ${SUBPACKAGE} DIRECTORY) + GET_FILENAME_COMPONENT(SUBPACKAGE_DIR ${SUBPACKAGE_CMAKE} DIRECTORY) + ADD_SUBDIRECTORY(${SUBPACKAGE_DIR}) + ENDFOREACH() +ENDMACRO(TRIBITS_PROCESS_SUBPACKAGES) + +MACRO(TRIBITS_PACKAGE_DEF) +ENDMACRO(TRIBITS_PACKAGE_DEF) + +MACRO(TRIBITS_EXCLUDE_AUTOTOOLS_FILES) +ENDMACRO(TRIBITS_EXCLUDE_AUTOTOOLS_FILES) + +MACRO(TRIBITS_EXCLUDE_FILES) +ENDMACRO(TRIBITS_EXCLUDE_FILES) + +MACRO(TRIBITS_PACKAGE_POSTPROCESS) +ENDMACRO(TRIBITS_PACKAGE_POSTPROCESS) + diff --git a/lib/kokkos/config/configure_compton_cpu.sh b/lib/kokkos/config/configure_compton_cpu.sh new file mode 100755 index 0000000000000000000000000000000000000000..17287fb8486977927e4ba29718c79a438378d0a4 --- /dev/null +++ b/lib/kokkos/config/configure_compton_cpu.sh @@ -0,0 +1,190 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# +# Force CMake to re-evaluate build options. 
+# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="${HOME}/Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/host/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. + +CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +#----------------------------------------------------------------------------- +# Build for CUDA architecture: + +CUDA_ARCH="" +# CUDA_ARCH="20" +# CUDA_ARCH="30" +# CUDA_ARCH="35" + +# Build with Intel compiler + +INTEL=ON + +# Build for MIC architecture: + +# INTEL_XEON_PHI=ON + +# Build with HWLOC at location: + +HWLOC_BASE_DIR="/home/projects/libraries/host/hwloc/1.6.2" + +# Location for MPI to use in examples: + +MPI_BASE_DIR="" + +#----------------------------------------------------------------------------- +# MPI configuation only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Pthread configuation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" + 
+#----------------------------------------------------------------------------- +# OpenMP configuation: + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Hardware locality cmake configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, + # this is different than the standard CMAKE_CXX_FLAGS syntax. 
+ + CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +#----------------------------------------------------------------------------- + +# Cross-compile for Intel Xeon Phi: + +if [ "${INTEL_XEON_PHI}" = "ON" ] ; +then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} $* ${CMAKE_PROJECT_DIR}" + +cmake ${CMAKE_CONFIGURE} "$@" ${CMAKE_PROJECT_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_compton_mic.sh b/lib/kokkos/config/configure_compton_mic.sh new file mode 100755 index 0000000000000000000000000000000000000000..7f9aee13f957d503d7fa5a5b1c8ecf924a80e0d8 --- /dev/null +++ b/lib/kokkos/config/configure_compton_mic.sh @@ -0,0 +1,186 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# +# Force CMake to re-evaluate build options.
+# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="${HOME}/Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/mic/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. + +CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +#----------------------------------------------------------------------------- +# Build for CUDA architecture: + +CUDA_ARCH="" +# CUDA_ARCH="20" +# CUDA_ARCH="30" +# CUDA_ARCH="35" + +# Build for MIC architecture: + +INTEL_XEON_PHI=ON + +# Build with HWLOC at location: + +HWLOC_BASE_DIR="/home/projects/libraries/mic/hwloc/1.6.2" + +# Location for MPI to use in examples: + +MPI_BASE_DIR="" + +#----------------------------------------------------------------------------- +# MPI configuation only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Pthread configuation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP 
configuation: + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Hardware locality cmake configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, + # this is different than the standard CMAKE_CXX_FLAGS syntax. 
+ + CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +#----------------------------------------------------------------------------- + +# Cross-compile for Intel Xeon Phi: + +if [ "${INTEL_XEON_PHI}" = "ON" ] ; +then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}" + +cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_kokkos.sh b/lib/kokkos/config/configure_kokkos.sh new file mode 100755 index 0000000000000000000000000000000000000000..592e7f593639c617385b487183f8ea6111cbd732 --- /dev/null +++ b/lib/kokkos/config/configure_kokkos.sh @@ -0,0 +1,293 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. 
+ +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +#----------------------------------------------------------------------------- + +USE_CUDA_ARCH= +USE_THREAD= +USE_OPENMP= +USE_INTEL= +USE_XEON_PHI= +HWLOC_BASE_DIR= +MPI_BASE_DIR= +BLAS_LIB_DIR= +LAPACK_LIB_DIR= + +if [ 1 ] ; then + # Platform 'kokkos-dev' with Cuda, OpenMP, hwloc, mpi, gnu + USE_CUDA_ARCH="35" + USE_OPENMP=ON + HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7" + MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7" + BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib" + LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib" + +elif [ ] ; then + # Platform 'kokkos-dev' with Cuda, Threads, hwloc, mpi, gnu + USE_CUDA_ARCH="35" + USE_THREAD=ON + HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7" + MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7" + BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib" + LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib" + +elif [ ] ; then + # Platform 'kokkos-dev' with Xeon Phi and hwloc + USE_OPENMP=ON + USE_INTEL=ON + USE_XEON_PHI=ON + HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/mic/intel/13.SP1.1.106" + +elif [ ] ; then + # Platform 'kokkos-nvidia' with Cuda, OpenMP, hwloc, mpi, gnu + USE_CUDA_ARCH="20" + USE_OPENMP=ON + HWLOC_BASE_DIR="/home/sems/common/hwloc/current" + MPI_BASE_DIR="/home/sems/common/openmpi/current" + +elif [ ] ; then + # Platform 'kokkos-nvidia' with Cuda, Threads, hwloc, mpi, gnu + USE_CUDA_ARCH="20" + USE_THREAD=ON + HWLOC_BASE_DIR="/home/sems/common/hwloc/current" + MPI_BASE_DIR="/home/sems/common/openmpi/current" + +fi + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure command line options: + +CMAKE_CONFIGURE="" +CMAKE_CXX_FLAGS="" + +#----------------------------------------------------------------------------- 
+# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- + +if [ 1 ] ; then + + # Configure for Tpetra/Kokkos: + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${BLAS_LIB_DIR}" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_DIRS:FILEPATH=${LAPACK_LIB_DIR}" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Tpetra:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Kokkos:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraClassic:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Tpetra_ENABLE_Kokkos_Refactor:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D KokkosClassic_DefaultNode:STRING=Kokkos::Compat::KokkosOpenMPWrapperNode" + + CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}-DKOKKOS_FAST_COMPILE" + + if [ -n "${USE_CUDA_ARCH}" ] ; then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Cuda:BOOL=ON" + + fi + +fi + +if [ 1 ] ; then + + # Configure for Stokhos: + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Sacado:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Stokhos:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Stokhos_ENABLE_Belos:BOOL=ON" + +fi + +if [ 1 ] ; then + + # Configure for 
TrilinosCouplings: + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TrilinosCouplings:BOOL=ON" + +fi + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON" + +if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; # '=' is the POSIX string test; '==' fails under dash +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" +fi + +#----------------------------------------------------------------------------- +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# MPI configuration only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Kokkos use pthread configuration: + +if [ "${USE_THREAD}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Kokkos use OpenMP configuration: + +if [ "${USE_OPENMP}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
Kokkos_ENABLE_OpenMP:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Hardware locality configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${USE_CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, + # this is different than the standard CMAKE_CXX_FLAGS syntax. + + CUDA_NVCC_FLAGS="-DKOKKOS_HAVE_CUDA_ARCH=${USE_CUDA_ARCH}0;-gencode;arch=compute_${USE_CUDA_ARCH},code=sm_${USE_CUDA_ARCH}" + + if [ "${USE_OPENMP}" = "ON" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + fi + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${USE_INTEL}" = "ON" -o "${USE_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +# Cross-compile for Intel Xeon Phi: + +if [ "${USE_XEON_PHI}" = "ON" ] ; +then + + CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -mmic" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" + 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- + +if [ -n "${CMAKE_CXX_FLAGS}" ] ; then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING='${CMAKE_CXX_FLAGS}'" + +fi + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. 
+# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo "cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}" + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_kokkos_bgq.sh b/lib/kokkos/config/configure_kokkos_bgq.sh new file mode 100755 index 0000000000000000000000000000000000000000..73236937eaa9b311d06027a23aa070a8f62c9153 --- /dev/null +++ b/lib/kokkos/config/configure_kokkos_bgq.sh @@ -0,0 +1,88 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# to build: +# build on bgq-b[1-12] +# module load sierra-devel +# run this configure file +# make + +# to run: +# ssh bgq-login +# cd /scratch/username/... +# export OMP_PROC_BIND and XLSMPOPTS environment variables +# run with srun + +# Note: hwloc does not work to get or set cpubindings on bgq. +# Use the openmp backend and the openmp environment variables. +# +# Only the mpi wrappers seem to be setup for cross-compile, +# so it is important that this configure enables MPI and uses mpigcc wrappers. + + + +# +# Force CMake to re-evaluate build options. +# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="../Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=../TrilinosInstall/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. 
+# Use a variable so options can be propagated to CUDA compiler. + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=mpigcc-4.7.2" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=mpig++-4.7.2" + +CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} $* ${CMAKE_PROJECT_DIR}" + +cmake ${CMAKE_CONFIGURE} "$@" ${CMAKE_PROJECT_DIR} # "$@": forward this script's extra arguments to CMake + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_kokkos_dev.sh b/lib/kokkos/config/configure_kokkos_dev.sh new file mode 100755 index 0000000000000000000000000000000000000000..ac61dec602381b52d96f91a59c0eddbc2d6b5801 --- 
/dev/null +++ b/lib/kokkos/config/configure_kokkos_dev.sh @@ -0,0 +1,216 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# +# Force CMake to re-evaluate build options. +# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="${HOME}/Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${HOME}/TrilinosInstall/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. 
+ +CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +#CMAKE_BUILD_TYPE=DEBUG +#CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +#----------------------------------------------------------------------------- +# Build for CUDA architecture: + +#CUDA_ARCH="" +#CUDA_ARCH="20" +#CUDA_ARCH="30" +CUDA_ARCH="35" + +# Build with OpenMP and Pthreads: + +OPENMP=ON +PTHREADS=ON + +# Build host code with Intel compiler: + +INTEL=OFF + +# Build for MIC architecture: + +INTEL_XEON_PHI=OFF + +# Build with HWLOC at location: + +#HWLOC_BASE_DIR="" +#HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7" +HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3" + +# Location for MPI to use in examples: + +#MPI_BASE_DIR="" +#MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7" +MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.7.3" +#MPI_BASE_DIR="/home/projects/openmpi/1.7.3/llvm/2013-12-02/" + +#----------------------------------------------------------------------------- +# MPI configuration only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Pthread configuration: + +if [ "${PTHREADS}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# OpenMP configuration: + +if [ "${OPENMP}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" +fi + 
+#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Hardware locality cmake configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, + # this is different than the standard CMAKE_CXX_FLAGS syntax. 
+ + CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" + + if [ "${OPENMP}" = "ON" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + fi + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +#----------------------------------------------------------------------------- + +# Cross-compile for Intel Xeon Phi: + +if [ "${INTEL_XEON_PHI}" = "ON" ] ; +then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} $* ${CMAKE_PROJECT_DIR}" + +cmake ${CMAKE_CONFIGURE} "$@" ${CMAKE_PROJECT_DIR} # "$@": forward this script's extra arguments to CMake + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_kokkos_nvidia.sh b/lib/kokkos/config/configure_kokkos_nvidia.sh new file mode 100755 index 0000000000000000000000000000000000000000..f78b7dce7845474402d61793084d59cfff579e4a --- /dev/null +++ b/lib/kokkos/config/configure_kokkos_nvidia.sh @@ -0,0 +1,204 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# +# Force CMake to re-evaluate build options. 
+# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="${HOME}/Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/sems/common/kokkos/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. + +CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +#----------------------------------------------------------------------------- +# Build for CUDA architecture: + +# CUDA_ARCH="" +CUDA_ARCH="20" +# CUDA_ARCH="30" +# CUDA_ARCH="35" + +# Build with OpenMP + +OPENMP=ON + +# Build host code with Intel compiler: + +# INTEL=ON + +# Build for MIC architecture: + +# INTEL_XEON_PHI=ON + +# Build with HWLOC at location: + +HWLOC_BASE_DIR="/home/sems/common/hwloc/current" + +# Location for MPI to use in examples: + +MPI_BASE_DIR="/home/sems/common/openmpi/current" + +#----------------------------------------------------------------------------- +# MPI configuration only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Pthread configuration: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
TPL_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP configuration: + +if [ "${OPENMP}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Hardware locality cmake configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, + # this is different than the standard CMAKE_CXX_FLAGS syntax. 
+ + CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" + + if [ "${OPENMP}" = "ON" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + fi + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +#----------------------------------------------------------------------------- + +# Cross-compile for Intel Xeon Phi: + +if [ "${INTEL_XEON_PHI}" = "ON" ] ; +then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} $* ${CMAKE_PROJECT_DIR}" + +cmake ${CMAKE_CONFIGURE} "$@" ${CMAKE_PROJECT_DIR} # "$@": forward this script's extra arguments to CMake + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_shannon.sh b/lib/kokkos/config/configure_shannon.sh new file mode 100755 index 0000000000000000000000000000000000000000..8bd175b0314ffc74c9c6ffb02188d599a1b5a573 --- /dev/null +++ b/lib/kokkos/config/configure_shannon.sh @@ -0,0 +1,190 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# +# Force CMake to re-evaluate build options. 
+# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="${HOME}/Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. + +CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +#----------------------------------------------------------------------------- +# Build for CUDA architecture: + +# CUDA_ARCH="" +# CUDA_ARCH="20" +# CUDA_ARCH="30" +CUDA_ARCH="35" + +# Build host code with Intel compiler: + +INTEL=ON + +# Build for MIC architecture: + +# INTEL_XEON_PHI=ON + +# Build with HWLOC at location: + +HWLOC_BASE_DIR="/home/projects/hwloc/1.6.2" + +# Location for MPI to use in examples: + +MPI_BASE_DIR="" + +#----------------------------------------------------------------------------- +# MPI configuation only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Pthread configuation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" + 
+#----------------------------------------------------------------------------- +# OpenMP configuration: + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Hardware locality cmake configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, + # this is different than the standard CMAKE_CXX_FLAGS syntax.
+ + CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +#----------------------------------------------------------------------------- + +# Cross-compile for Intel Xeon Phi: + +if [ "${INTEL_XEON_PHI}" = "ON" ] ; +then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} $* ${CMAKE_PROJECT_DIR}" + +cmake ${CMAKE_CONFIGURE} "$@" ${CMAKE_PROJECT_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_tpetra_kokkos_cuda_nvcc_wrapper.sh b/lib/kokkos/config/configure_tpetra_kokkos_cuda_nvcc_wrapper.sh new file mode 100755 index 0000000000000000000000000000000000000000..0baa83aefee66f6db0e131c2a0c77ce62d7545b6 --- /dev/null +++ b/lib/kokkos/config/configure_tpetra_kokkos_cuda_nvcc_wrapper.sh @@ -0,0 +1,140 @@ +#!/bin/bash +# +# This script uses CUDA, OpenMP, and MPI. +# +# Before invoking this script, set the OMPI_CXX environment variable +# to point to nvcc_wrapper, wherever it happens to live. (If you use +# an MPI implementation other than OpenMPI, set the corresponding +# environment variable instead.)
+# + +rm -f CMakeCache.txt; +rm -rf CMakeFiles +EXTRA_ARGS=$@ +MPI_PATH="/opt/mpi/openmpi/1.8.2/nvcc-gcc/4.8.3-6.5" +CUDA_PATH="/opt/nvidia/cuda/6.5.14" + +# +# As long as there are any .cu files in Trilinos, we'll need to set +# CUDA_NVCC_FLAGS. If Trilinos gets rid of all of its .cu files and +# lets nvcc_wrapper handle them as .cpp files, then we won't need to +# set CUDA_NVCC_FLAGS. As it is, given that we need to set +# CUDA_NVCC_FLAGS, we must make sure that they are the same flags as +# nvcc_wrapper passes to nvcc. +# +CUDA_NVCC_FLAGS="-gencode;arch=compute_35,code=sm_35;-I${MPI_PATH}/include" +CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp" +CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3;-DKOKKOS_USE_CUDA_UVM" + +cmake \ + -D CMAKE_INSTALL_PREFIX:PATH="$PWD/../install/" \ + -D CMAKE_BUILD_TYPE:STRING=DEBUG \ + -D CMAKE_CXX_FLAGS:STRING="-g -Wall" \ + -D CMAKE_C_FLAGS:STRING="-g -Wall" \ + -D CMAKE_FORTRAN_FLAGS:STRING="" \ + -D CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS="" \ + -D Trilinos_ENABLE_Triutils=OFF \ + -D Trilinos_ENABLE_INSTALL_CMAKE_CONFIG_FILES:BOOL=OFF \ + -D Trilinos_ENABLE_DEBUG:BOOL=OFF \ + -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF \ + -D Trilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=OFF \ + -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING="" \ + -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF \ + -D Trilinos_ENABLE_ALL_OPTIONAL_PACKAGES:BOOL=OFF \ + -D BUILD_SHARED_LIBS:BOOL=OFF \ + -D DART_TESTING_TIMEOUT:STRING=600 \ + -D CMAKE_VERBOSE_MAKEFILE:BOOL=OFF \ + \ + \ + -D CMAKE_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \ + -D CMAKE_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \ + -D MPI_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \ + -D MPI_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \ + -D CMAKE_Fortran_COMPILER:FILEPATH="${MPI_PATH}/bin/mpif77" \ + -D MPI_EXEC:FILEPATH="${MPI_PATH}/bin/mpirun" \ + -D 
MPI_EXEC_POST_NUMPROCS_FLAGS:STRING="-bind-to;socket;--map-by;socket;env;CUDA_MANAGED_FORCE_DEVICE_ALLOC=1;CUDA_LAUNCH_BLOCKING=1;OMP_NUM_THREADS=2" \ + \ + \ + -D Trilinos_ENABLE_CXX11:BOOL=OFF \ + -D TPL_ENABLE_MPI:BOOL=ON \ + -D Trilinos_ENABLE_OpenMP:BOOL=ON \ + -D Trilinos_ENABLE_ThreadPool:BOOL=ON \ + \ + \ + -D TPL_ENABLE_CUDA:BOOL=ON \ + -D CUDA_TOOLKIT_ROOT_DIR:FILEPATH="${CUDA_PATH}" \ + -D CUDA_PROPAGATE_HOST_FLAGS:BOOL=OFF \ + -D TPL_ENABLE_Thrust:BOOL=OFF \ + -D Thrust_INCLUDE_DIRS:FILEPATH="${CUDA_PATH}/include" \ + -D TPL_ENABLE_CUSPARSE:BOOL=OFF \ + -D TPL_ENABLE_Cusp:BOOL=OFF \ + -D Cusp_INCLUDE_DIRS="/home/crtrott/Software/cusp" \ + -D CUDA_VERBOSE_BUILD:BOOL=OFF \ + -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS} \ + \ + \ + -D TPL_ENABLE_HWLOC=OFF \ + -D HWLOC_INCLUDE_DIRS="/usr/local/software/hwloc/current/include" \ + -D HWLOC_LIBRARY_DIRS="/usr/local/software/hwloc/current/lib" \ + -D TPL_ENABLE_BinUtils=OFF \ + -D TPL_ENABLE_BLAS:STRING=ON \ + -D TPL_ENABLE_LAPACK:STRING=ON \ + -D TPL_ENABLE_MKL:STRING=OFF \ + -D TPL_ENABLE_HWLOC:STRING=OFF \ + -D TPL_ENABLE_GTEST:STRING=ON \ + -D TPL_ENABLE_SuperLU=ON \ + -D TPL_ENABLE_BLAS=ON \ + -D TPL_ENABLE_LAPACK=ON \ + -D TPL_SuperLU_LIBRARIES="/home/crtrott/Software/SuperLU_4.3/lib/libsuperlu_4.3.a" \ + -D TPL_SuperLU_INCLUDE_DIRS="/home/crtrott/Software/SuperLU_4.3/SRC" \ + \ + \ + -D Trilinos_Enable_Kokkos:BOOL=ON \ + -D Trilinos_ENABLE_KokkosCore:BOOL=ON \ + -D Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON \ + -D Trilinos_ENABLE_KokkosContainers:BOOL=ON \ + -D Trilinos_ENABLE_TpetraKernels:BOOL=ON \ + -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON \ + -D Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON \ + -D Trilinos_ENABLE_KokkosExample:BOOL=ON \ + -D Kokkos_ENABLE_EXAMPLES:BOOL=ON \ + -D Kokkos_ENABLE_TESTS:BOOL=OFF \ + -D KokkosClassic_DefaultNode:STRING="Kokkos::Compat::KokkosCudaWrapperNode" \ + -D TpetraClassic_ENABLE_OpenMPNode=OFF \ + -D TpetraClassic_ENABLE_TPINode=OFF \ + -D 
TpetraClassic_ENABLE_MKL=OFF \ + -D Kokkos_ENABLE_Cuda_UVM=ON \ + \ + \ + -D Trilinos_ENABLE_Teuchos:BOOL=ON \ + -D Teuchos_ENABLE_COMPLEX:BOOL=OFF \ + \ + \ + -D Trilinos_ENABLE_Tpetra:BOOL=ON \ + -D Tpetra_ENABLE_KokkosCore=ON \ + -D Tpetra_ENABLE_Kokkos_DistObject=OFF \ + -D Tpetra_ENABLE_Kokkos_Refactor=ON \ + -D Tpetra_ENABLE_TESTS=ON \ + -D Tpetra_ENABLE_EXAMPLES=ON \ + -D Tpetra_ENABLE_MPI_CUDA_RDMA:BOOL=ON \ + \ + \ + -D Trilinos_ENABLE_Belos=OFF \ + -D Trilinos_ENABLE_Amesos=OFF \ + -D Trilinos_ENABLE_Amesos2=OFF \ + -D Trilinos_ENABLE_Ifpack=OFF \ + -D Trilinos_ENABLE_Ifpack2=OFF \ + -D Trilinos_ENABLE_Epetra=OFF \ + -D Trilinos_ENABLE_EpetraExt=OFF \ + -D Trilinos_ENABLE_Zoltan=OFF \ + -D Trilinos_ENABLE_Zoltan2=OFF \ + -D Trilinos_ENABLE_MueLu=OFF \ + -D Belos_ENABLE_TESTS=ON \ + -D Belos_ENABLE_EXAMPLES=ON \ + -D MueLu_ENABLE_TESTS=ON \ + -D MueLu_ENABLE_EXAMPLES=ON \ + -D Ifpack2_ENABLE_TESTS=ON \ + -D Ifpack2_ENABLE_EXAMPLES=ON \ + $EXTRA_ARGS \ +${HOME}/Trilinos + diff --git a/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt b/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt new file mode 100644 index 0000000000000000000000000000000000000000..9f56f2fd48d30da63f28662431711c8b20d1f4a5 --- /dev/null +++ b/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt @@ -0,0 +1,153 @@ +// -------------------------------------------------------------------------------- // + +The following steps are for workstations/servers with the SEMS environment installed. + +// -------------------------------------------------------------------------------- // +Summary: + +- Step 1: Rigorous testing of Kokkos' develop branch for each backend (Serial, OpenMP, Threads, Cuda) with all supported compilers. + +- Step 2: Snapshot Kokkos' develop branch into current Trilinos develop branch. + +- Step 3: Build and test Trilinos with combinations of compilers, types, backends. 
+ +- Step 4: Promote Kokkos develop branch to master if the snapshot does not cause any new tests to fail; else track/fix causes of new failures. + +- Step 5: Snapshot Kokkos tagged master branch into Trilinos and push Trilinos. +// -------------------------------------------------------------------------------- // + + +// -------------------------------------------------------------------------------- // + +Step 1: + 1.1. Update kokkos develop branch (NOT a fork) + + (From kokkos directory): + git fetch --all + git checkout develop + git reset --hard origin/develop + + 1.2. Create a testing directory - here the directory is created within the kokkos directory + + mkdir testing + cd testing + + 1.3. Run the test_all_sandia script; various compiler and build-list options can be specified + + ../config/test_all_sandia + + 1.4 Clean repository of untracked files + + cd ../ + git clean -df + +// -------------------------------------------------------------------------------- // + +Step 2: + 2.1 Update Trilinos develop branch + + (From Trilinos directory): + git checkout develop + git fetch --all + git reset --hard origin/develop + git clean -df + + 2.2 Snapshot Kokkos into Trilinos - this requires python/2.7.9 and that both Trilinos and Kokkos be clean - no untracked or modified files + + module load python/2.7.9 + python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages + +// -------------------------------------------------------------------------------- // + +Step 3: + 3.1. 
Build and test Trilinos with 3 different configurations; a configure-all script is provided in Trilinos and should be modified to test each of the following 3 configurations with appropriate environment variable(s): + + - GCC/4.7.2-OpenMP/Complex + Run tests with the following environment variable: + + export OMP_NUM_THREADS=2 + + + - Intel/15.0.2-Serial/NoComplex + + + - GCC/4.8.4/CUDA/7.5.18-Cuda/Serial/NoComplex + Run tests with the following environment variables: + + export CUDA_LAUNCH_BLOCKING=1 + export CUDA_MANAGED_FORCE_DEVICE_ALLOC=1 + + + mkdir Build + cd Build + cp TRILINOS_PATH/sampleScripts/Sandia-SEMS/configure-all ./ + ** Set the path to Trilinos appropriately within the configure-all script ** + source $SEMS_MODULE_ROOT/utils/sems-modules-init.sh kokkos + source configure-all + make -k (-k means "keep going" to get past build errors; -j12 can also be specified to build with 12 threads, for example) + ctest + + 3.2. Compare the failed test output to the test output on the dashboard ( testing.sandia.gov/cdash select Trilinos ); investigate and fix problems if new tests fail after the Kokkos snapshot + +// -------------------------------------------------------------------------------- // + +Step 4: + 4.1. Once all Trilinos tests pass promote Kokkos develop branch to master on Github + + - DO NOT fast-forward the merge!!!! + + (From kokkos directory): + git checkout master + git fetch --all + # Ensure we are on the current origin/master + git reset --hard origin/master + git merge --no-ff origin/develop + + 4.2. 
Update the tag in kokkos/config/master_history.txt + Tag description: MajorNumber.MinorNumber.WeeksSinceMinorNumberUpdate + Tag format: #.#.## + + # Prepend master_history.txt with + + # tag: #.#.## + # date: mm/dd/yyyy + # master: sha1 + # develop: sha1 + # ----------------------- + + git commit --amend -a + + git tag -a #.#.## + tag: #.#.## + date: mm/dd/yyyy + master: sha1 + develop: sha1 + + git push --follow-tags origin master + +// -------------------------------------------------------------------------------- // + +Step 5: + 5.1. Make sure Trilinos is up-to-date - chances are other changes have been committed since the integration testing process began. If a substantial change has occurred that may be affected by the snapshot the testing procedure may need to be repeated + + (From Trilinos directory): + git checkout develop + git fetch --all + git reset --hard origin/develop + git clean -df + + 5.2. Snapshot Kokkos master branch into Trilinos + + (From kokkos directory): + git fetch --all + git checkout tags/#.#.## + git clean -df + + python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages + + 5.3. Push the updated develop branch of Trilinos to Github - congratulations!!! + + (From Trilinos directory): + git push + +// -------------------------------------------------------------------------------- // diff --git a/lib/kokkos/config/kokkos_dev/config-core-all.sh b/lib/kokkos/config/kokkos_dev/config-core-all.sh new file mode 100755 index 0000000000000000000000000000000000000000..fa588c778f68330ff130364e9425d5a6aefa357c --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-all.sh @@ -0,0 +1,113 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. 
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Cuda, OpenMP, Threads, Qthread, hwloc +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu +# +# The 'nvcc-wrapper' module should load a script that matches +# kokkos/config/nvcc_wrapper +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" + +#----------------------------------------------------------------------------- +# Cuda using GNU, use the nvcc_wrapper to build CUDA source + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" 
+CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Hardware locality configuration: + +HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" + +#----------------------------------------------------------------------------- +# Pthread + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +# Qthread + +QTHREAD_BASE_DIR="/home/projects/qthreads/2014-07-08/host/gnu/4.7.3" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_QTHREAD:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_INCLUDE_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/include" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_LIBRARY_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/lib" + +#----------------------------------------------------------------------------- +# C++11 + +# 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh b/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh new file mode 100755 index 0000000000000000000000000000000000000000..c2e17bb9443ad37576b490149d63e1d7b9f9b1ef --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh @@ -0,0 +1,104 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there.
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Cuda, OpenMP, hwloc +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu +# +# The 'nvcc-wrapper' module should load a script that matches +# kokkos/config/nvcc_wrapper +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" + +#----------------------------------------------------------------------------- +# Cuda using GNU, use the nvcc_wrapper to build CUDA source + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" 
+CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Hardware locality configuration: + +HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" + +#----------------------------------------------------------------------------- +# Pthread explicitly OFF so tribits doesn't automatically turn it on + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. 
+# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-cuda.sh b/lib/kokkos/config/kokkos_dev/config-core-cuda.sh new file mode 100755 index 0000000000000000000000000000000000000000..39b72d5ce136ff2ea00c6e1cc4a049eb02d606ee --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-cuda.sh @@ -0,0 +1,88 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Cuda +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu +# +# The 'nvcc-wrapper' module should load a script that matches +# kokkos/config/nvcc_wrapper +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" +
+#----------------------------------------------------------------------------- +# Cuda using GNU, use the nvcc_wrapper to build CUDA source + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + +# Pthread explicitly OFF, otherwise tribits will automatically turn it on + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. 
+# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-cxx11-omp.sh b/lib/kokkos/config/kokkos_dev/config-core-cxx11-omp.sh new file mode 100755 index 0000000000000000000000000000000000000000..b83a535416064febc732a4fd0d7dbf34274b1adf --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-cxx11-omp.sh @@ -0,0 +1,84 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# C++11, OpenMP +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +#----------------------------------------------------------------------------- +#
Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Pthread explicitly OFF so tribits doesn't automatically activate + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. 
+# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-dbg-none.sh b/lib/kokkos/config/kokkos_dev/config-core-dbg-none.sh new file mode 100755 index 0000000000000000000000000000000000000000..d2e06a4ebd92080b255754b80b2af6ba93662090 --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-dbg-none.sh @@ -0,0 +1,78 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# <none> +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +#----------------------------------------------------------------------------- +# Configure 
for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Kokkos Pthread explicitly OFF, TPL Pthread ON for gtest + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. 
+# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-intel-cuda-omp.sh b/lib/kokkos/config/kokkos_dev/config-core-intel-cuda-omp.sh new file mode 100755 index 0000000000000000000000000000000000000000..e2ab1f1c00168ed3ea646c9f297dc040e2c5a33f --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-intel-cuda-omp.sh @@ -0,0 +1,89 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Intel, OpenMP, Cuda +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 cuda/7.0.4 intel/2015.0.090 nvcc-wrapper/intel +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" + 
+CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Pthread explicitly OFF + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. 
+# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-intel-omp.sh b/lib/kokkos/config/kokkos_dev/config-core-intel-omp.sh new file mode 100755 index 0000000000000000000000000000000000000000..fd56d41161a567bca1eb3601600cdc9fbe3b0104 --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-intel-omp.sh @@ -0,0 +1,84 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Intel, OpenMP +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 intel/13.SP1.1.106 +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" + 
+#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Pthread explicitly OFF + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. 
+# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-omp.sh b/lib/kokkos/config/kokkos_dev/config-core-omp.sh new file mode 100755 index 0000000000000000000000000000000000000000..f91ecd525488c40a1d92c9143e727a4a287dfefb --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-omp.sh @@ -0,0 +1,77 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# OpenMP +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +#----------------------------------------------------------------------------- +# Configure for Kokkos 
subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +# Pthread explicitly OFF, otherwise tribits will automatically turn it on + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. 
+# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-threads-hwloc.sh b/lib/kokkos/config/kokkos_dev/config-core-threads-hwloc.sh new file mode 100755 index 0000000000000000000000000000000000000000..19ab96902340f6ad757fd84546afc2061bdad024 --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-threads-hwloc.sh @@ -0,0 +1,87 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Threads, hwloc +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + 
+#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Hardware locality configuration: + +HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" + +#----------------------------------------------------------------------------- +# Pthread + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. 
+# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/master_history.txt b/lib/kokkos/config/master_history.txt new file mode 100644 index 0000000000000000000000000000000000000000..f2eb674578f2c14442376210dfd1080050fe3917 --- /dev/null +++ b/lib/kokkos/config/master_history.txt @@ -0,0 +1,3 @@ +tag: 2.01.00 date: 07:21:2016 master: xxxxxxxx develop: fa6dfcc4 +tag: 2.01.06 date: 09:02:2016 master: 9afaa87f develop: 555f1a3a + diff --git a/lib/kokkos/config/nvcc_wrapper b/lib/kokkos/config/nvcc_wrapper new file mode 100755 index 0000000000000000000000000000000000000000..6093cb61bdaf5a3f030406b8e149580b818920d0 --- /dev/null +++ b/lib/kokkos/config/nvcc_wrapper @@ -0,0 +1,280 @@ +#!/bin/bash +# +# This shell script (nvcc_wrapper) wraps both the host compiler and +# NVCC, if you are building legacy C or C++ code with CUDA enabled. +# The script remedies some differences between the interface of NVCC +# and that of the host compiler, in particular for linking. +# It also means that a legacy code doesn't need separate .cu files; +# it can just use .cpp files. +# +# Default settings: change those according to your machine. For +# example, you may have have two different wrappers with either icpc +# or g++ as their back-end compiler. The defaults can be overwritten +# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc). + +default_arch="sm_35" +#default_arch="sm_50" + +# +# The default C++ compiler. 
+# +host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"} +#host_compiler="icpc" +#host_compiler="/usr/local/gcc/4.8.3/bin/g++" +#host_compiler="/usr/local/gcc/4.9.1/bin/g++" + +# +# Internal variables +# + +# C++ files +cpp_files="" + +# Host compiler arguments +xcompiler_args="" + +# Cuda (NVCC) only arguments +cuda_args="" + +# Arguments for both NVCC and Host compiler +shared_args="" + +# Linker arguments +xlinker_args="" + +# Object files passable to NVCC +object_files="" + +# Link objects for the host linker only +object_files_xlinker="" + +# Shared libraries with version numbers are not handled correctly by NVCC +shared_versioned_libraries_host="" +shared_versioned_libraries="" + +# Does the User set the architecture +arch_set=0 + +# Does the user overwrite the host compiler +ccbin_set=0 + +#Error code of compilation +error_code=0 + +# Do a dry run without actually compiling +dry_run=0 + +# Skip NVCC compilation and use host compiler directly +host_only=0 + +# Enable workaround for CUDA 6.5 for pragma ident +replace_pragma_ident=0 + +# Mark first host compiler argument +first_xcompiler_arg=1 + +temp_dir=${TMPDIR:-/tmp} + +# Check if we have an optimization argument already +optimization_applied=0 + +#echo "Arguments: $# $@" + +while [ $# -gt 0 ] +do + case $1 in + #show the executed command + --show|--nvcc-wrapper-show) + dry_run=1 + ;; + #run host compilation only + --host-only) + host_only=1 + ;; + #replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros + --replace-pragma-ident) + replace_pragma_ident=1 + ;; + #handle source files to be compiled as cuda files + *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu) + cpp_files="$cpp_files $1" + ;; + # Ensure we only have one optimization flag because NVCC doesn't allow muliple + -O*) + if [ $optimization_applied -eq 1 ]; then + echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used 
because nvcc can only accept a single optimization setting." + else + shared_args="$shared_args $1" + optimization_applied=1 + fi + ;; + #Handle shared args (valid for both nvcc and the host compiler) + -D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared) + shared_args="$shared_args $1" + ;; + #Handle shared args that have an argument + -o|-MT) + shared_args="$shared_args $1 $2" + shift + ;; + #Handle known nvcc args + -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*) + cuda_args="$cuda_args $1" + ;; + #Handle known nvcc args that have an argument + -rdc|-maxrregcount|--default-stream) + cuda_args="$cuda_args $1 $2" + shift + ;; + #Handle c++11 setting + --std=c++11|-std=c++11) + shared_args="$shared_args $1" + ;; + #strip of -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98 + -std=c++98|--std=c++98) + ;; + #strip of pedantic because it produces endless warnings about #LINE added by the preprocessor + -pedantic|-Wpedantic|-ansi) + ;; + #strip -Xcompiler because we add it + -Xcompiler) + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args="$2" + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,$2" + fi + shift + ;; + #strip of "-x cu" because we add that + -x) + if [[ $2 != "cu" ]]; then + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args="-x,$2" + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,-x,$2" + fi + fi + shift + ;; + #Handle -ccbin (if its not set we can set it to a default value) + -ccbin) + cuda_args="$cuda_args $1 $2" + ccbin_set=1 + host_compiler=$2 + shift + ;; + #Handle -arch argument (if its not set use a default + -arch*) + cuda_args="$cuda_args $1" + arch_set=1 + ;; + #Handle -Xcudafe argument + -Xcudafe) + cuda_args="$cuda_args -Xcudafe $2" + shift + ;; + #Handle args that should be sent to the linker + -Wl*) + xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}" + 
host_linker_args="$host_linker_args ${1:4:${#1}}" + ;; + #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking + *.a|*.so|*.o|*.obj) + object_files="$object_files $1" + object_files_xlinker="$object_files_xlinker -Xlinker $1" + ;; + #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking + *.dylib) + object_files="$object_files -Xlinker $1" + object_files_xlinker="$object_files_xlinker -Xlinker $1" + ;; + #Handle shared libraries with *.so.* names which nvcc can't do. + *.so.*) + shared_versioned_libraries_host="$shared_versioned_libraries_host $1" + shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1" + ;; + #All other args are sent to the host compiler + *) + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args=$1 + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,$1" + fi + ;; + esac + + shift +done + +#Add default host compiler if necessary +if [ $ccbin_set -ne 1 ]; then + cuda_args="$cuda_args -ccbin $host_compiler" +fi + +#Add architecture command +if [ $arch_set -ne 1 ]; then + cuda_args="$cuda_args -arch=$default_arch" +fi + +#Compose compilation command +nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries" +if [ $first_xcompiler_arg -eq 0 ]; then + nvcc_command="$nvcc_command -Xcompiler $xcompiler_args" +fi + +#Compose host only command +host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host" + +#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING' +if [ $replace_pragma_ident -eq 1 ]; then + cpp_files2="" + for file in $cpp_files + do + var=`grep pragma ${file} | grep ident | grep "#"` + if [ "${#var}" -gt 0 ] + then + sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > $temp_dir/nvcc_wrapper_tmp_$file + cpp_files2="$cpp_files2 $temp_dir/nvcc_wrapper_tmp_$file" 
+ else + cpp_files2="$cpp_files2 $file" + fi + done + cpp_files=$cpp_files2 + #echo $cpp_files +fi + +if [ "$cpp_files" ]; then + nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files" +else + nvcc_command="$nvcc_command $object_files" +fi + +if [ "$cpp_files" ]; then + host_command="$host_command $object_files $cpp_files" +else + host_command="$host_command $object_files" +fi + +#Print command for dryrun +if [ $dry_run -eq 1 ]; then + if [ $host_only -eq 1 ]; then + echo $host_command + else + echo $nvcc_command + fi + exit 0 +fi + +#Run compilation command +if [ $host_only -eq 1 ]; then + $host_command +else + $nvcc_command +fi +error_code=$? + +#Report error code +exit $error_code diff --git a/lib/kokkos/config/snapshot.py b/lib/kokkos/config/snapshot.py new file mode 100755 index 0000000000000000000000000000000000000000..d816cd0c9c1962c0c95c672467319474a72669fb --- /dev/null +++ b/lib/kokkos/config/snapshot.py @@ -0,0 +1,279 @@ +#! /usr/bin/env python + +""" +Snapshot a project into another project and perform the necessary repo actions +to provide a commit message that can be used to trace back to the exact point +in the source repository. +""" + +#todo: +# Support svn +# Allow renaming of the source dir in the destination path +# Check if a new snapshot is necessary? +# + +import sys + +#check the version number so that there is a good error message when argparse is not available. +#This checks for exactly 2.7 which is bad, but it is a python 2 script and argparse was introduced +#in 2.7 which is also the last version of python 2. If this script is updated for python 3 this +#will need to change, but for now it is not safe to allow 3.x to run this. +if sys.version_info[:2] != (2, 7): + print "Error snapshot requires python 2.7 detected version is %d.%d." 
% (sys.version_info[0], sys.version_info[1]) + sys.exit(1) + +import subprocess, argparse, re, doctest, os, datetime, traceback + +def parse_cmdline(description): + parser = argparse.ArgumentParser(usage="snapshot.py [options] source destination", description=description) + + parser.add_argument("-n", "--no-comit", action="store_false", dest="create_commit", default=True, + help="Do not perform a commit or create a commit message.") + parser.add_argument("-v", "--verbose", action="store_true", dest="verbose_mode", default=False, + help="Enable verbose mode.") + parser.add_argument("-d", "--debug", action="store_true", dest="debug_mode", default=False, + help="Enable debugging output.") + parser.add_argument("--no-validate-repo", action="store_true", dest="no_validate_repo", default=False, + help="Reduce the validation that the source and destination repos are clean to a warning.") + parser.add_argument("--source-repo", choices=["git","none"], default="", + help="Type of repository of the source, use none to skip all repository operations.") + parser.add_argument("--dest-repo", choices=["git","none"], default="", + help="Type of repository of the destination, use none to skip all repository operations.") + + parser.add_argument("source", help="Source project to snapshot from.") + parser.add_argument("destination", help="Destination to snapshot too.") + + options = parser.parse_args() + options = validate_options(options) + return options +#end parseCmdline + +def validate_options(options): + apparent_source_repo_type="none" + apparent_dest_repo_type="none" + + #prevent user from accidentally giving us a path that rsync will treat differently than expected. 
+ options.source = options.source.rstrip(os.sep) + options.destination = options.destination.rstrip(os.sep) + + options.source = os.path.abspath(options.source) + options.destination = os.path.abspath(options.destination) + + if os.path.exists(options.source): + apparent_source_repo_type, source_root = deterimine_repo_type(options.source) + else: + raise RuntimeError("Could not find source directory of %s." % options.source) + options.source_root = source_root + + if not os.path.exists(options.destination): + print "Could not find destination directory of %s so it will be created." % options.destination + os.makedirs(options.destination) + + apparent_dest_repo_type, dest_root = deterimine_repo_type(options.destination) + options.dest_root = dest_root + + #error on svn repo types for now + if apparent_source_repo_type == "svn" or apparent_dest_repo_type == "svn": + raise RuntimeError("SVN repositories are not supported at this time.") + + if options.source_repo == "": + #source repo type is not specified to just using the apparent type. + options.source_repo = apparent_source_repo_type + else: + if options.source_repo != "none" and options.source_repo != apparent_source_repo_type: + raise RuntimeError("Specified source repository type of %s conflicts with determined type of %s" % \ + (options.source_repo, apparent_source_repo_type)) + + if options.dest_repo == "": + #destination repo type is not specified to just using the apparent type. + options.dest_repo = apparent_dest_repo_type + else: + if options.dest_repo != "none" and options.dest_repo != apparent_dest_repo_type: + raise RuntimeError("Specified destination repository type of %s conflicts with determined type of %s" % \ + (options.dest_repo, apparent_dest_repo_type)) + + return options +#end validate_options + +def run_cmd(cmd, options, working_dir="."): + cmd_str = " ".join(cmd) + if options.verbose_mode: + print "Running command '%s' in dir %s." 
% (cmd_str, working_dir) + + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=working_dir) + proc_stdout, proc_stderr = proc.communicate() + ret_val = proc.wait() + + if options.debug_mode: + print "==== %s stdout start ====" % cmd_str + print proc_stdout + print "==== %s stdout end ====" % cmd_str + print "==== %s stderr ====" % cmd_str + print proc_stderr + print "==== %s stderr ====" % cmd_str + + if ret_val != 0: + raise RuntimeError("Command '%s' failed with error code %d. Error message:%s%s%sstdout:%s" % \ + (cmd_str, ret_val, os.linesep, proc_stderr, os.linesep, proc_stdout)) + + return proc_stdout, proc_stderr +#end run_cmd + +def deterimine_repo_type(location): + apparent_repo_type = "none" + + while location != "": + if os.path.exists(os.path.join(location, ".git")): + apparent_repo_type = "git" + break + elif os.path.exists(os.path.join(location, ".svn")): + apparent_repo_type = "svn" + break + else: + location = location[:location.rfind(os.sep)] + + return apparent_repo_type, location + +#end deterimine_repo_type + +def rsync(source, dest, options): + rsync_cmd = ["rsync", "-ar", "--delete"] + if options.debug_mode: + rsync_cmd.append("-v") + + if options.source_repo == "git": + rsync_cmd.append("--exclude=.git") + + rsync_cmd.append(options.source) + rsync_cmd.append(options.destination) + run_cmd(rsync_cmd, options) +#end rsync + +def create_commit_message(commit_id, commit_log, project_name, project_location): + eol = os.linesep + message = "Snapshot of %s from commit %s" % (project_name, commit_id) + message += eol * 2 + message += "From repository at %s" % project_location + message += eol * 2 + message += "At commit:" + eol + message += commit_log + return message +#end create_commit_message + +def find_git_commit_information(options): + r""" + >>> class fake_options: + ... source="." + ... verbose_mode=False + ... 
debug_mode=False + >>> myoptions = fake_options() + >>> find_git_commit_information(myoptions)[2:] + ('sems', 'software.sandia.gov:/git/sems') + """ + git_log_cmd = ["git", "log", "-1"] + + output, error = run_cmd(git_log_cmd, options, options.source) + + commit_match = re.match("commit ([0-9a-fA-F]+)", output) + commit_id = commit_match.group(1) + commit_log = output + + git_remote_cmd = ["git", "remote", "-v"] + output, error = run_cmd(git_remote_cmd, options, options.source) + + remote_match = re.search("origin\s([^ ]*/([^ ]+))", output, re.MULTILINE) + if not remote_match: + raise RuntimeError("Could not find origin of repo at %s. Consider using none for source repo type." % (options.source)) + + source_location = remote_match.group(1) + source_name = remote_match.group(2).strip() + + if source_name[-1] == "/": + source_name = source_name[:-1] + + return commit_id, commit_log, source_name, source_location + +#end find_git_commit_information + +def do_git_commit(message, options): + if options.verbose_mode: + print "Commiting to destination repository." + + git_add_cmd = ["git", "add", "-A"] + run_cmd(git_add_cmd, options, options.destination) + + git_commit_cmd = ["git", "commit", "-m%s" % message] + run_cmd(git_commit_cmd, options, options.destination) + + git_log_cmd = ["git", "log", "--format=%h", "-1"] + commit_sha1, error = run_cmd(git_log_cmd, options, options.destination) + + print "Commit %s was made to %s." % (commit_sha1.strip(), options.dest_root) +#end do_git_commit + +def verify_git_repo_clean(location, options): + git_status_cmd = ["git", "status", "--porcelain"] + output, error = run_cmd(git_status_cmd, options, location) + + if output != "": + if options.no_validate_repo == False: + raise RuntimeError("%s is not clean.%sPlease commit or stash all changes before running snapshot." + % (location, os.linesep)) + else: + print "WARNING: %s is not clean. Proceeding anyway." 
% location + print "WARNING: This could lead to differences in the source and destination." + print "WARNING: It could also lead to extra files being included in the snapshot commit." + +#end verify_git_repo_clean + +def main(options): + if options.verbose_mode: + print "Snapshotting %s to %s." % (options.source, options.destination) + + if options.source_repo == "git": + verify_git_repo_clean(options.source, options) + commit_id, commit_log, repo_name, repo_location = find_git_commit_information(options) + elif options.source_repo == "none": + commit_id = "N/A" + commit_log = "Unknown commit from %s snapshotted at: %s" % (options.source, datetime.datetime.now()) + repo_name = options.source + repo_location = options.source + + commit_message = create_commit_message(commit_id, commit_log, repo_name, repo_location) + os.linesep*2 + + if options.dest_repo == "git": + verify_git_repo_clean(options.destination, options) + + rsync(options.source, options.destination, options) + + if options.dest_repo == "git": + do_git_commit(commit_message, options) + elif options.dest_repo == "none": + file_name = "snapshot_message.txt" + message_file = open(file_name, "w") + message_file.write(commit_message) + message_file.close() + cwd = os.getcwd() + print "No commit done by request. Please use file at:" + print "%s%sif you wish to commit this to a repo later." 
% (cwd+"/"+file_name, os.linesep) + + + + +#end main + +if (__name__ == "__main__"): + if ("--test" in sys.argv): + doctest.testmod() + sys.exit(0) + + try: + options = parse_cmdline(__doc__) + main(options) + except RuntimeError, e: + print "Error occured:", e + if "--debug" in sys.argv: + traceback.print_exc() + sys.exit(1) + else: + sys.exit(0) diff --git a/lib/kokkos/config/test_all_sandia b/lib/kokkos/config/test_all_sandia new file mode 100755 index 0000000000000000000000000000000000000000..aac036a8f37abfedabac7a4849289ecb3cbdfcd0 --- /dev/null +++ b/lib/kokkos/config/test_all_sandia @@ -0,0 +1,539 @@ +#!/bin/bash -e + +# +# Global config +# + +set -o pipefail + +# Determine current machine + +MACHINE="" +HOSTNAME=$(hostname) +if [[ "$HOSTNAME" =~ (white|ride).* ]]; then + MACHINE=white +elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then + MACHINE=bowman +elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name + MACHINE=shepard +elif [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then + MACHINE=sems +else + echo "Unrecognized machine" >&2 + exit 1 +fi + +GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial" +IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" +INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial" +CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial" +CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial" + +GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized" +IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +CUDA_WARNING_FLAGS="" + +# Default. 
# Machine specific can override
DEBUG=False
ARGS=""
CUSTOM_BUILD_LIST=""
DRYRUN=False
BUILD_ONLY=False
declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=3
TEST_SCRIPT=False
SKIP_HWLOC=False

ARCH_FLAG=""

#
# Machine specific config
#

if [ "$MACHINE" = "sems" ]; then
  source /projects/modulefiles/utils/sems-modules-init.sh
  source /projects/modulefiles/utils/kokkos-modules-init.sh

  BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>/base,hwloc/1.10.1/<COMPILER_NAME>/<COMPILER_VERSION>/base"
  CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.7.2/base"

  # Format: (compiler module-list build-list exe-name warning-flag)
  # KOKKOS_PATH is only settled after argument parsing further down, so the
  # cuda entries carry a <KOKKOS_PATH> placeholder that get_compiler_data
  # substitutes at lookup time (same scheme as <COMPILER_NAME>).
  COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
             "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
             "gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
             "gcc/5.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
             "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
             "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
             "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
             "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
             "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
             "cuda/6.5.14 $CUDA_MODULE_LIST $CUDA_BUILD_LIST <KOKKOS_PATH>/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
             "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST <KOKKOS_PATH>/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
             "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST <KOKKOS_PATH>/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
  )

elif [ "$MACHINE" = "white" ]; then
  source /etc/profile.d/modules.sh
  SKIP_HWLOC=True
  export SLURM_TASKS_PER_NODE=32

  BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
  IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
  CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.9.2"

  # Don't do pthread on white
  GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"

  # Format: (compiler module-list build-list exe-name warning-flag)
  # The gcc rows use the GCC list overridden just above (same contents as the
  # IBM list), so the override is no longer dead.
  COMPILERS=("gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
             "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
             "ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
  )

  ARCH_FLAG="--arch=Power8"
  NUM_JOBS_TO_RUN_IN_PARALLEL=8

elif [ "$MACHINE" = "bowman" ]; then
  source /etc/profile.d/modules.sh
  SKIP_HWLOC=True
  export SLURM_TASKS_PER_NODE=32

  BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"

  OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"

  # Format: (compiler module-list build-list exe-name warning-flag)
  COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
             "intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
  )

  ARCH_FLAG="--arch=KNL"
  NUM_JOBS_TO_RUN_IN_PARALLEL=8

elif [ "$MACHINE" = "shepard" ]; then
  source /etc/profile.d/modules.sh
  SKIP_HWLOC=True
  export SLURM_TASKS_PER_NODE=32

  BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"

  OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"

  # Format: (compiler module-list build-list exe-name warning-flag)
  COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
             "intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
  )

  ARCH_FLAG="--arch=HSW"
  NUM_JOBS_TO_RUN_IN_PARALLEL=8

else
  echo "Unhandled machine $MACHINE" >&2
  exit 1
fi

export OMP_NUM_THREADS=4

declare -i NUM_RESULTS_TO_KEEP=7

RESULT_ROOT_PREFIX=TestAll

SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd )

#
# Handle arguments
#

# -gt compares numerically; '>' inside [[ ]] is a string comparison.
while [[ $# -gt 0 ]]
do
  key="$1"
  case $key in
    --kokkos-path*)
      KOKKOS_PATH="${key#*=}"
      ;;
    --build-list*)
      CUSTOM_BUILD_LIST="${key#*=}"
      ;;
    --debug*)
      DEBUG=True
      ;;
    --build-only*)
      BUILD_ONLY=True
      ;;
    --test-script*)
      TEST_SCRIPT=True
      ;;
    --skip-hwloc*)
      SKIP_HWLOC=True
      ;;
    --num*)
      NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}"
      ;;
    --dry-run*)
      DRYRUN=True
      ;;
    --help)
      echo "test_all_sandia <ARGS> <OPTIONS>:"
      echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory"
      echo " Defaults to root repo containing this script"
      echo "--debug: Run tests in debug. Defaults to False"
      echo "--test-script: Test this script, not Kokkos"
      echo "--skip-hwloc: Do not do hwloc tests"
      echo "--num=N: Number of jobs to run in parallel "
      echo "--dry-run: Just print what would be executed"
      echo "--build-only: Just do builds, don't run anything"
      echo "--build-list=BUILD,BUILD,BUILD..."
      echo " Provide a comma-separated list of builds instead of running all builds"
      echo " Valid items:"
      echo " OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial"
      echo " Cuda_OpenMP, Cuda_Pthread, Cuda_Serial"
      echo ""

      echo "ARGS: list of expressions matching compilers to test"
      echo " supported compilers sems"
      for COMPILER_DATA in "${COMPILERS[@]}"; do
        ARR=($COMPILER_DATA)
        COMPILER=${ARR[0]}
        echo " $COMPILER"
      done
      echo ""

      echo "Examples:"
      echo " Run all tests"
      echo " % test_all_sandia"
      echo ""
      echo " Run all gcc tests"
      echo " % test_all_sandia gcc"
      echo ""
      echo " Run all gcc/4.7.2 and all intel tests"
      echo " % test_all_sandia gcc/4.7.2 intel"
      echo ""
      echo " Run all tests in debug"
      echo " % test_all_sandia --debug"
      echo ""
      echo " Run gcc/4.7.2 and only do OpenMP and OpenMP_Serial builds"
      echo " % test_all_sandia gcc/4.7.2 --build-list=OpenMP,OpenMP_Serial"
      echo ""
      echo "If you want to kill the tests, do:"
      echo " hit ctrl-z"
      echo " % kill -9 %1"
      echo
      exit 0
      ;;
    *)
      # args, just append
      ARGS="$ARGS $1"
      ;;
  esac
  shift
done

# set kokkos path
if [ -z "$KOKKOS_PATH" ]; then
  KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT
else
  # Ensure KOKKOS_PATH is abs path (quoted so paths with spaces survive)
  KOKKOS_PATH=$( cd "$KOKKOS_PATH" && pwd )
fi

# set build type
if [ "$DEBUG" = "True" ]; then
  BUILD_TYPE=debug
else
  BUILD_TYPE=release
fi

# If no args provided, do all compilers
if [ -z "$ARGS" ]; then
  ARGS='?'
fi

# Process args to figure out which compilers to test
COMPILERS_TO_TEST=""
for ARG in $ARGS; do
  for COMPILER_DATA in "${COMPILERS[@]}"; do
    ARR=($COMPILER_DATA)
    COMPILER=${ARR[0]}
    if [[ "$COMPILER" = $ARG* ]]; then
      if [[ "$COMPILERS_TO_TEST" != *${COMPILER}* ]]; then
        COMPILERS_TO_TEST="$COMPILERS_TO_TEST $COMPILER"
      else
        echo "Tried to add $COMPILER twice"
      fi
    fi
  done
done

#
# Functions
#

# get_compiler_name <COMPILER>
get_compiler_name() {
  echo $1 | cut -d/ -f1
}

# get_compiler_version <COMPILER>
get_compiler_version() {
  echo $1 | cut -d/ -f2
}

# Do not call directly
# get_compiler_data <COMPILER> <FIELD-INDEX>
# Prints field FIELD-INDEX of the COMPILERS row for COMPILER with commas turned
# into spaces and the <COMPILER_NAME>, <COMPILER_VERSION> and <KOKKOS_PATH>
# placeholders expanded. Exits with status 1 if COMPILER is not listed.
get_compiler_data() {
  local compiler=$1
  local item=$2
  local compiler_name=$(get_compiler_name $compiler)
  local compiler_vers=$(get_compiler_version $compiler)

  local compiler_data
  for compiler_data in "${COMPILERS[@]}" ; do
    local arr=($compiler_data)
    if [ "$compiler" = "${arr[0]}" ]; then
      # '|' delimits the KOKKOS_PATH expression because the path contains '/'.
      echo "${arr[$item]}" | tr , ' ' | sed -e "s/<COMPILER_NAME>/$compiler_name/g" -e "s/<COMPILER_VERSION>/$compiler_vers/g" -e "s|<KOKKOS_PATH>|$KOKKOS_PATH|g"
      return 0
    fi
  done

  # Not found
  echo "Unreconized compiler $compiler" >&2
  exit 1
}

#
# For all getters, usage: <GETTER> <COMPILER>
#

get_compiler_modules() {
  get_compiler_data $1 1
}

get_compiler_build_list() {
  get_compiler_data $1 2
}

get_compiler_exe_name() {
  get_compiler_data $1 3
}

get_compiler_warning_flags() {
  get_compiler_data $1 4
}

# run_cmd <COMMAND...>
# Echoes the command and, unless this is a dry run, evaluates it with stderr
# folded into stdout.
run_cmd() {
  echo "RUNNING: $*"
  if [ "$DRYRUN" != "True" ]; then
    eval "$* 2>&1"
  fi
}

# report_and_log_test_results <SUCCESS> <DESC> <COMMENT>
report_and_log_test_result() {
  # Use sane var names
  local success=$1; local desc=$2; local comment=$3;

  if [ "$success" = "0" ]; then
    echo " PASSED $desc"
    echo $comment > $PASSED_DIR/$desc
  else
    # For failures, comment should be the name of the phase that failed
    echo " FAILED $desc" >&2
    echo $comment > $FAILED_DIR/$desc
    cat ${desc}.${comment}.log
  fi
}

# setup_env <COMPILER>
# Purges loaded modules and loads those listed for COMPILER; returns 1 as soon
# as one of them does not show up in "module list".
setup_env() {
  local compiler=$1
  local compiler_modules=$(get_compiler_modules $compiler)

  module purge

  local mod
  for mod in $compiler_modules; do
    echo "Loading module $mod"
    module load $mod 2>&1
    # It is ridiculously hard to check for the success of a loaded
    # module. Module does not return error codes and piping to grep
    # causes module to run in a subshell.
    module list 2>&1 | grep "$mod" >& /dev/null || return 1
  done

  return 0
}

# single_build_and_test <COMPILER> <BUILD> <BUILD_TYPE>
single_build_and_test() {
  # Use sane var names
  local compiler=$1; local build=$2; local build_type=$3;

  # set up env
  mkdir -p $ROOT_DIR/$compiler/"${build}-$build_type"
  cd $ROOT_DIR/$compiler/"${build}-$build_type"
  local desc=$(echo "${compiler}-${build}-${build_type}" | sed 's:/:-:g')
  setup_env $compiler >& ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }

  # Set up flags
  local compiler_warning_flags=$(get_compiler_warning_flags $compiler)
  local compiler_exe=$(get_compiler_exe_name $compiler)

  if [[ "$build_type" = hwloc* ]]; then
    local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info)))
  fi

  if [[ "$build_type" = *debug* ]]; then
    local extra_args="$extra_args --debug"
    local cxxflags="-g $compiler_warning_flags"
  else
    local cxxflags="-O3 $compiler_warning_flags"
  fi

  if [[ "$compiler" == cuda* ]]; then
    cxxflags="--keep --keep-dir=$(pwd) $cxxflags"
    export TMPDIR=$(pwd)
  fi

  # cxxflags="-DKOKKOS_USING_EXP_VIEW=1 $cxxflags"

  echo " Starting job $desc"

  local comment="no_comment"

  if [ "$TEST_SCRIPT" = "True" ]; then
    # $(( )) replaces the deprecated $[ ] arithmetic form.
    local rand=$(( 1 + RANDOM % 10 ))
    sleep $rand
    if [ $rand -gt 5 ]; then
      run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; }
    fi
  else
    run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
    local -i build_start_time=$(date +%s)
    run_cmd make build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; }
    local -i build_end_time=$(date +%s)
    comment="build_time=$(($build_end_time-$build_start_time))"
    if [[ "$BUILD_ONLY" == False ]]; then
      run_cmd make test >& ${desc}.test.log || { report_and_log_test_result 1 ${desc} test && return 0; }
      local -i run_end_time=$(date +%s)
      comment="$comment run_time=$(($run_end_time-$build_end_time))"
    fi
  fi

  report_and_log_test_result 0 $desc "$comment"

  return 0
}

# wait_for_jobs <NUM-JOBS>
# Blocks until fewer than NUM-JOBS background jobs remain.
wait_for_jobs() {
  local -i max_jobs=$1
  local -i num_active_jobs=$(jobs | wc -l)
  while [ $num_active_jobs -ge $max_jobs ]
  do
    sleep 1
    num_active_jobs=$(jobs | wc -l)
    jobs >& /dev/null
  done
}

# run_in_background <COMPILER> <BUILD> <BUILD_TYPE>
run_in_background() {
  local compiler=$1

  local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL
  if [[ "$BUILD_ONLY" == True ]]; then
    num_jobs=8
  else
    if [[ "$compiler" == cuda* ]]; then
      num_jobs=1
    fi
  fi
  wait_for_jobs $num_jobs

  single_build_and_test $* &
}

# build_and_test_all <COMPILER>
build_and_test_all() {
  # Get compiler data
  local compiler=$1
  if [ -z "$CUSTOM_BUILD_LIST" ]; then
    local compiler_build_list=$(get_compiler_build_list $compiler)
  else
    local compiler_build_list=$(echo "$CUSTOM_BUILD_LIST" | tr , ' ')
  fi

  # do builds
  local build
  for build in $compiler_build_list
  do
    run_in_background $compiler $build $BUILD_TYPE

    # If not cuda, do a hwloc test too
    if [[ "$compiler" != cuda* && "$SKIP_HWLOC" == False ]]; then
      run_in_background $compiler $build "hwloc-$BUILD_TYPE"
    fi
  done

  return 0
}

# get_test_root_dir
# Deletes the oldest result directories beyond NUM_RESULTS_TO_KEEP and prints
# the path of a new time-stamped result directory.
get_test_root_dir() {
  local existing_results=$(find . -maxdepth 1 -name "$RESULT_ROOT_PREFIX*" | sort)
  # An empty list still yields one line from echo, so count it explicitly as 0.
  local -i num_existing_results=0
  if [ -n "$existing_results" ]; then
    num_existing_results=$(echo $existing_results | tr ' ' '\n' | wc -l)
  fi
  local -i num_to_delete=${num_existing_results}-${NUM_RESULTS_TO_KEEP}

  if [ $num_to_delete -gt 0 ]; then
    /bin/rm -rf $(echo $existing_results | tr ' ' '\n' | head -n $num_to_delete)
  fi

  echo $(pwd)/${RESULT_ROOT_PREFIX}_$(date +"%Y-%m-%d_%H.%M.%S")
}

# wait_summarize_and_exit
# Waits for all background jobs, prints the passed/failed summary and exits
# with the number of failed tests (capped at 255).
wait_summarize_and_exit() {
  wait_for_jobs 1

  echo "#######################################################"
  echo "PASSED TESTS"
  echo "#######################################################"

  local passed_test
  for passed_test in $(\ls -1 $PASSED_DIR | sort)
  do
    echo $passed_test $(cat $PASSED_DIR/$passed_test)
  done

  echo "#######################################################"
  echo "FAILED TESTS"
  echo "#######################################################"

  local failed_test
  local -i rv=0
  for failed_test in $(\ls -1 $FAILED_DIR | sort)
  do
    echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)"
    rv=$rv+1
  done

  # Exit statuses are taken modulo 256; without the cap 256 failures would
  # be reported as success.
  if [ $rv -gt 255 ]; then
    rv=255
  fi

  exit $rv
}

#
# Main
#

ROOT_DIR=$(get_test_root_dir)
mkdir -p $ROOT_DIR
cd $ROOT_DIR

PASSED_DIR=$ROOT_DIR/results/passed
FAILED_DIR=$ROOT_DIR/results/failed
mkdir -p $PASSED_DIR
mkdir -p $FAILED_DIR

echo "Going to test compilers: " $COMPILERS_TO_TEST
for COMPILER in $COMPILERS_TO_TEST; do
  echo "Testing compiler $COMPILER"
  build_and_test_all $COMPILER
done

wait_summarize_and_exit
diff --git a/lib/kokkos/config/testing_scripts/README b/lib/kokkos/config/testing_scripts/README
new file mode 100644
index 0000000000000000000000000000000000000000..455afffd840514e98686dadcd2c46a774590456c
--- /dev/null
+++ b/lib/kokkos/config/testing_scripts/README
@@ -0,0 +1,5 @@
jenkins_test_driver is designed to be run through Jenkins as a
multiconfiguration job. It relies on a number of environment variables that will
only be set when run in that context. It is possible to override these if you
know the Jenkins job setup. It is not recommended that a non-expert try to run
this script directly.
diff --git a/lib/kokkos/config/testing_scripts/jenkins_test_driver b/lib/kokkos/config/testing_scripts/jenkins_test_driver
new file mode 100755
index 0000000000000000000000000000000000000000..9cba7fa51856b59706a8c6655aef42b39847766b
--- /dev/null
+++ b/lib/kokkos/config/testing_scripts/jenkins_test_driver
@@ -0,0 +1,83 @@
#!/bin/bash -x

echo "Building for BUILD_TYPE = ${BUILD_TYPE}"
echo "Building with HOST_COMPILER = ${HOST_COMPILER}"
echo "Building in ${WORKSPACE}"

module use /home/projects/modulefiles

BUILD_TYPE=`echo $BUILD_TYPE | tr "~" " "`
build_options=""
for item in ${BUILD_TYPE}; do
  build_options="$build_options --with-$item"
done

kokkos_path=${WORKSPACE}/kokkos
gtest_path=${WORKSPACE}/kokkos/tpls/gtest

echo ${WORKSPACE}
pwd

#extract information from the provided parameters.
host_compiler_brand=`echo $HOST_COMPILER | grep -o "^[a-zA-Z]*"`
cuda_compiler=`echo $BUILD_TYPE | grep -o "cuda_[^ ]*"`

host_compiler_module=`echo $HOST_COMPILER | tr "_" "/"`
cuda_compiler_module=`echo $cuda_compiler | tr "_" "/"`
build_path=`echo $BUILD_TYPE | tr " " "_"`

module load $host_compiler_module
# Only load a cuda module when the build type actually names one.
if [ -n "$cuda_compiler_module" ]; then
  module load $cuda_compiler_module
fi

case $host_compiler_brand in
  gcc)
    module load nvcc-wrapper/gnu
    compiler=g++
    ;;
  intel)
    module load nvcc-wrapper/intel
    compiler=icpc
    ;;
  *)
    echo "Unrecognized compiler brand."
    exit 1
    ;;
esac

#if cuda is on we need to set the host compiler for the
#nvcc wrapper and make the wrapper the compiler.
+if [ $cuda_compiler != "" ]; then + export NVCC_WRAPPER_DEFAULT_COMPILER=$compiler + compiler=$kokkos_path/config/nvcc_wrapper +fi + +if [ $host_compiler_brand == "intel" -a $cuda_compiler != "" ]; then + echo "Intel compilers are not supported with cuda at this time." + exit 0 +fi + +rm -rf test-$build_path +mkdir test-$build_path +cd test-$build_path + +/bin/bash $kokkos_path/generate_makefile.bash $build_options --kokkos-path="$kokkos_path" --with-gtest="$gtest_path" --compiler=$compiler 2>&1 |tee configure.out + +if [ ${PIPESTATUS[0]} != 0 ]; then + echo "Configure failed." + exit 1 +fi + +make build-test 2>&1 | tee build.log + +if [ ${PIPESTATUS[0]} != 0 ]; then + echo "Build failed." + exit 1 +fi + +make test 2>&1 | tee test.log + +grep "FAIL" test.log +if [ $? == 0 ]; then + echo "Tests failed." + exit 1 +fi diff --git a/lib/kokkos/config/testing_scripts/obj_size_opt_check b/lib/kokkos/config/testing_scripts/obj_size_opt_check new file mode 100755 index 0000000000000000000000000000000000000000..47c84d1a92a8a288115ecf0d416d57b349fb69b4 --- /dev/null +++ b/lib/kokkos/config/testing_scripts/obj_size_opt_check @@ -0,0 +1,287 @@ +#! /usr/bin/env python + +""" +Compute the size at which the current compiler will start to +significantly scale back optimization. + +The CPP file being modified will need the following tags. +// JGF_DUPLICATE_BEGIN - Put before start of function to duplicate +// JGF_DUPLICATE_END - Put after end of function to duplcate +// JGF_DUPE function_name(args); - Put anywhere where it's legal to +put a function call but not in your timing section. 
+ +The program will need to output the string: +FOM: <number> +This will represent the program's performance +""" + +import argparse, sys, os, doctest, subprocess, re, time + +VERBOSE = False + +############################################################################### +def parse_command_line(args, description): +############################################################################### + parser = argparse.ArgumentParser( + usage="""\n%s <cppfile> <build-command> <run-command> [--verbose] +OR +%s --help +OR +%s --test + +\033[1mEXAMPLES:\033[0m + > %s foo.cpp 'make -j4' foo +""" % ((os.path.basename(args[0]), ) * 4), + +description=description, + +formatter_class=argparse.ArgumentDefaultsHelpFormatter +) + + parser.add_argument("cppfile", help="Name of file to modify.") + + parser.add_argument("buildcmd", help="Build command") + + parser.add_argument("execmd", help="Run command") + + parser.add_argument("-v", "--verbose", action="store_true", + help="Print extra information") + + parser.add_argument("-s", "--start", type=int, default=1, + help="Starting number of dupes") + + parser.add_argument("-e", "--end", type=int, default=1000, + help="Ending number of dupes") + + parser.add_argument("-n", "--repeat", type=int, default=10, + help="Number of times to repeat an individial execution. 
Best value will be taken.") + + parser.add_argument("-t", "--template", action="store_true", + help="Use templating instead of source copying to increase object size") + + parser.add_argument("-c", "--csv", action="store_true", + help="Print results as CSV") + + args = parser.parse_args(args[1:]) + + if (args.verbose): + global VERBOSE + VERBOSE = True + + return args.cppfile, args.buildcmd, args.execmd, args.start, args.end, args.repeat, args.template, args.csv + +############################################################################### +def verbose_print(msg, override=None): +############################################################################### + if ( (VERBOSE and not override is False) or override): + print msg + +############################################################################### +def error_print(msg): +############################################################################### + print >> sys.stderr, msg + +############################################################################### +def expect(condition, error_msg): +############################################################################### + """ + Similar to assert except doesn't generate an ugly stacktrace. Useful for + checking user error, not programming error. 
+ """ + if (not condition): + raise SystemExit("FAIL: %s" % error_msg) + +############################################################################### +def run_cmd(cmd, ok_to_fail=False, input_str=None, from_dir=None, verbose=None, + arg_stdout=subprocess.PIPE, arg_stderr=subprocess.PIPE): +############################################################################### + verbose_print("RUN: %s" % cmd, verbose) + + if (input_str is not None): + stdin = subprocess.PIPE + else: + stdin = None + + proc = subprocess.Popen(cmd, + shell=True, + stdout=arg_stdout, + stderr=arg_stderr, + stdin=stdin, + cwd=from_dir) + output, errput = proc.communicate(input_str) + output = output.strip() if output is not None else output + stat = proc.wait() + + if (ok_to_fail): + return stat, output, errput + else: + if (arg_stderr is not None): + errput = errput if errput is not None else open(arg_stderr.name, "r").read() + expect(stat == 0, "Command: '%s' failed with error '%s'" % (cmd, errput)) + else: + expect(stat == 0, "Command: '%s' failed. 
See terminal output" % cmd) + return output + +############################################################################### +def build_and_run(source, cppfile, buildcmd, execmd, repeat): +############################################################################### + open(cppfile, 'w').writelines(source) + + run_cmd(buildcmd) + + best = None + for i in xrange(repeat): + wait_for_quiet_machine() + output = run_cmd(execmd) + + current = None + fom_regex = re.compile(r'^FOM: ([0-9.]+)$') + for line in output.splitlines(): + m = fom_regex.match(line) + if (m is not None): + current = float(m.groups()[0]) + break + + expect(current is not None, "No lines in output matched FOM regex") + + if (best is None or best < current): + best = current + + return best + +############################################################################### +def wait_for_quiet_machine(): +############################################################################### + while(True): + time.sleep(2) + + # The first iteration of top gives garbage results + idle_pct_raw = run_cmd("top -bn2 | grep 'Cpu(s)' | tr ',' ' ' | tail -n 1 | awk '{print $5}'") + + idle_pct_re = re.compile(r'^([0-9.]+)%id$') + m = idle_pct_re.match(idle_pct_raw) + + expect(m is not None, "top not returning output in expected form") + + idle_pct = float(m.groups()[0]) + if (idle_pct < 95): + error_print("Machine is too busy, waiting for it to become free") + else: + break + +############################################################################### +def add_n_dupes(curr_lines, num_dupes, template): +############################################################################### + function_name = None + function_invocation = None + function_lines = [] + + function_re = re.compile(r'^.* (\w+) *[(]') + function_inv_re = re.compile(r'^.*JGF_DUPE: +(.+)$') + + # Get function lines + record = False + definition_insertion_point = None + invocation_insertion_point = None + for idx, line in enumerate(curr_lines): + if 
("JGF_DUPLICATE_BEGIN" in line): + record = True + m = function_re.match(curr_lines[idx+1]) + expect(m is not None, "Could not find function in line '%s'" % curr_lines[idx+1]) + function_name = m.groups()[0] + + elif ("JGF_DUPLICATE_END" in line): + record = False + definition_insertion_point = idx + 1 + + elif (record): + function_lines.append(line) + + elif ("JGF_DUPE" in line): + m = function_inv_re.match(line) + expect(m is not None, "Could not find function invocation example in line '%s'" % line) + function_invocation = m.groups()[0] + invocation_insertion_point = idx + 1 + + expect(function_name is not None, "Could not find name of dupe function") + expect(function_invocation is not None, "Could not find function invocation point") + + expect(definition_insertion_point < invocation_insertion_point, "fix me") + + dupe_func_defs = [] + dupe_invocations = ["int jgf_rand = std::rand();\n", "if (false) {}\n"] + + for i in xrange(num_dupes): + if (not template): + dupe_func = list(function_lines) + dupe_func[0] = dupe_func[0].replace(function_name, "%s%d" % (function_name, i)) + dupe_func_defs.extend(dupe_func) + + dupe_invocations.append("else if (jgf_rand == %d) " % i) + if (template): + dupe_call = function_invocation.replace(function_name, "%s<%d>" % (function_name, i)) + "\n" + else: + dupe_call = function_invocation.replace(function_name, "%s%d" % (function_name, i)) + "\n" + dupe_invocations.append(dupe_call) + + curr_lines[invocation_insertion_point:invocation_insertion_point] = dupe_invocations + curr_lines[definition_insertion_point:definition_insertion_point] = dupe_func_defs + +############################################################################### +def report(num_dupes, curr_lines, object_file, orig_fom, curr_fom, csv=False, is_first_report=False): +############################################################################### + fom_change = (curr_fom - orig_fom) / orig_fom + + if (csv): + if (is_first_report): + print "num_dupes, 
obj_byte_size, loc, fom, pct_diff" + + print "%s, %s, %s, %s, %s" % (num_dupes, os.path.getsize(object_file), len(curr_lines), curr_fom, fom_change*100) + else: + print "========================================================" + print "For number of dupes:", num_dupes + print "Object file size (bytes):", os.path.getsize(object_file) + print "Lines of code:", len(curr_lines) + print "Field of merit:", curr_fom + print "Change pct:", fom_change*100 + +############################################################################### +def obj_size_opt_check(cppfile, buildcmd, execmd, start, end, repeat, template, csv=False): +############################################################################### + orig_source_lines = open(cppfile, 'r').readlines() + + backup_file = "%s.orig" % cppfile + object_file = "%s.o" % os.path.splitext(cppfile)[0] + os.rename(cppfile, backup_file) + + orig_fom = build_and_run(orig_source_lines, cppfile, buildcmd, execmd, repeat) + report(0, orig_source_lines, object_file, orig_fom, orig_fom, csv=csv, is_first_report=True) + + i = start + while (i < end): + curr_lines = list(orig_source_lines) + add_n_dupes(curr_lines, i, template) + + curr_fom = build_and_run(curr_lines, cppfile, buildcmd, execmd, repeat) + + report(i, curr_lines, object_file, orig_fom, curr_fom, csv=csv) + + i *= 2 # make growth function configurable? 
+ + os.remove(cppfile) + os.rename(backup_file, cppfile) + +############################################################################### +def _main_func(description): +############################################################################### + if ("--test" in sys.argv): + test_results = doctest.testmod(verbose=True) + sys.exit(1 if test_results.failed > 0 else 0) + + cppfile, buildcmd, execmd, start, end, repeat, template, csv = parse_command_line(sys.argv, description) + + obj_size_opt_check(cppfile, buildcmd, execmd, start, end, repeat, template, csv) + +############################################################################### +if (__name__ == "__main__"): + _main_func(__doc__) diff --git a/lib/kokkos/containers/CMakeLists.txt b/lib/kokkos/containers/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..894935fa0110efc132b9a9680bb54c2cf9b11415 --- /dev/null +++ b/lib/kokkos/containers/CMakeLists.txt @@ -0,0 +1,10 @@ + + +TRIBITS_SUBPACKAGE(Containers) + +ADD_SUBDIRECTORY(src) + +TRIBITS_ADD_TEST_DIRECTORIES(unit_tests) +TRIBITS_ADD_TEST_DIRECTORIES(performance_tests) + +TRIBITS_SUBPACKAGE_POSTPROCESS() diff --git a/lib/kokkos/containers/cmake/Dependencies.cmake b/lib/kokkos/containers/cmake/Dependencies.cmake new file mode 100644 index 0000000000000000000000000000000000000000..1d71d8af341181f689a6a8bf63036b67584cb138 --- /dev/null +++ b/lib/kokkos/containers/cmake/Dependencies.cmake @@ -0,0 +1,5 @@ +TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( + LIB_REQUIRED_PACKAGES KokkosCore + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC + TEST_OPTIONAL_TPLS CUSPARSE + ) diff --git a/lib/kokkos/containers/cmake/KokkosContainers_config.h.in b/lib/kokkos/containers/cmake/KokkosContainers_config.h.in new file mode 100644 index 0000000000000000000000000000000000000000..d91fdda1e353eddb2088ff86327e142676c9a6c9 --- /dev/null +++ b/lib/kokkos/containers/cmake/KokkosContainers_config.h.in @@ -0,0 +1,4 @@ +#ifndef KOKKOS_CONTAINERS_CONFIG_H +#define 
KOKKOS_CONTAINERS_CONFIG_H + +#endif diff --git a/lib/kokkos/containers/performance_tests/CMakeLists.txt b/lib/kokkos/containers/performance_tests/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..726d403452bab92dfaab0a3275d9be42af6afa4f --- /dev/null +++ b/lib/kokkos/containers/performance_tests/CMakeLists.txt @@ -0,0 +1,37 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) + +SET(SOURCES + TestMain.cpp + TestCuda.cpp + ) + +IF(Kokkos_ENABLE_Pthread) + LIST( APPEND SOURCES TestThreads.cpp) +ENDIF() + +IF(Kokkos_ENABLE_OpenMP) + LIST( APPEND SOURCES TestOpenMP.cpp) +ENDIF() + +# Per #374, we always want to build this test, but we only want to run +# it as a PERFORMANCE test. That's why we separate building the test +# from running the test. + +TRIBITS_ADD_EXECUTABLE( + PerfTestExec + SOURCES ${SOURCES} + COMM serial mpi + TESTONLYLIBS kokkos_gtest + ) + +TRIBITS_ADD_TEST( + PerformanceTest + NAME PerfTestExec + COMM serial mpi + NUM_MPI_PROCS 1 + CATEGORIES PERFORMANCE + FAIL_REGULAR_EXPRESSION " FAILED " + ) diff --git a/lib/kokkos/containers/performance_tests/Makefile b/lib/kokkos/containers/performance_tests/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..e7abaf44ce07fb725bb1947d86b573ac6a15dae4 --- /dev/null +++ b/lib/kokkos/containers/performance_tests/Makefile @@ -0,0 +1,81 @@ +KOKKOS_PATH = ../.. 
+ +GTEST_PATH = ../../TPL/gtest + +vpath %.cpp ${KOKKOS_PATH}/containers/performance_tests + +default: build_all + echo "End Build" + + +include $(KOKKOS_PATH)/Makefile.kokkos + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + CXX = $(NVCC_WRAPPER) + CXXFLAGS ?= -O3 + LINK = $(CXX) + LDFLAGS ?= -lpthread +else + CXX ?= g++ + CXXFLAGS ?= -O3 + LINK ?= $(CXX) + LDFLAGS ?= -lpthread +endif + +KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests + +TEST_TARGETS = +TARGETS = + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + OBJ_CUDA = TestCuda.o TestMain.o gtest-all.o + TARGETS += KokkosContainers_PerformanceTest_Cuda + TEST_TARGETS += test-cuda +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + OBJ_THREADS = TestThreads.o TestMain.o gtest-all.o + TARGETS += KokkosContainers_PerformanceTest_Threads + TEST_TARGETS += test-threads +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + OBJ_OPENMP = TestOpenMP.o TestMain.o gtest-all.o + TARGETS += KokkosContainers_PerformanceTest_OpenMP + TEST_TARGETS += test-openmp +endif + +KokkosContainers_PerformanceTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Cuda + +KokkosContainers_PerformanceTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Threads + +KokkosContainers_PerformanceTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_OpenMP + +test-cuda: KokkosContainers_PerformanceTest_Cuda + ./KokkosContainers_PerformanceTest_Cuda + +test-threads: KokkosContainers_PerformanceTest_Threads + ./KokkosContainers_PerformanceTest_Threads + +test-openmp: KokkosContainers_PerformanceTest_OpenMP + ./KokkosContainers_PerformanceTest_OpenMP + + +build_all: 
$(TARGETS) + +test: $(TEST_TARGETS) + +clean: kokkos-clean + rm -f *.o $(TARGETS) + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + +gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc + diff --git a/lib/kokkos/containers/performance_tests/TestCuda.cpp b/lib/kokkos/containers/performance_tests/TestCuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8183adaa60b8226fdd5979253cc619ff90e701ba --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestCuda.cpp @@ -0,0 +1,109 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <stdint.h> +#include <string> +#include <iostream> +#include <iomanip> +#include <sstream> +#include <fstream> + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +#if defined( KOKKOS_HAVE_CUDA ) + +#include <TestDynRankView.hpp> + +#include <Kokkos_UnorderedMap.hpp> + +#include <TestGlobal2LocalIds.hpp> + +#include <TestUnorderedMapPerformance.hpp> + +namespace Performance { + +class cuda : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + Kokkos::HostSpace::execution_space::initialize(); + Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) ); + } + static void TearDownTestCase() + { + Kokkos::Cuda::finalize(); + Kokkos::HostSpace::execution_space::finalize(); + } +}; + +TEST_F( cuda, dynrankview_perf ) +{ + std::cout << "Cuda" << std::endl; + std::cout << " DynRankView vs View: Initialization Only " << std::endl; + test_dynrankview_op_perf<Kokkos::Cuda>( 4096 ); +} + +TEST_F( cuda, global_2_local) +{ + std::cout << "Cuda" << std::endl; + std::cout << "size, create, generate, fill, find" << std::endl; + for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + test_global_to_local_ids<Kokkos::Cuda>(i); +} + +TEST_F( cuda, 
unordered_map_performance_near) +{ + Perf::run_performance_tests<Kokkos::Cuda,true>("cuda-near"); +} + +TEST_F( cuda, unordered_map_performance_far) +{ + Perf::run_performance_tests<Kokkos::Cuda,false>("cuda-far"); +} + +} + +#endif /* #if defined( KOKKOS_HAVE_CUDA ) */ diff --git a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..aab6e6988fc847360f02474daab52110a18ef8ef --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp @@ -0,0 +1,265 @@ + +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_TEST_DYNRANKVIEW_HPP +#define KOKKOS_TEST_DYNRANKVIEW_HPP + +#include <Kokkos_Core.hpp> +#include <Kokkos_DynRankView.hpp> +#include <vector> + +#include <impl/Kokkos_Timer.hpp> + +// Compare performance of DynRankView to View, specific focus on the parenthesis operators + +namespace Performance { + +//View functor +template <typename DeviceType> +struct InitViewFunctor { + typedef Kokkos::View<double***, DeviceType> inviewtype; + inviewtype _inview; + + InitViewFunctor( inviewtype &inview_ ) : _inview(inview_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for (unsigned k = 0; k < _inview.dimension(2); ++k) { + _inview(i,j,k) = i/2 -j*j + k/3; + } + } + } + + struct SumComputationTest + { + typedef Kokkos::View<double***, DeviceType> inviewtype; + inviewtype _inview; + + typedef Kokkos::View<double*, DeviceType> outviewtype; + outviewtype _outview; + + KOKKOS_INLINE_FUNCTION + SumComputationTest(inviewtype &inview_ , outviewtype &outview_) : _inview(inview_), _outview(outview_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for (unsigned k = 0; k < _inview.dimension(2); ++k) { + _outview(i) += 
_inview(i,j,k) ; + } + } + } + }; + +}; + +template <typename DeviceType> +struct InitStrideViewFunctor { + typedef Kokkos::View<double***, Kokkos::LayoutStride, DeviceType> inviewtype; + inviewtype _inview; + + InitStrideViewFunctor( inviewtype &inview_ ) : _inview(inview_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for (unsigned k = 0; k < _inview.dimension(2); ++k) { + _inview(i,j,k) = i/2 -j*j + k/3; + } + } + } + +}; + +template <typename DeviceType> +struct InitViewRank7Functor { + typedef Kokkos::View<double*******, DeviceType> inviewtype; + inviewtype _inview; + + InitViewRank7Functor( inviewtype &inview_ ) : _inview(inview_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for (unsigned k = 0; k < _inview.dimension(2); ++k) { + _inview(i,j,k,0,0,0,0) = i/2 -j*j + k/3; + } + } + } + +}; + +//DynRankView functor +template <typename DeviceType> +struct InitDynRankViewFunctor { + typedef Kokkos::DynRankView<double, DeviceType> inviewtype; + inviewtype _inview; + + InitDynRankViewFunctor( inviewtype &inview_ ) : _inview(inview_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for (unsigned k = 0; k < _inview.dimension(2); ++k) { + _inview(i,j,k) = i/2 -j*j + k/3; + } + } + } + + struct SumComputationTest + { + typedef Kokkos::DynRankView<double, DeviceType> inviewtype; + inviewtype _inview; + + typedef Kokkos::DynRankView<double, DeviceType> outviewtype; + outviewtype _outview; + + KOKKOS_INLINE_FUNCTION + SumComputationTest(inviewtype &inview_ , outviewtype &outview_) : _inview(inview_), _outview(outview_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for (unsigned k = 0; k < _inview.dimension(2); ++k) { + _outview(i) += _inview(i,j,k) ; 
+ } + } + } + }; + +}; + + +template <typename DeviceType> +void test_dynrankview_op_perf( const int par_size ) +{ + + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; + const size_type dim2 = 900; + const size_type dim3 = 300; + + double elapsed_time_view = 0; + double elapsed_time_compview = 0; + double elapsed_time_strideview = 0; + double elapsed_time_view_rank7 = 0; + double elapsed_time_drview = 0; + double elapsed_time_compdrview = 0; + Kokkos::Timer timer; + { + Kokkos::View<double***,DeviceType> testview("testview",par_size,dim2,dim3); + typedef InitViewFunctor<DeviceType> FunctorType; + + timer.reset(); + Kokkos::RangePolicy<DeviceType> policy(0,par_size); + Kokkos::parallel_for( policy , FunctorType(testview) ); + DeviceType::fence(); + elapsed_time_view = timer.seconds(); + std::cout << " View time (init only): " << elapsed_time_view << std::endl; + + + timer.reset(); + Kokkos::View<double*,DeviceType> sumview("sumview",par_size); + Kokkos::parallel_for( policy , typename FunctorType::SumComputationTest(testview, sumview) ); + DeviceType::fence(); + elapsed_time_compview = timer.seconds(); + std::cout << " View sum computation time: " << elapsed_time_view << std::endl; + + + Kokkos::View<double***,Kokkos::LayoutStride, DeviceType> teststrideview = Kokkos::subview(testview, Kokkos::ALL, Kokkos::ALL,Kokkos::ALL); + typedef InitStrideViewFunctor<DeviceType> FunctorStrideType; + + timer.reset(); + Kokkos::parallel_for( policy , FunctorStrideType(teststrideview) ); + DeviceType::fence(); + elapsed_time_strideview = timer.seconds(); + std::cout << " Strided View time (init only): " << elapsed_time_strideview << std::endl; + } + { + Kokkos::View<double*******,DeviceType> testview("testview",par_size,dim2,dim3,1,1,1,1); + typedef InitViewRank7Functor<DeviceType> FunctorType; + + timer.reset(); + Kokkos::RangePolicy<DeviceType> policy(0,par_size); + Kokkos::parallel_for( policy , FunctorType(testview) ); + 
DeviceType::fence(); + elapsed_time_view_rank7 = timer.seconds(); + std::cout << " View Rank7 time (init only): " << elapsed_time_view_rank7 << std::endl; + } + { + Kokkos::DynRankView<double,DeviceType> testdrview("testdrview",par_size,dim2,dim3); + typedef InitDynRankViewFunctor<DeviceType> FunctorType; + + timer.reset(); + Kokkos::RangePolicy<DeviceType> policy(0,par_size); + Kokkos::parallel_for( policy , FunctorType(testdrview) ); + DeviceType::fence(); + elapsed_time_drview = timer.seconds(); + std::cout << " DynRankView time (init only): " << elapsed_time_drview << std::endl; + + timer.reset(); + Kokkos::DynRankView<double,DeviceType> sumview("sumview",par_size); + Kokkos::parallel_for( policy , typename FunctorType::SumComputationTest(testdrview, sumview) ); + DeviceType::fence(); + elapsed_time_compdrview = timer.seconds(); + std::cout << " DynRankView sum computation time: " << elapsed_time_compdrview << std::endl; + + } + + std::cout << " Ratio of View to DynRankView time: " << elapsed_time_view / elapsed_time_drview << std::endl; //expect < 1 + std::cout << " Ratio of View to DynRankView sum computation time: " << elapsed_time_compview / elapsed_time_compdrview << std::endl; //expect < 1 + std::cout << " Ratio of View to View Rank7 time: " << elapsed_time_view / elapsed_time_view_rank7 << std::endl; //expect < 1 + std::cout << " Ratio of StrideView to DynRankView time: " << elapsed_time_strideview / elapsed_time_drview << std::endl; //expect < 1 + std::cout << " Ratio of DynRankView to View Rank7 time: " << elapsed_time_drview / elapsed_time_view_rank7 << std::endl; //expect ? 
+ + timer.reset(); + +} //end test_dynrankview + + +} //end Performance +#endif diff --git a/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp new file mode 100644 index 0000000000000000000000000000000000000000..66f1fbf092dd4231e359602f4d6850fe757d7333 --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp @@ -0,0 +1,231 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP +#define KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP + +#include <Kokkos_Core.hpp> +#include <Kokkos_UnorderedMap.hpp> +#include <vector> +#include <algorithm> + +#include <impl/Kokkos_Timer.hpp> + +// This test will simulate global ids + +namespace Performance { + +static const unsigned begin_id_size = 256u; +static const unsigned end_id_size = 1u << 22; +static const unsigned id_step = 2u; + +union helper +{ + uint32_t word; + uint8_t byte[4]; +}; + + +template <typename Device> +struct generate_ids +{ + typedef Device execution_space; + typedef typename execution_space::size_type size_type; + typedef Kokkos::View<uint32_t*,execution_space> local_id_view; + + local_id_view local_2_global; + + generate_ids( local_id_view & ids) + : local_2_global(ids) + { + Kokkos::parallel_for(local_2_global.dimension_0(), *this); + } + + + KOKKOS_INLINE_FUNCTION + void operator()(size_type i) const + { + + helper x = {static_cast<uint32_t>(i)}; + + // shuffle the bytes of i to create a unique, semi-random global_id + x.word = ~x.word; + + uint8_t tmp = x.byte[3]; + x.byte[3] = x.byte[1]; + x.byte[1] = tmp; + + tmp = x.byte[2]; + x.byte[2] = x.byte[0]; + x.byte[0] = tmp; + + local_2_global[i] = x.word; + } + +}; + +template <typename Device> +struct 
fill_map +{ + typedef Device execution_space; + typedef typename execution_space::size_type size_type; + typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view; + typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view; + + global_id_view global_2_local; + local_id_view local_2_global; + + fill_map( global_id_view gIds, local_id_view lIds) + : global_2_local(gIds) , local_2_global(lIds) + { + Kokkos::parallel_for(local_2_global.dimension_0(), *this); + } + + KOKKOS_INLINE_FUNCTION + void operator()(size_type i) const + { + global_2_local.insert( local_2_global[i], i); + } + +}; + +template <typename Device> +struct find_test +{ + typedef Device execution_space; + typedef typename execution_space::size_type size_type; + typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view; + typedef Kokkos::UnorderedMap<const uint32_t, const size_type,execution_space> global_id_view; + + global_id_view global_2_local; + local_id_view local_2_global; + + typedef size_t value_type; + + find_test( global_id_view gIds, local_id_view lIds, value_type & num_errors) + : global_2_local(gIds) , local_2_global(lIds) + { + Kokkos::parallel_reduce(local_2_global.dimension_0(), *this, num_errors); + } + + KOKKOS_INLINE_FUNCTION + void init(value_type & v) const + { v = 0; } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type & dst, volatile value_type const & src) const + { dst += src; } + + KOKKOS_INLINE_FUNCTION + void operator()(size_type i, value_type & num_errors) const + { + uint32_t index = global_2_local.find( local_2_global[i] ); + + if ( global_2_local.value_at(index) != i) ++num_errors; + } + +}; + +template <typename Device> +void test_global_to_local_ids(unsigned num_ids) +{ + + typedef Device execution_space; + typedef typename execution_space::size_type size_type; + + typedef Kokkos::View<uint32_t*,execution_space> local_id_view; + typedef 
Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view; + + //size + std::cout << num_ids << ", "; + + double elasped_time = 0; + Kokkos::Timer timer; + + local_id_view local_2_global("local_ids", num_ids); + global_id_view global_2_local((3u*num_ids)/2u); + + //create + elasped_time = timer.seconds(); + std::cout << elasped_time << ", "; + timer.reset(); + + // generate unique ids + { + generate_ids<Device> gen(local_2_global); + } + Device::fence(); + // generate + elasped_time = timer.seconds(); + std::cout << elasped_time << ", "; + timer.reset(); + + { + fill_map<Device> fill(global_2_local, local_2_global); + } + Device::fence(); + + // fill + elasped_time = timer.seconds(); + std::cout << elasped_time << ", "; + timer.reset(); + + + size_t num_errors = 0; + for (int i=0; i<100; ++i) + { + find_test<Device> find(global_2_local, local_2_global,num_errors); + } + Device::fence(); + + // find + elasped_time = timer.seconds(); + std::cout << elasped_time << std::endl; + + ASSERT_EQ( num_errors, 0u); +} + + +} // namespace Performance + + +#endif //KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP + diff --git a/lib/kokkos/containers/performance_tests/TestMain.cpp b/lib/kokkos/containers/performance_tests/TestMain.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f952ab3db51028aff0a0ebfe313b2639e353ab87 --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestMain.cpp @@ -0,0 +1,50 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +int main(int argc, char *argv[]) { + ::testing::InitGoogleTest(&argc,argv); + return RUN_ALL_TESTS(); +} + diff --git a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp new file mode 100644 index 0000000000000000000000000000000000000000..da74d32ac1fad932f7354d73384ddcb9bec75354 --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp @@ -0,0 +1,140 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +#include <Kokkos_UnorderedMap.hpp> + +#include <TestGlobal2LocalIds.hpp> +#include <TestUnorderedMapPerformance.hpp> + +#include <TestDynRankView.hpp> + +#include <iomanip> +#include <sstream> +#include <string> +#include <fstream> + + +namespace Performance { + +class openmp : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + + unsigned num_threads = 4; + + if (Kokkos::hwloc::available()) { + num_threads = Kokkos::hwloc::get_available_numa_count() + * Kokkos::hwloc::get_available_cores_per_numa() + * Kokkos::hwloc::get_available_threads_per_core() + ; + + } + + std::cout << "OpenMP: " << num_threads << std::endl; + + Kokkos::OpenMP::initialize( num_threads ); + + std::cout << "available threads: " << omp_get_max_threads() << std::endl; + } + + static void TearDownTestCase() + { + Kokkos::OpenMP::finalize(); + + omp_set_num_threads(1); + + ASSERT_EQ( 1 , omp_get_max_threads() ); + } +}; + +TEST_F( openmp, dynrankview_perf ) +{ + std::cout << "OpenMP" << std::endl; + std::cout << " DynRankView vs View: Initialization Only " << std::endl; + test_dynrankview_op_perf<Kokkos::OpenMP>( 8192 ); +} + +TEST_F( openmp, global_2_local) +{ + 
std::cout << "OpenMP" << std::endl; + std::cout << "size, create, generate, fill, find" << std::endl; + for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + test_global_to_local_ids<Kokkos::OpenMP>(i); +} + +TEST_F( openmp, unordered_map_performance_near) +{ + unsigned num_openmp = 4; + if (Kokkos::hwloc::available()) { + num_openmp = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); + + } + std::ostringstream base_file_name; + base_file_name << "openmp-" << num_openmp << "-near"; + Perf::run_performance_tests<Kokkos::OpenMP,true>(base_file_name.str()); +} + +TEST_F( openmp, unordered_map_performance_far) +{ + unsigned num_openmp = 4; + if (Kokkos::hwloc::available()) { + num_openmp = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); + + } + std::ostringstream base_file_name; + base_file_name << "openmp-" << num_openmp << "-far"; + Perf::run_performance_tests<Kokkos::OpenMP,false>(base_file_name.str()); +} + +} // namespace test + diff --git a/lib/kokkos/containers/performance_tests/TestThreads.cpp b/lib/kokkos/containers/performance_tests/TestThreads.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4179b7de4c79cc095d83ef4fcdd179593a575f08 --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestThreads.cpp @@ -0,0 +1,135 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +#include <Kokkos_UnorderedMap.hpp> + +#include <iomanip> + +#include <TestGlobal2LocalIds.hpp> +#include <TestUnorderedMapPerformance.hpp> + +#include <TestDynRankView.hpp> + +#include <iomanip> +#include <sstream> +#include <string> +#include <fstream> + +namespace Performance { +/* gtest fixture for the Kokkos::Threads performance tests: SetUpTestCase prints the thread count (4, or the hwloc numa * cores * threads product when hwloc is available) and initializes Kokkos::Threads with it; TearDownTestCase finalizes Kokkos::Threads. */ +class threads : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + + unsigned num_threads = 4; + + if (Kokkos::hwloc::available()) { + num_threads = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); + + } + + std::cout << "Threads: " << num_threads << std::endl; + + Kokkos::Threads::initialize( num_threads ); + } + + static void TearDownTestCase() + { + Kokkos::Threads::finalize(); + } +}; +/* Prints a banner and runs test_dynrankview_op_perf on Kokkos::Threads with argument 8192. */ +TEST_F( threads, dynrankview_perf ) +{ + std::cout << "Threads" << std::endl; + std::cout << " DynRankView vs View: Initialization Only " << std::endl; + test_dynrankview_op_perf<Kokkos::Threads>( 8192 ); +} +/* Prints a column banner, then runs test_global_to_local_ids for sizes from begin_id_size up to end_id_size, multiplying the size by id_step each iteration. */ +TEST_F( threads, global_2_local) +{ + std::cout << "Threads" << std::endl; + std::cout << "size, create, generate, fill, find" << std::endl; + for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + test_global_to_local_ids<Kokkos::Threads>(i); +} +/* Runs Perf::run_performance_tests<Kokkos::Threads,true> with base file name "threads-<num_threads>-near"; num_threads is computed the same way as in SetUpTestCase. */ +TEST_F( threads, unordered_map_performance_near) +{ + unsigned num_threads = 4; + if (Kokkos::hwloc::available()) { + num_threads = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); + + } + std::ostringstream base_file_name; + base_file_name << "threads-" << num_threads << "-near"; + 
Perf::run_performance_tests<Kokkos::Threads,true>(base_file_name.str()); +} +/* Same as the "near" test, but instantiates run_performance_tests with the second template argument false and uses the "-far" file-name suffix. */ +TEST_F( threads, unordered_map_performance_far) +{ + unsigned num_threads = 4; + if (Kokkos::hwloc::available()) { + num_threads = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); + + } + std::ostringstream base_file_name; + base_file_name << "threads-" << num_threads << "-far"; + Perf::run_performance_tests<Kokkos::Threads,false>(base_file_name.str()); +} + +} // namespace Performance + + diff --git a/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp new file mode 100644 index 0000000000000000000000000000000000000000..71d1182cbe0ea1ca13d92833111a612a93f72f1c --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp @@ -0,0 +1,262 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP +#define KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP + +#include <impl/Kokkos_Timer.hpp> + +#include <iostream> +#include <iomanip> +#include <fstream> +#include <string> +#include <sstream> + + +namespace Perf { +/* Functor that times filling a Kokkos::UnorderedMap<uint32_t,uint32_t> on Device: operator() inserts key i/collisions when Near is true and i%(inserts/collisions) otherwise; the reduction counts failed inserts and tracks the largest list position. */ +template <typename Device, bool Near> +struct UnorderedMapTest +{ + typedef Device execution_space; + typedef Kokkos::UnorderedMap<uint32_t, uint32_t, execution_space> map_type; + typedef typename map_type::histogram_type histogram_type; + + struct value_type { + uint32_t failed_count; + uint32_t max_list; + }; + + uint32_t capacity; + uint32_t inserts; + uint32_t collisions; + double seconds; + map_type map; + histogram_type histogram; + /* Runs the insert reduction, rehashing to a larger capacity and retrying while any insert failed; stores the elapsed wall-clock seconds, prints the attempt count and nanoseconds per insert to std::cout, then calculates the histogram. */ + UnorderedMapTest( uint32_t arg_capacity, uint32_t arg_inserts, uint32_t arg_collisions) + : capacity(arg_capacity) + , inserts(arg_inserts) + , collisions(arg_collisions) + , seconds(0) + , map(capacity) + , histogram(map.get_histogram()) + { + Kokkos::Timer wall_clock ; + wall_clock.reset(); + + value_type v = {}; + int loop_count = 0; + do 
{ + ++loop_count; + + v = value_type(); + Kokkos::parallel_reduce(inserts, *this, v); + + if (v.failed_count > 0u) { + const uint32_t new_capacity = map.capacity() + ((map.capacity()*3ull)/20u) + v.failed_count/collisions ; + map.rehash( new_capacity ); + } + } while (v.failed_count > 0u); + + seconds = wall_clock.seconds(); + + switch (loop_count) + { + case 1u: std::cout << " \033[0;32m" << loop_count << "\033[0m "; break; + case 2u: std::cout << " \033[1;31m" << loop_count << "\033[0m "; break; + default: std::cout << " \033[0;31m" << loop_count << "\033[0m "; break; + } + std::cout << std::setprecision(2) << std::fixed << std::setw(5) << (1e9*(seconds/(inserts))) << "; " << std::flush; + + histogram.calculate(); + Device::fence(); + } + /* Writes one metrics row to metrics_out and one row (capacity, percent full, collisions, then the histogram output) to each of the three histogram streams. */ + void print(std::ostream & metrics_out, std::ostream & length_out, std::ostream & distance_out, std::ostream & block_distance_out) + { + metrics_out << map.capacity() << " , "; + metrics_out << inserts/collisions << " , "; + metrics_out << (100.0 * inserts/collisions) / map.capacity() << " , "; + metrics_out << inserts << " , "; + metrics_out << (map.failed_insert() ? 
"true" : "false") << " , "; + metrics_out << collisions << " , "; + metrics_out << 1e9*(seconds/inserts) << " , "; + metrics_out << seconds << std::endl; + + length_out << map.capacity() << " , "; + length_out << ((100.0 *inserts/collisions) / map.capacity()) << " , "; + length_out << collisions << " , "; + histogram.print_length(length_out); + + distance_out << map.capacity() << " , "; + distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , "; + distance_out << collisions << " , "; + histogram.print_distance(distance_out); + + block_distance_out << map.capacity() << " , "; + block_distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , "; + block_distance_out << collisions << " , "; + histogram.print_block_distance(block_distance_out); + } + + /* Reduction identity: zero failed inserts and zero list position. */ + KOKKOS_INLINE_FUNCTION + void init( value_type & v ) const + { + v.failed_count = 0; + v.max_list = 0; + } + /* Combines two partial results: sums the failure counts and keeps the larger max_list. */ + KOKKOS_INLINE_FUNCTION + void join( volatile value_type & dst, const volatile value_type & src ) const + { + dst.failed_count += src.failed_count; + dst.max_list = src.max_list < dst.max_list ? dst.max_list : src.max_list; + } + /* Inserts (key, i) into the map and folds the insert result into v. */ + KOKKOS_INLINE_FUNCTION + void operator()(uint32_t i, value_type & v) const + { + const uint32_t key = Near ? i/collisions : i%(inserts/collisions); + typename map_type::insert_result result = map.insert(key,i); + v.failed_count += !result.failed() ? 0 : 1; + v.max_list = result.list_position() < v.max_list ? 
v.max_list : result.list_position(); + } + +}; + +//#define KOKKOS_COLLECT_UNORDERED_MAP_METRICS +/* When KOKKOS_COLLECT_UNORDERED_MAP_METRICS is defined, runs UnorderedMapTest for capacities 2^14 through 2^24 and writes <base_file_name>-metrics.csv, -length.csv, -distance.csv and -block_distance.csv; otherwise only prints "skipping test". */ +template <typename Device, bool Near> +void run_performance_tests(std::string const & base_file_name) +{ +#if defined(KOKKOS_COLLECT_UNORDERED_MAP_METRICS) + std::string metrics_file_name = base_file_name + std::string("-metrics.csv"); + std::string length_file_name = base_file_name + std::string("-length.csv"); + std::string distance_file_name = base_file_name + std::string("-distance.csv"); + std::string block_distance_file_name = base_file_name + std::string("-block_distance.csv"); + + std::ofstream metrics_out( metrics_file_name.c_str(), std::ofstream::out ); + std::ofstream length_out( length_file_name.c_str(), std::ofstream::out ); + std::ofstream distance_out( distance_file_name.c_str(), std::ofstream::out ); + std::ofstream block_distance_out( block_distance_file_name.c_str(), std::ofstream::out ); + + + /* + const double test_ratios[] = { + 0.50 + , 0.75 + , 0.80 + , 0.85 + , 0.90 + , 0.95 + , 1.00 + , 1.25 + , 2.00 + }; + */ + + const double test_ratios[] = { 1.00 }; + + const int num_ratios = sizeof(test_ratios) / sizeof(double); + + /* + const uint32_t collisions[] { + 1 + , 4 + , 16 + , 64 + }; + */ + + const uint32_t collisions[] = { 16 }; + + const int num_collisions = sizeof(collisions) / sizeof(uint32_t); + + // set up file headers (each histogram row starts with capacity, percent full and collisions, so the header names all three) + metrics_out << "Capacity , Unique , Percent Full , Attempted Inserts , Failed Inserts , Collision Ratio , Nanoseconds/Inserts, Seconds" << std::endl; + length_out << "Capacity , Percent Full , Collision Ratio , "; + distance_out << "Capacity , Percent Full , Collision Ratio , "; + block_distance_out << "Capacity , Percent Full , Collision Ratio , "; + + for (int i=0; i<100; ++i) { + length_out << i << " , "; + distance_out << i << " , "; + block_distance_out << i << " , "; + } + + length_out << "\b\b\b " << std::endl; + distance_out << "\b\b\b " << std::endl; + block_distance_out << "\b\b\b " << std::endl; + + Kokkos::Timer wall_clock ; + for (int i=0; i < num_collisions ; 
++i) { + wall_clock.reset(); + std::cout << "Collisions: " << collisions[i] << std::endl; + for (int j = 0; j < num_ratios; ++j) { + std::cout << std::setprecision(1) << std::fixed << std::setw(5) << (100.0*test_ratios[j]) << "% " << std::flush; + for (uint32_t capacity = 1<<14; capacity < 1<<25; capacity = capacity << 1) { + uint32_t inserts = static_cast<uint32_t>(test_ratios[j]*(capacity)); + std::cout << capacity << std::flush; + UnorderedMapTest<Device, Near> test(capacity, inserts*collisions[i], collisions[i]); + Device::fence(); + test.print(metrics_out, length_out, distance_out, block_distance_out); + } + std::cout << "\b\b " << std::endl; + + } + std::cout << " " << wall_clock.seconds() << " secs" << std::endl; + } + metrics_out.close(); + length_out.close(); + distance_out.close(); + block_distance_out.close(); +#else + (void)base_file_name; + std::cout << "skipping test" << std::endl; +#endif +} + + +} // namespace Perf + +#endif //KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP diff --git a/lib/kokkos/containers/src/CMakeLists.txt b/lib/kokkos/containers/src/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..da5a791530fb887a409dbb236c9a512c3f960dd1 --- /dev/null +++ b/lib/kokkos/containers/src/CMakeLists.txt @@ -0,0 +1,31 @@ + +TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +#----------------------------------------------------------------------------- + +SET(HEADERS "") +SET(SOURCES "") + +SET(HEADERS_IMPL "") + +FILE(GLOB HEADERS *.hpp) +FILE(GLOB HEADERS_IMPL impl/*.hpp) +FILE(GLOB SOURCES impl/*.cpp) + +SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) + +INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/) + +TRIBITS_ADD_LIBRARY( + kokkoscontainers + HEADERS ${HEADERS} + NOINSTALLHEADERS ${HEADERS_IMPL} + SOURCES ${SOURCES} + DEPLIBS + ) + 
+#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/containers/src/Kokkos_Bitset.hpp b/lib/kokkos/containers/src/Kokkos_Bitset.hpp new file mode 100644 index 0000000000000000000000000000000000000000..74da5f61b5d1e9506bf426595e0de9574384662b --- /dev/null +++ b/lib/kokkos/containers/src/Kokkos_Bitset.hpp @@ -0,0 +1,437 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_BITSET_HPP +#define KOKKOS_BITSET_HPP + +#include <Kokkos_Core.hpp> +#include <Kokkos_Functional.hpp> + +#include <impl/Kokkos_Bitset_impl.hpp> + +#include <stdexcept> + +namespace Kokkos { + +template <typename Device = Kokkos::DefaultExecutionSpace > +class Bitset; + +template <typename Device = Kokkos::DefaultExecutionSpace > +class ConstBitset; + +template <typename DstDevice, typename SrcDevice> +void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src); + +template <typename DstDevice, typename SrcDevice> +void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src); + +template <typename DstDevice, typename SrcDevice> +void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src); + + +/// A thread safe view to a bitset +template <typename Device> +class Bitset +{ +public: + typedef Device execution_space; + typedef unsigned size_type; + + enum { BIT_SCAN_REVERSE = 1u }; + enum { MOVE_HINT_BACKWARD = 2u }; + + enum { + BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u + , BIT_SCAN_REVERSE_MOVE_HINT_FORWARD = BIT_SCAN_REVERSE + , BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD = MOVE_HINT_BACKWARD + , BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD = BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD + }; + +private: + enum { block_size = 
static_cast<unsigned>(sizeof(unsigned)*CHAR_BIT) }; + enum { block_mask = block_size-1u }; + enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) }; + +public: + + + /// constructor + /// arg_size := number of bit in set + Bitset(unsigned arg_size = 0u) + : m_size(arg_size) + , m_last_block_mask(0u) + , m_blocks("Bitset", ((m_size + block_mask) >> block_shift) ) + { + for (int i=0, end = static_cast<int>(m_size & block_mask); i < end; ++i) { + m_last_block_mask |= 1u << i; + } + } + + /// assignment + Bitset<Device> & operator = (Bitset<Device> const & rhs) + { + this->m_size = rhs.m_size; + this->m_last_block_mask = rhs.m_last_block_mask; + this->m_blocks = rhs.m_blocks; + + return *this; + } + + /// copy constructor + Bitset( Bitset<Device> const & rhs) + : m_size( rhs.m_size ) + , m_last_block_mask( rhs.m_last_block_mask ) + , m_blocks( rhs.m_blocks ) + {} + + /// number of bits in the set + /// can be call from the host or the device + KOKKOS_FORCEINLINE_FUNCTION + unsigned size() const + { return m_size; } + + /// number of bits which are set to 1 + /// can only be called from the host + unsigned count() const + { + Impl::BitsetCount< Bitset<Device> > f(*this); + return f.apply(); + } + + /// set all bits to 1 + /// can only be called from the host + void set() + { + Kokkos::deep_copy(m_blocks, ~0u ); + + if (m_last_block_mask) { + //clear the unused bits in the last block + typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy; + raw_deep_copy( m_blocks.ptr_on_device() + (m_blocks.dimension_0() -1u), &m_last_block_mask, sizeof(unsigned)); + } + } + + /// set all bits to 0 + /// can only be called from the host + void reset() + { + Kokkos::deep_copy(m_blocks, 0u ); + } + + /// set all bits to 0 + /// can only be called from the host + void clear() + { + Kokkos::deep_copy(m_blocks, 0u ); + } + + /// set i'th bit to 1 + /// can only be called from the device + KOKKOS_FORCEINLINE_FUNCTION + bool 
set( unsigned i ) const + { + if ( i < m_size ) { + unsigned * block_ptr = &m_blocks[ i >> block_shift ]; + const unsigned mask = 1u << static_cast<int>( i & block_mask ); + + return !( atomic_fetch_or( block_ptr, mask ) & mask ); + } + return false; + } + + /// set i'th bit to 0 + /// can only be called from the device + KOKKOS_FORCEINLINE_FUNCTION + bool reset( unsigned i ) const + { + if ( i < m_size ) { + unsigned * block_ptr = &m_blocks[ i >> block_shift ]; + const unsigned mask = 1u << static_cast<int>( i & block_mask ); + + return atomic_fetch_and( block_ptr, ~mask ) & mask; + } + return false; + } + + /// return true if the i'th bit set to 1 + /// can only be called from the device + KOKKOS_FORCEINLINE_FUNCTION + bool test( unsigned i ) const + { + if ( i < m_size ) { + const unsigned block = volatile_load(&m_blocks[ i >> block_shift ]); + const unsigned mask = 1u << static_cast<int>( i & block_mask ); + return block & mask; + } + return false; + } + + /// used with find_any_set_near or find_any_unset_near functions + /// returns the max number of times those functions should be call + /// when searching for an available bit + KOKKOS_FORCEINLINE_FUNCTION + unsigned max_hint() const + { + return m_blocks.dimension_0(); + } + + /// find a bit set to 1 near the hint + /// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found + /// and if result.first is false the result.second is a new hint + KOKKOS_INLINE_FUNCTION + Kokkos::pair<bool, unsigned> find_any_set_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const + { + const unsigned block_idx = (hint >> block_shift) < m_blocks.dimension_0() ? (hint >> block_shift) : 0; + const unsigned offset = hint & block_mask; + unsigned block = volatile_load(&m_blocks[ block_idx ]); + block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1)) ? 
block : block & m_last_block_mask ; + + return find_any_helper(block_idx, offset, block, scan_direction); + } + + /// find a bit set to 0 near the hint + /// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found + /// and if result.first is false the result.second is a new hint + KOKKOS_INLINE_FUNCTION + Kokkos::pair<bool, unsigned> find_any_unset_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const + { + const unsigned block_idx = hint >> block_shift; + const unsigned offset = hint & block_mask; + unsigned block = volatile_load(&m_blocks[ block_idx ]); + block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1) ) ? ~block : ~block & m_last_block_mask ; + + return find_any_helper(block_idx, offset, block, scan_direction); + } + +private: + + KOKKOS_FORCEINLINE_FUNCTION + Kokkos::pair<bool, unsigned> find_any_helper(unsigned block_idx, unsigned offset, unsigned block, unsigned scan_direction) const + { + Kokkos::pair<bool, unsigned> result( block > 0u, 0); + + if (!result.first) { + result.second = update_hint( block_idx, offset, scan_direction ); + } + else { + result.second = scan_block( (block_idx << block_shift) + , offset + , block + , scan_direction + ); + } + return result; + } + + + KOKKOS_FORCEINLINE_FUNCTION + unsigned scan_block(unsigned block_start, int offset, unsigned block, unsigned scan_direction ) const + { + offset = !(scan_direction & BIT_SCAN_REVERSE) ? offset : (offset + block_mask) & block_mask; + block = Impl::rotate_right(block, offset); + return ((( !(scan_direction & BIT_SCAN_REVERSE) ? + Impl::bit_scan_forward(block) : + Impl::bit_scan_reverse(block) + ) + offset + ) & block_mask + ) + block_start; + } + + KOKKOS_FORCEINLINE_FUNCTION + unsigned update_hint( long long block_idx, unsigned offset, unsigned scan_direction ) const + { + block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1; + block_idx = block_idx >= 0 ? 
block_idx : m_blocks.dimension_0() - 1; + block_idx = block_idx < static_cast<long long>(m_blocks.dimension_0()) ? block_idx : 0; + + return static_cast<unsigned>(block_idx)*block_size + offset; + } + +private: + + unsigned m_size; + unsigned m_last_block_mask; + View< unsigned *, execution_space, MemoryTraits<RandomAccess> > m_blocks; + +private: + template <typename DDevice> + friend class Bitset; + + template <typename DDevice> + friend class ConstBitset; + + template <typename Bitset> + friend struct Impl::BitsetCount; + + template <typename DstDevice, typename SrcDevice> + friend void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src); + + template <typename DstDevice, typename SrcDevice> + friend void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src); +}; + +/// a thread-safe view to a const bitset +/// i.e. can only test bits +template <typename Device> +class ConstBitset +{ +public: + typedef Device execution_space; + typedef unsigned size_type; + +private: + enum { block_size = static_cast<unsigned>(sizeof(unsigned)*CHAR_BIT) }; + enum { block_mask = block_size -1u }; + enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) }; + +public: + ConstBitset() + : m_size (0) + {} + + ConstBitset(Bitset<Device> const& rhs) + : m_size(rhs.m_size) + , m_blocks(rhs.m_blocks) + {} + + ConstBitset(ConstBitset<Device> const& rhs) + : m_size( rhs.m_size ) + , m_blocks( rhs.m_blocks ) + {} + + ConstBitset<Device> & operator = (Bitset<Device> const & rhs) + { + this->m_size = rhs.m_size; + this->m_blocks = rhs.m_blocks; + + return *this; + } + + ConstBitset<Device> & operator = (ConstBitset<Device> const & rhs) + { + this->m_size = rhs.m_size; + this->m_blocks = rhs.m_blocks; + + return *this; + } + + + KOKKOS_FORCEINLINE_FUNCTION + unsigned size() const + { + return m_size; + } + + unsigned count() const + { + Impl::BitsetCount< ConstBitset<Device> > f(*this); + return f.apply(); + } + + KOKKOS_FORCEINLINE_FUNCTION + bool 
test( unsigned i ) const + { + if ( i < m_size ) { + const unsigned block = m_blocks[ i >> block_shift ]; + const unsigned mask = 1u << static_cast<int>( i & block_mask ); + return block & mask; + } + return false; + } + +private: + + unsigned m_size; + View< const unsigned *, execution_space, MemoryTraits<RandomAccess> > m_blocks; + +private: + template <typename DDevice> + friend class ConstBitset; + + template <typename Bitset> + friend struct Impl::BitsetCount; + + template <typename DstDevice, typename SrcDevice> + friend void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src); + + template <typename DstDevice, typename SrcDevice> + friend void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src); +}; + + +template <typename DstDevice, typename SrcDevice> +void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src) +{ + if (dst.size() != src.size()) { + throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!"); + } + + typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; + raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0()); +} + +template <typename DstDevice, typename SrcDevice> +void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src) +{ + if (dst.size() != src.size()) { + throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!"); + } + + typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; + raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0()); +} + +template <typename DstDevice, typename SrcDevice> +void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src) +{ + if (dst.size() != src.size()) { + throw std::runtime_error("Error: Cannot deep_copy bitsets of different 
sizes!"); + } + + typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; + raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0()); +} + +} // namespace Kokkos + +#endif //KOKKOS_BITSET_HPP diff --git a/lib/kokkos/containers/src/Kokkos_DualView.hpp b/lib/kokkos/containers/src/Kokkos_DualView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1230df4d97741123f2be0a011fd8fd7a40fbd35f --- /dev/null +++ b/lib/kokkos/containers/src/Kokkos_DualView.hpp @@ -0,0 +1,982 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// \file Kokkos_DualView.hpp +/// \brief Declaration and definition of Kokkos::DualView. +/// +/// This header file declares and defines Kokkos::DualView and its +/// related nonmember functions. + +#ifndef KOKKOS_DUALVIEW_HPP +#define KOKKOS_DUALVIEW_HPP + +#include <Kokkos_Core.hpp> +#include <impl/Kokkos_Error.hpp> + +namespace Kokkos { + +/* \class DualView + * \brief Container to manage mirroring a Kokkos::View that lives + * in device memory with a Kokkos::View that lives in host memory. + * + * This class provides capabilities to manage data which exists in two + * memory spaces at the same time. It keeps views of the same layout + * on two memory spaces as well as modified flags for both + * allocations. Users are responsible for setting the modified flags + * manually if they change the data in either memory space, by calling + * the modify() method templated on the device where they modified the + * data. Users may synchronize data by calling the sync() function, + * templated on the device towards which they want to synchronize + * (i.e., the target of the one-way copy operation). 
+ * + * The DualView class also provides convenience methods such as + * realloc, resize and capacity which call the appropriate methods of + * the underlying Kokkos::View objects. + * + * The four template arguments are the same as those of Kokkos::View. + * (Please refer to that class' documentation for a detailed + * description.) + * + * \tparam DataType The type of the entries stored in the container. + * + * \tparam Layout The array's layout in memory. + * + * \tparam Device The Kokkos Device type. If its memory space is + * not the same as the host's memory space, then DualView will + * contain two separate Views: one in device memory, and one in + * host memory. Otherwise, DualView will only store one View. + * + * \tparam MemoryTraits (optional) The user's intended memory access + * behavior. Please see the documentation of Kokkos::View for + * examples. The default suffices for most users. + */ +template< class DataType , + class Arg1Type = void , + class Arg2Type = void , + class Arg3Type = void> +class DualView : public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > +{ +public: + //! \name Typedefs for device types and various Kokkos::View specializations. + //@{ + typedef ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ; + + //! The Kokkos Host Device type; + typedef typename traits::host_mirror_space host_mirror_space ; + + //! The type of a Kokkos::View on the device. + typedef View< typename traits::data_type , + Arg1Type , + Arg2Type , + Arg3Type > t_dev ; + + /// \typedef t_host + /// \brief The type of a Kokkos::View host mirror of \c t_dev. + typedef typename t_dev::HostMirror t_host ; + + //! The type of a const View on the device. + //! The type of a Kokkos::View on the device. + typedef View< typename traits::const_data_type , + Arg1Type , + Arg2Type , + Arg3Type > t_dev_const ; + + /// \typedef t_host_const + /// \brief The type of a const View host mirror of \c t_dev_const. 
+ typedef typename t_dev_const::HostMirror t_host_const; + + //! The type of a const, random-access View on the device. + typedef View< typename traits::const_data_type , + typename traits::array_layout , + typename traits::device_type , + Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_dev_const_randomread ; + + /// \typedef t_host_const_randomread + /// \brief The type of a const, random-access View host mirror of + /// \c t_dev_const_randomread. + typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread; + + //! The type of an unmanaged View on the device. + typedef View< typename traits::data_type , + typename traits::array_layout , + typename traits::device_type , + MemoryUnmanaged> t_dev_um; + + //! The type of an unmanaged View host mirror of \c t_dev_um. + typedef View< typename t_host::data_type , + typename t_host::array_layout , + typename t_host::device_type , + MemoryUnmanaged> t_host_um; + + //! The type of a const unmanaged View on the device. + typedef View< typename traits::const_data_type , + typename traits::array_layout , + typename traits::device_type , + MemoryUnmanaged> t_dev_const_um; + + //! The type of a const unmanaged View host mirror of \c t_dev_const_um. + typedef View<typename t_host::const_data_type, + typename t_host::array_layout, + typename t_host::device_type, + MemoryUnmanaged> t_host_const_um; + + //! The type of a const, random-access View on the device. + typedef View< typename t_host::const_data_type , + typename t_host::array_layout , + typename t_host::device_type , + Kokkos::MemoryTraits<Kokkos::Unmanaged|Kokkos::RandomAccess> > t_dev_const_randomread_um ; + + /// \typedef t_host_const_randomread + /// \brief The type of a const, random-access View host mirror of + /// \c t_dev_const_randomread. + typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread_um; + + //@} + //! \name The two View instances. + //@{ + + t_dev d_view; + t_host h_view; + + //@} + //! 
\name Counters to keep track of changes ("modified" flags) + //@{ + + View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_device; + View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_host; + + //@} + //! \name Constructors + //@{ + + /// \brief Empty constructor. + /// + /// Both device and host View objects are constructed using their + /// default constructors. The "modified" flags are both initialized + /// to "unmodified." + DualView () : + modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")), + modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host")) + {} + + /// \brief Constructor that allocates View objects on both host and device. + /// + /// This constructor works like the analogous constructor of View. + /// The first argument is a string label, which is entirely for your + /// benefit. (Different DualView objects may have the same label if + /// you like.) The arguments that follow are the dimensions of the + /// View objects. For example, if the View has three dimensions, + /// the first three integer arguments will be nonzero, and you may + /// omit the integer arguments that follow. + DualView (const std::string& label, + const size_t n0 = 0, + const size_t n1 = 0, + const size_t n2 = 0, + const size_t n3 = 0, + const size_t n4 = 0, + const size_t n5 = 0, + const size_t n6 = 0, + const size_t n7 = 0) + : d_view (label, n0, n1, n2, n3, n4, n5, n6, n7) + , h_view (create_mirror_view (d_view)) // without UVM, host View mirrors + , modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")) + , modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host")) + {} + + //! 
Copy constructor (shallow copy) + template<class SS, class LS, class DS, class MS> + DualView (const DualView<SS,LS,DS,MS>& src) : + d_view (src.d_view), + h_view (src.h_view), + modified_device (src.modified_device), + modified_host (src.modified_host) + {} + + //! Subview constructor + template< class SD, class S1 , class S2 , class S3 + , class Arg0 , class ... Args > + DualView( const DualView<SD,S1,S2,S3> & src + , const Arg0 & arg0 + , Args ... args + ) + : d_view( Kokkos::subview( src.d_view , arg0 , args ... ) ) + , h_view( Kokkos::subview( src.h_view , arg0 , args ... ) ) + , modified_device (src.modified_device) + , modified_host (src.modified_host) + {} + + /// \brief Create DualView from existing device and host View objects. + /// + /// This constructor assumes that the device and host View objects + /// are synchronized. You, the caller, are responsible for making + /// sure this is the case before calling this constructor. After + /// this constructor returns, you may use DualView's sync() and + /// modify() methods to ensure synchronization of the View objects. + /// + /// \param d_view_ Device View + /// \param h_view_ Host View (must have type t_host = t_dev::HostMirror) + DualView (const t_dev& d_view_, const t_host& h_view_) : + d_view (d_view_), + h_view (h_view_), + modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")), + modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host")) + { +#if ! 
KOKKOS_USING_EXP_VIEW + Impl::assert_shapes_are_equal (d_view.shape (), h_view.shape ()); +#else + if ( int(d_view.rank) != int(h_view.rank) || + d_view.dimension_0() != h_view.dimension_0() || + d_view.dimension_1() != h_view.dimension_1() || + d_view.dimension_2() != h_view.dimension_2() || + d_view.dimension_3() != h_view.dimension_3() || + d_view.dimension_4() != h_view.dimension_4() || + d_view.dimension_5() != h_view.dimension_5() || + d_view.dimension_6() != h_view.dimension_6() || + d_view.dimension_7() != h_view.dimension_7() || + d_view.stride_0() != h_view.stride_0() || + d_view.stride_1() != h_view.stride_1() || + d_view.stride_2() != h_view.stride_2() || + d_view.stride_3() != h_view.stride_3() || + d_view.stride_4() != h_view.stride_4() || + d_view.stride_5() != h_view.stride_5() || + d_view.stride_6() != h_view.stride_6() || + d_view.stride_7() != h_view.stride_7() || + d_view.span() != h_view.span() ) { + Kokkos::Impl::throw_runtime_exception("DualView constructed with incompatible views"); + } +#endif + } + + //@} + //! \name Methods for synchronizing, marking as modified, and getting Views. + //@{ + + /// \brief Return a View on a specific device \c Device. + /// + /// Please don't be afraid of the if_c expression in the return + /// value's type. That just tells the method what the return type + /// should be: t_dev if the \c Device template parameter matches + /// this DualView's device type, else t_host. 
+ /// + /// For example, suppose you create a DualView on Cuda, like this: + /// \code + /// typedef Kokkos::DualView<float, Kokkos::LayoutRight, Kokkos::Cuda> dual_view_type; + /// dual_view_type DV ("my dual view", 100); + /// \endcode + /// If you want to get the CUDA device View, do this: + /// \code + /// typename dual_view_type::t_dev cudaView = DV.view<Kokkos::Cuda> (); + /// \endcode + /// and if you want to get the host mirror of that View, do this: + /// \code + /// typedef typename Kokkos::HostSpace::execution_space host_device_type; + /// typename dual_view_type::t_host hostView = DV.view<host_device_type> (); + /// \endcode + template< class Device > + KOKKOS_INLINE_FUNCTION + const typename Impl::if_c< + Impl::is_same<typename t_dev::memory_space, + typename Device::memory_space>::value, + t_dev, + t_host>::type& view () const + { + return Impl::if_c< + Impl::is_same< + typename t_dev::memory_space, + typename Device::memory_space>::value, + t_dev, + t_host >::select (d_view , h_view); + } + + /// \brief Update data on device or host only if data in the other + /// space has been marked as modified. + /// + /// If \c Device is the same as this DualView's device type, then + /// copy data from host to device. Otherwise, copy data from device + /// to host. In either case, only copy if the source of the copy + /// has been modified. + /// + /// This is a one-way synchronization only. If the target of the + /// copy has been modified, this operation will discard those + /// modifications. It will also reset both device and host modified + /// flags. + /// + /// \note This method doesn't know on its own whether you modified + /// the data in either View. You must manually mark modified data + /// as modified, by calling the modify() method with the + /// appropriate template parameter. 
+ template<class Device> + void sync( const typename Impl::enable_if< + ( Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) || + ( Impl::is_same< Device , int>::value) + , int >::type& = 0) + { + const unsigned int dev = + Impl::if_c< + Impl::is_same< + typename t_dev::memory_space, + typename Device::memory_space>::value , + unsigned int, + unsigned int>::select (1, 0); + + if (dev) { // if Device is the same as DualView's device type + if ((modified_host () > 0) && (modified_host () >= modified_device ())) { + deep_copy (d_view, h_view); + modified_host() = modified_device() = 0; + } + } else { // hopefully Device is the same as DualView's host type + if ((modified_device () > 0) && (modified_device () >= modified_host ())) { + deep_copy (h_view, d_view); + modified_host() = modified_device() = 0; + } + } + if(Impl::is_same<typename t_host::memory_space,typename t_dev::memory_space>::value) { + t_dev::execution_space::fence(); + t_host::execution_space::fence(); + } + } + + template<class Device> + void sync ( const typename Impl::enable_if< + ( ! 
Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) || + ( Impl::is_same< Device , int>::value) + , int >::type& = 0 ) + { + const unsigned int dev = + Impl::if_c< + Impl::is_same< + typename t_dev::memory_space, + typename Device::memory_space>::value, + unsigned int, + unsigned int>::select (1, 0); + if (dev) { // if Device is the same as DualView's device type + if ((modified_host () > 0) && (modified_host () >= modified_device ())) { + Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype."); + } + } else { // hopefully Device is the same as DualView's host type + if ((modified_device () > 0) && (modified_device () >= modified_host ())) { + Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype."); + } + } + } + + template<class Device> + bool need_sync() const + { + const unsigned int dev = + Impl::if_c< + Impl::is_same< + typename t_dev::memory_space, + typename Device::memory_space>::value , + unsigned int, + unsigned int>::select (1, 0); + + if (dev) { // if Device is the same as DualView's device type + if ((modified_host () > 0) && (modified_host () >= modified_device ())) { + return true; + } + } else { // hopefully Device is the same as DualView's host type + if ((modified_device () > 0) && (modified_device () >= modified_host ())) { + return true; + } + } + return false; + } + /// \brief Mark data as modified on the given device \c Device. + /// + /// If \c Device is the same as this DualView's device type, then + /// mark the device's data as modified. Otherwise, mark the host's + /// data as modified. + template<class Device> + void modify () { + const unsigned int dev = + Impl::if_c< + Impl::is_same< + typename t_dev::memory_space, + typename Device::memory_space>::value, + unsigned int, + unsigned int>::select (1, 0); + + if (dev) { // if Device is the same as DualView's device type + // Increment the device's modified count. 
+ modified_device () = (modified_device () > modified_host () ? + modified_device () : modified_host ()) + 1; + } else { // hopefully Device is the same as DualView's host type + // Increment the host's modified count. + modified_host () = (modified_device () > modified_host () ? + modified_device () : modified_host ()) + 1; + } + } + + //@} + //! \name Methods for reallocating or resizing the View objects. + //@{ + + /// \brief Reallocate both View objects. + /// + /// This discards any existing contents of the objects, and resets + /// their modified flags. It does <i>not</i> copy the old contents + /// of either View into the new View objects. + void realloc( const size_t n0 = 0 , + const size_t n1 = 0 , + const size_t n2 = 0 , + const size_t n3 = 0 , + const size_t n4 = 0 , + const size_t n5 = 0 , + const size_t n6 = 0 , + const size_t n7 = 0 ) { + ::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7); + h_view = create_mirror_view( d_view ); + + /* Reset dirty flags */ + modified_device() = modified_host() = 0; + } + + /// \brief Resize both views, copying old contents into new if necessary. + /// + /// This method only copies the old contents into the new View + /// objects for the device which was last marked as modified. 
+ void resize( const size_t n0 = 0 , + const size_t n1 = 0 , + const size_t n2 = 0 , + const size_t n3 = 0 , + const size_t n4 = 0 , + const size_t n5 = 0 , + const size_t n6 = 0 , + const size_t n7 = 0 ) { + if(modified_device() >= modified_host()) { + /* Resize on Device */ + ::Kokkos::resize(d_view,n0,n1,n2,n3,n4,n5,n6,n7); + h_view = create_mirror_view( d_view ); + + /* Mark Device copy as modified */ + modified_device() = modified_device()+1; + + } else { + /* Realloc on Device */ + + ::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7); + t_host temp_view = create_mirror_view( d_view ); + + /* Remap on Host */ + Kokkos::deep_copy( temp_view , h_view ); + + h_view = temp_view; + + /* Mark Host copy as modified */ + modified_host() = modified_host()+1; + } + } + + //@} + //! \name Methods for getting capacity, stride, or dimension(s). + //@{ + + //! The allocation size (same as Kokkos::View::capacity). + size_t capacity() const { +#if KOKKOS_USING_EXP_VIEW + return d_view.span(); +#else + return d_view.capacity(); +#endif + } + + //! Get stride(s) for each dimension. 
  // Forwards to the device View's stride(): the strides are written
  // through the caller-supplied pointer \c stride_.
  template< typename iType>
  void stride(iType* stride_) const {
    d_view.stride(stride_);
  }

  // The dimension_N() accessors below all report the extents of the
  // device View (d_view).

  /* \brief return size of dimension 0 */
  size_t dimension_0() const {return d_view.dimension_0();}
  /* \brief return size of dimension 1 */
  size_t dimension_1() const {return d_view.dimension_1();}
  /* \brief return size of dimension 2 */
  size_t dimension_2() const {return d_view.dimension_2();}
  /* \brief return size of dimension 3 */
  size_t dimension_3() const {return d_view.dimension_3();}
  /* \brief return size of dimension 4 */
  size_t dimension_4() const {return d_view.dimension_4();}
  /* \brief return size of dimension 5 */
  size_t dimension_5() const {return d_view.dimension_5();}
  /* \brief return size of dimension 6 */
  size_t dimension_6() const {return d_view.dimension_6();}
  /* \brief return size of dimension 7 */
  size_t dimension_7() const {return d_view.dimension_7();}

  //@}
};

} // namespace Kokkos

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
//
// Partial specializations of Kokkos::subview() for DualView objects.
//

#if KOKKOS_USING_EXP_VIEW

namespace Kokkos {
namespace Impl {

// Computes the DualView type returned by subview() for a source
// DualView<D,A1,A2,A3> and subview arguments Args...: the data type,
// layout, device type and memory traits are taken from the
// traits_type of the corresponding ViewMapping.
template< class D, class A1, class A2, class A3, class ... Args >
struct DualViewSubview {

  typedef typename Kokkos::Experimental::Impl::ViewMapping
    < void
    , Kokkos::ViewTraits< D, A1, A2, A3 >
    , Args ...
    >::traits_type dst_traits ;

  typedef Kokkos::DualView
    < typename dst_traits::data_type
    , typename dst_traits::array_layout
    , typename dst_traits::device_type
    , typename dst_traits::memory_traits
    > type ;
};

} /* namespace Impl */


// Returns a DualView constructed from \c src and the subview
// arguments; the result type is Impl::DualViewSubview<...>::type.
template< class D , class A1 , class A2 , class A3 , class ... Args >
typename Impl::DualViewSubview<D,A1,A2,A3,Args...>::type
subview( const DualView<D,A1,A2,A3> & src , Args ... args )
{
  return typename
    Impl::DualViewSubview<D,A1,A2,A3,Args...>::type( src , args ... );
}

} /* namespace Kokkos */

#else

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
//
// Partial specializations of Kokkos::subview() for DualView objects.
//

namespace Kokkos {
namespace Impl {

// Specialization of ViewSubview for a DualView source: computes the
// DualView type produced by subview() from the eight (possibly void)
// subview argument types.
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
        , class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
        >
struct ViewSubview< DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type >
                  , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
                  , SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
{
private:

  typedef DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type >  SrcViewType ;

  // Vk is 1 when the k-th subview argument type is void (argument not supplied).
  enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 };
  enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 };
  enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 };
  enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 };
  enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 };
  enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 };
  enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 };
  enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 };

  // The source view rank must be equal to the input argument rank
  // Once a void argument is encountered all subsequent arguments must be void.
  // (First condition: index of the first void argument equals the rank.
  //  Second condition: the number of non-void arguments equals the rank.)
  enum { InputRank =
    Impl::StaticAssert<( SrcViewType::rank ==
                         ( V0 ? 0 : (
                           V1 ? 1 : (
                           V2 ? 2 : (
                           V3 ? 3 : (
                           V4 ? 4 : (
                           V5 ? 5 : (
                           V6 ? 6 : (
                           V7 ? 7 : 8 ))))))) ))
                       &&
                       ( SrcViewType::rank ==
                         ( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) )
    >::value ? SrcViewType::rank : 0 };

  // Rk is 1 when the k-th subview argument is a range, as reported by
  // ViewOffsetRange<...>::is_range.
  enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 };
  enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 };
  enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 };
  enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 };
  enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0 };
  enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 };
  enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 };
  enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 };

  // The output rank is the number of range arguments.
  enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
                    + unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };

  // Reverse
  // R0_rev is the range flag of the last supplied argument
  // (index InputRank-1), or 0 when InputRank is 0.
  enum { R0_rev = 0 == InputRank ? 0u : (
                  1 == InputRank ? unsigned(R0) : (
                  2 == InputRank ? unsigned(R1) : (
                  3 == InputRank ? unsigned(R2) : (
                  4 == InputRank ? unsigned(R3) : (
                  5 == InputRank ? unsigned(R4) : (
                  6 == InputRank ? unsigned(R5) : (
                  7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) };

  typedef typename SrcViewType::array_layout  SrcViewLayout ;

  // Choose array layout, attempting to preserve original layout if at all possible.
  // If none of the conditions below holds, the result is Kokkos::LayoutStride.
  typedef typename Impl::if_c<
     ( // Same Layout IF
       // OutputRank 0
       ( OutputRank == 0 )
       ||
       // OutputRank 1 or 2, InputLayout Left, Interval 0
       // because single stride one or second index has a stride.
       ( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value )
       ||
       // OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
       // because single stride one or second index has a stride.
       ( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value )
     ), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ;

  // Choose data type as a purely dynamic rank array to accommodate a runtime range.
  // OutputData is the source value_type followed by OutputRank '*'
  // (eight '*' when OutputRank matches none of 0..7).
  typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type ,
          typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *,
          typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **,
          typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***,
          typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****,
          typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****,
          typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******,
          typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******,
                                                 typename SrcViewType::value_type ********
  >::type >::type >::type >::type >::type >::type >::type >::type  OutputData ;

  // Choose space.
  // If the source view's template arg1 or arg2 is a space then use it
  // (arg1 is tested first), otherwise use the source view's execution space.

  typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type ,
          typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::execution_space
  >::type >::type OutputSpace ;

public:

  // If keeping the layout then match non-data type arguments
  // else keep execution space and memory traits.
+ typedef typename + Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value + , Kokkos::DualView< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type > + , Kokkos::DualView< OutputData , OutputViewLayout , OutputSpace + , typename SrcViewType::memory_traits > + >::type type ; +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +namespace Kokkos { + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 > +typename Impl::ViewSubview< DualView<D,A1,A2,A3> + , ArgType0 , void , void , void + , void , void , void , void + >::type +subview( const DualView<D,A1,A2,A3> & src , + const ArgType0 & arg0 ) +{ + typedef typename + Impl::ViewSubview< DualView<D,A1,A2,A3> + , ArgType0 , void , void , void + , void , void , void , void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0); + sub_view.h_view = subview(src.h_view,arg0); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 > +typename Impl::ViewSubview< DualView<D,A1,A2,A3> + , ArgType0 , ArgType1 , void , void + , void , void , void , void + >::type +subview( const DualView<D,A1,A2,A3> & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 ) +{ + typedef typename + Impl::ViewSubview< DualView<D,A1,A2,A3> + , ArgType0 , ArgType1 , void , void + , void , void , void , void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1); + sub_view.h_view = subview(src.h_view,arg0,arg1); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 > +typename Impl::ViewSubview< DualView<D,A1,A2,A3> + , ArgType0 , ArgType1 , ArgType2 , void + , void , void , void , void + >::type +subview( 
const DualView<D,A1,A2,A3> & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const ArgType2 & arg2 ) +{ + typedef typename + Impl::ViewSubview< DualView<D,A1,A2,A3> + , ArgType0 , ArgType1 , ArgType2 , void + , void , void , void , void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1,arg2); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 > +typename Impl::ViewSubview< DualView<D,A1,A2,A3> + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , void , void , void , void + >::type +subview( const DualView<D,A1,A2,A3> & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const ArgType2 & arg2 , + const ArgType3 & arg3 ) +{ + typedef typename + Impl::ViewSubview< DualView<D,A1,A2,A3> + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , void , void , void , void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , + class ArgType4 > +typename Impl::ViewSubview< DualView<D,A1,A2,A3> + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , void , void , void + >::type +subview( const DualView<D,A1,A2,A3> & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const ArgType2 & arg2 , + const ArgType3 & arg3 , + const ArgType4 & arg4 ) +{ + typedef typename + Impl::ViewSubview< DualView<D,A1,A2,A3> + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , void , void ,void + >::type + DstViewType ; 
+ DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , + class ArgType4 , class ArgType5 > +typename Impl::ViewSubview< DualView<D,A1,A2,A3> + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , void , void + >::type +subview( const DualView<D,A1,A2,A3> & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const ArgType2 & arg2 , + const ArgType3 & arg3 , + const ArgType4 & arg4 , + const ArgType5 & arg5 ) +{ + typedef typename + Impl::ViewSubview< DualView<D,A1,A2,A3> + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , void , void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , + class ArgType4 , class ArgType5 , class ArgType6 > +typename Impl::ViewSubview< DualView<D,A1,A2,A3> + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , ArgType6 , void + >::type +subview( const DualView<D,A1,A2,A3> & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const ArgType2 & arg2 , + const ArgType3 & arg3 , + const ArgType4 & arg4 , + const ArgType5 & arg5 , + const ArgType6 & arg6 ) +{ + typedef typename + Impl::ViewSubview< DualView<D,A1,A2,A3> + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , ArgType6 , void + >::type + DstViewType ; + DstViewType sub_view; + 
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , + class ArgType4 , class ArgType5 , class ArgType6 , class ArgType7 > +typename Impl::ViewSubview< DualView<D,A1,A2,A3> + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , ArgType6 , ArgType7 + >::type +subview( const DualView<D,A1,A2,A3> & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const ArgType2 & arg2 , + const ArgType3 & arg3 , + const ArgType4 & arg4 , + const ArgType5 & arg5 , + const ArgType6 & arg6 , + const ArgType7 & arg7 ) +{ + typedef typename + Impl::ViewSubview< DualView<D,A1,A2,A3> + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , ArgType6 , ArgType7 + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +} // namespace Kokkos + +#endif /* KOKKOS_USING_EXP_VIEW */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +// +// Partial specialization of Kokkos::deep_copy() for DualView objects. 
+// + +template< class DT , class DL , class DD , class DM , + class ST , class SL , class SD , class SM > +void +deep_copy (DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference + const DualView<ST,SL,SD,SM>& src ) +{ + if (src.modified_device () >= src.modified_host ()) { + deep_copy (dst.d_view, src.d_view); + dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> (); + } else { + deep_copy (dst.h_view, src.h_view); + dst.template modify<typename DualView<DT,DL,DD,DM>::host_mirror_space> (); + } +} + +template< class ExecutionSpace , + class DT , class DL , class DD , class DM , + class ST , class SL , class SD , class SM > +void +deep_copy (const ExecutionSpace& exec , + DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference + const DualView<ST,SL,SD,SM>& src ) +{ + if (src.modified_device () >= src.modified_host ()) { + deep_copy (exec, dst.d_view, src.d_view); + dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> (); + } else { + deep_copy (exec, dst.h_view, src.h_view); + dst.template modify<typename DualView<DT,DL,DD,DM>::host_mirror_space> (); + } +} + +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f72277700ad87cd0fe9cb1cdee4c2d34ff69ab80 --- /dev/null +++ b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -0,0 +1,1834 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// \file Kokkos_DynRankView.hpp +/// \brief Declaration and definition of Kokkos::Experimental::DynRankView. +/// +/// This header file declares and defines Kokkos::Experimental::DynRankView and its +/// related nonmember functions. +/* + * Changes from View + * 1. The rank of the DynRankView is returned by the method rank() + * 2. Max rank of a DynRankView is 7 + * 3. subview name is subdynrankview + * 4. 
Every subdynrankview is returned with LayoutStride + * + * NEW: Redesigned DynRankView + * 5. subview function name now available + * 6. Copy and Copy-Assign View to DynRankView + * 7. deep_copy between Views and DynRankViews + * 8. rank( view ); returns the rank of View or DynRankView + */ + +#ifndef KOKKOS_DYNRANKVIEW_HPP +#define KOKKOS_DYNRANKVIEW_HPP + +#include <Kokkos_Core.hpp> +#include <impl/Kokkos_Error.hpp> +#include <type_traits> + +namespace Kokkos { +namespace Experimental { + +template< typename DataType , class ... Properties > +class DynRankView; //forward declare + +namespace Impl { + +template <typename Specialize> +struct DynRankDimTraits { + + enum : size_t{unspecified = ~size_t(0)}; + + // Compute the rank of the view from the nonzero dimension arguments. + KOKKOS_INLINE_FUNCTION + static size_t computeRank( const size_t N0 + , const size_t N1 + , const size_t N2 + , const size_t N3 + , const size_t N4 + , const size_t N5 + , const size_t N6 + , const size_t N7 ) + { + return + ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified && N2 == unspecified && N1 == unspecified && N0 == unspecified) ? 0 + : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified && N2 == unspecified && N1 == unspecified) ? 1 + : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified && N2 == unspecified) ? 2 + : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified) ? 3 + : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified) ? 4 + : ( (N6 == unspecified && N5 == unspecified) ? 5 + : ( (N6 == unspecified) ? 6 + : 7 ) ) ) ) ) ) ); + } + + // Compute the rank of the view from the nonzero layout arguments. 
+ template <typename Layout> + KOKKOS_INLINE_FUNCTION + static size_t computeRank( const Layout& layout ) + { + return computeRank( layout.dimension[0] + , layout.dimension[1] + , layout.dimension[2] + , layout.dimension[3] + , layout.dimension[4] + , layout.dimension[5] + , layout.dimension[6] + , layout.dimension[7] ); + } + + // Create the layout for the rank-7 view. + // Non-strided Layout + template <typename Layout> + KOKKOS_INLINE_FUNCTION + static typename std::enable_if< (std::is_same<Layout , Kokkos::LayoutRight>::value || std::is_same<Layout , Kokkos::LayoutLeft>::value) , Layout >::type createLayout( const Layout& layout ) + { + return Layout( layout.dimension[0] != unspecified ? layout.dimension[0] : 1 + , layout.dimension[1] != unspecified ? layout.dimension[1] : 1 + , layout.dimension[2] != unspecified ? layout.dimension[2] : 1 + , layout.dimension[3] != unspecified ? layout.dimension[3] : 1 + , layout.dimension[4] != unspecified ? layout.dimension[4] : 1 + , layout.dimension[5] != unspecified ? layout.dimension[5] : 1 + , layout.dimension[6] != unspecified ? layout.dimension[6] : 1 + , layout.dimension[7] != unspecified ? layout.dimension[7] : 1 + ); + } + + // LayoutStride + template <typename Layout> + KOKKOS_INLINE_FUNCTION + static typename std::enable_if< (std::is_same<Layout , Kokkos::LayoutStride>::value) , Layout>::type createLayout( const Layout& layout ) + { + return Layout( layout.dimension[0] != unspecified ? layout.dimension[0] : 1 + , layout.stride[0] + , layout.dimension[1] != unspecified ? layout.dimension[1] : 1 + , layout.stride[1] + , layout.dimension[2] != unspecified ? layout.dimension[2] : 1 + , layout.stride[2] + , layout.dimension[3] != unspecified ? layout.dimension[3] : 1 + , layout.stride[3] + , layout.dimension[4] != unspecified ? layout.dimension[4] : 1 + , layout.stride[4] + , layout.dimension[5] != unspecified ? layout.dimension[5] : 1 + , layout.stride[5] + , layout.dimension[6] != unspecified ? 
layout.dimension[6] : 1 + , layout.stride[6] + , layout.dimension[7] != unspecified ? layout.dimension[7] : 1 + , layout.stride[7] + ); + } + + // Create a view from the given dimension arguments. + // This is only necessary because the shmem constructor doesn't take a layout. + template <typename ViewType, typename ViewArg> + static ViewType createView( const ViewArg& arg + , const size_t N0 + , const size_t N1 + , const size_t N2 + , const size_t N3 + , const size_t N4 + , const size_t N5 + , const size_t N6 + , const size_t N7 ) + { + return ViewType( arg + , N0 != unspecified ? N0 : 1 + , N1 != unspecified ? N1 : 1 + , N2 != unspecified ? N2 : 1 + , N3 != unspecified ? N3 : 1 + , N4 != unspecified ? N4 : 1 + , N5 != unspecified ? N5 : 1 + , N6 != unspecified ? N6 : 1 + , N7 != unspecified ? N7 : 1 ); + } +}; + + // Non-strided Layout + template <typename Layout , typename iType> + KOKKOS_INLINE_FUNCTION + static typename std::enable_if< (std::is_same<Layout , Kokkos::LayoutRight>::value || std::is_same<Layout , Kokkos::LayoutLeft>::value) && std::is_integral<iType>::value , Layout >::type reconstructLayout( const Layout& layout , iType dynrank ) + { + return Layout( dynrank > 0 ? layout.dimension[0] : ~size_t(0) + , dynrank > 1 ? layout.dimension[1] : ~size_t(0) + , dynrank > 2 ? layout.dimension[2] : ~size_t(0) + , dynrank > 3 ? layout.dimension[3] : ~size_t(0) + , dynrank > 4 ? layout.dimension[4] : ~size_t(0) + , dynrank > 5 ? layout.dimension[5] : ~size_t(0) + , dynrank > 6 ? layout.dimension[6] : ~size_t(0) + , dynrank > 7 ? layout.dimension[7] : ~size_t(0) + ); + } + + // LayoutStride + template <typename Layout , typename iType> + KOKKOS_INLINE_FUNCTION + static typename std::enable_if< (std::is_same<Layout , Kokkos::LayoutStride>::value) && std::is_integral<iType>::value , Layout >::type reconstructLayout( const Layout& layout , iType dynrank ) + { + return Layout( dynrank > 0 ? layout.dimension[0] : ~size_t(0) + , dynrank > 0 ? 
layout.stride[0] : (0) + , dynrank > 1 ? layout.dimension[1] : ~size_t(0) + , dynrank > 1 ? layout.stride[1] : (0) + , dynrank > 2 ? layout.dimension[2] : ~size_t(0) + , dynrank > 2 ? layout.stride[2] : (0) + , dynrank > 3 ? layout.dimension[3] : ~size_t(0) + , dynrank > 3 ? layout.stride[3] : (0) + , dynrank > 4 ? layout.dimension[4] : ~size_t(0) + , dynrank > 4 ? layout.stride[4] : (0) + , dynrank > 5 ? layout.dimension[5] : ~size_t(0) + , dynrank > 5 ? layout.stride[5] : (0) + , dynrank > 6 ? layout.dimension[6] : ~size_t(0) + , dynrank > 6 ? layout.stride[6] : (0) + , dynrank > 7 ? layout.dimension[7] : ~size_t(0) + , dynrank > 7 ? layout.stride[7] : (0) + ); + } + + template < typename DynRankViewType , typename iType > + void verify_dynrankview_rank ( iType N , const DynRankViewType &drv ) + { + if ( static_cast<iType>(drv.rank()) > N ) + { + Kokkos::abort( "Need at least rank arguments to the operator()" ); + } + } + + +/** \brief Assign compatible default mappings */ +struct ViewToDynRankViewTag {}; + +template< class DstTraits , class SrcTraits > +class ViewMapping< DstTraits , SrcTraits , + typename std::enable_if<( + std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value + && + std::is_same< typename DstTraits::specialize , void >::value + && + std::is_same< typename SrcTraits::specialize , void >::value + && + ( + std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value + || + ( + ( + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value || + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value || + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value + ) + && + ( + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value || + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value || + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value + ) + 
) + ) + ) , ViewToDynRankViewTag >::type > +{ +private: + + enum { is_assignable_value_type = + std::is_same< typename DstTraits::value_type + , typename SrcTraits::value_type >::value || + std::is_same< typename DstTraits::value_type + , typename SrcTraits::const_value_type >::value }; + + enum { is_assignable_layout = + std::is_same< typename DstTraits::array_layout + , typename SrcTraits::array_layout >::value || + std::is_same< typename DstTraits::array_layout + , Kokkos::LayoutStride >::value + }; + +public: + + enum { is_assignable = is_assignable_value_type && + is_assignable_layout }; + + typedef ViewMapping< DstTraits , void > DstType ; + typedef ViewMapping< SrcTraits , void > SrcType ; + + template < typename DT , typename ... DP , typename ST , typename ... SP > + KOKKOS_INLINE_FUNCTION + static void assign( Kokkos::Experimental::DynRankView< DT , DP...> & dst , const Kokkos::View< ST , SP... > & src ) + { + static_assert( is_assignable_value_type + , "View assignment must have same value type or const = non-const" ); + + static_assert( is_assignable_layout + , "View assignment must have compatible layout or have rank <= 1" ); + + // Removed dimension checks... + + typedef typename DstType::offset_type dst_offset_type ; + dst.m_map.m_offset = dst_offset_type(std::integral_constant<unsigned,0>() , src.layout() ); //Check this for integer input1 for padding, etc + dst.m_map.m_handle = Kokkos::Experimental::Impl::ViewDataHandle< DstTraits >::assign( src.m_map.m_handle , src.m_track ); + dst.m_track.assign( src.m_track , DstTraits::is_managed ); + dst.m_rank = src.Rank ; + } +}; + +} //end Impl + +/* \class DynRankView + * \brief Container that creates a Kokkos view with rank determined at runtime. + * Essentially this is a rank 7 view that wraps the access operators + * to yield the functionality of a view + * + * Changes from View + * 1. The rank of the DynRankView is returned by the method rank() + * 2. Max rank of a DynRankView is 7 + * 3. 
subview name is subdynrankview + * 4. Every subdynrankview is returned with LayoutStride + * + * NEW: Redesigned DynRankView + * 5. subview function name now available + * 6. Copy and Copy-Assign View to DynRankView + * 7. deep_copy between Views and DynRankViews + * 8. rank( view ); returns the rank of View or DynRankView + * + */ + +template< class > struct is_dyn_rank_view : public std::false_type {}; + +template< class D, class ... P > +struct is_dyn_rank_view< Kokkos::Experimental::DynRankView<D,P...> > : public std::true_type {}; + + +template< typename DataType , class ... Properties > +class DynRankView : public ViewTraits< DataType , Properties ... > +{ + static_assert( !std::is_array<DataType>::value && !std::is_pointer<DataType>::value , "Cannot template DynRankView with array or pointer datatype - must be pod" ); + +private: + template < class , class ... > friend class DynRankView ; +// template < class , class ... > friend class Kokkos::Experimental::View ; //unnecessary now... + template < class , class ... > friend class Impl::ViewMapping ; + +public: + typedef ViewTraits< DataType , Properties ... > drvtraits ; + + typedef View< DataType******* , Properties...> view_type ; + + typedef ViewTraits< DataType******* , Properties ... 
> traits ; + + +private: + typedef Kokkos::Experimental::Impl::ViewMapping< traits , void > map_type ; + typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ; + + track_type m_track ; + map_type m_map ; + unsigned m_rank; + +public: + KOKKOS_INLINE_FUNCTION + view_type & DownCast() const { return ( view_type & ) (*this); } + KOKKOS_INLINE_FUNCTION + const view_type & ConstDownCast() const { return (const view_type & ) (*this); } + + //Types below - at least the HostMirror requires the value_type, NOT the rank 7 data_type of the traits + + /** \brief Compatible view of array of scalar types */ + typedef DynRankView< typename drvtraits::scalar_array_type , + typename drvtraits::array_layout , + typename drvtraits::device_type , + typename drvtraits::memory_traits > + array_type ; + + /** \brief Compatible view of const data type */ + typedef DynRankView< typename drvtraits::const_data_type , + typename drvtraits::array_layout , + typename drvtraits::device_type , + typename drvtraits::memory_traits > + const_type ; + + /** \brief Compatible view of non-const data type */ + typedef DynRankView< typename drvtraits::non_const_data_type , + typename drvtraits::array_layout , + typename drvtraits::device_type , + typename drvtraits::memory_traits > + non_const_type ; + + /** \brief Compatible HostMirror view */ + typedef DynRankView< typename drvtraits::non_const_data_type , + typename drvtraits::array_layout , + typename drvtraits::host_mirror_space > + HostMirror ; + + + //---------------------------------------- + // Domain rank and extents + +// enum { Rank = map_type::Rank }; //Will be dyn rank of 7 always, keep the enum? 
+ + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< std::is_integral<iType>::value , size_t >::type + extent( const iType & r ) const + { return m_map.extent(r); } + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< std::is_integral<iType>::value , int >::type + extent_int( const iType & r ) const + { return static_cast<int>(m_map.extent(r)); } + + KOKKOS_INLINE_FUNCTION constexpr + typename traits::array_layout layout() const + { return m_map.layout(); } + + //---------------------------------------- + /* Deprecate all 'dimension' functions in favor of + * ISO/C++ vocabulary 'extent'. + */ + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< std::is_integral<iType>::value , size_t >::type + dimension( const iType & r ) const { return extent( r ); } + + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_map.dimension_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_map.dimension_1(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_map.dimension_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_map.dimension_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_map.dimension_4(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_map.dimension_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_map.dimension_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_map.dimension_7(); } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION constexpr size_t size() const { return m_map.dimension_0() * + m_map.dimension_1() * + m_map.dimension_2() * + m_map.dimension_3() * + m_map.dimension_4() * + m_map.dimension_5() * + m_map.dimension_6() * + m_map.dimension_7(); } + + KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() 
const { return m_map.stride_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_map.stride_1(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_map.stride_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_map.stride_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_map.stride_4(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_map.stride_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_map.stride_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_map.stride_7(); } + + template< typename iType > + KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_map.stride(s); } + + //---------------------------------------- + // Range span is the span which contains all members. + + typedef typename map_type::reference_type reference_type ; + typedef typename map_type::pointer_type pointer_type ; + + enum { reference_type_is_lvalue_reference = std::is_lvalue_reference< reference_type >::value }; + + KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_map.span(); } + // Deprecated, use 'span()' instead + KOKKOS_INLINE_FUNCTION constexpr size_t capacity() const { return m_map.span(); } + KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_map.span_is_contiguous(); } + KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { return m_map.data(); } + + // Deprecated, use 'span_is_contigous()' instead + KOKKOS_INLINE_FUNCTION constexpr bool is_contiguous() const { return m_map.span_is_contiguous(); } + // Deprecated, use 'data()' instead + KOKKOS_INLINE_FUNCTION constexpr pointer_type ptr_on_device() const { return m_map.data(); } + + //---------------------------------------- + // Allow specializations to query their specialized map + + KOKKOS_INLINE_FUNCTION + const Kokkos::Experimental::Impl::ViewMapping< traits , void > & + implementation_map() const 
{ return m_map ; } + + //---------------------------------------- + +private: + + enum { + is_layout_left = std::is_same< typename traits::array_layout + , Kokkos::LayoutLeft >::value , + + is_layout_right = std::is_same< typename traits::array_layout + , Kokkos::LayoutRight >::value , + + is_layout_stride = std::is_same< typename traits::array_layout + , Kokkos::LayoutStride >::value , + + is_default_map = + std::is_same< typename traits::specialize , void >::value && + ( is_layout_left || is_layout_right || is_layout_stride ) + }; + +// Bounds checking macros +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + +#define KOKKOS_VIEW_OPERATOR_VERIFY( N , ARG ) \ + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \ + < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify(); \ + Kokkos::Experimental::Impl::verify_dynrankview_rank ( N , *this ) ; \ + Kokkos::Experimental::Impl::view_verify_operator_bounds ARG ; + +#else + +#define KOKKOS_VIEW_OPERATOR_VERIFY( N , ARG ) \ + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \ + < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify(); + +#endif + +public: + + KOKKOS_INLINE_FUNCTION + constexpr unsigned rank() const { return m_rank; } + + + //operators () + // Rank 0 + KOKKOS_INLINE_FUNCTION + reference_type operator()() const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 0 , ( implementation_map() ) ) + return implementation_map().reference(); + //return m_map.reference(0,0,0,0,0,0,0); + } + + // Rank 1 + // This assumes a contiguous underlying memory (i.e. no padding, no striding...) + template< typename iType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< std::is_same<typename drvtraits::value_type, typename drvtraits::scalar_array_type>::value && std::is_integral<iType>::value, reference_type>::type + operator[](const iType & i0) const + { + return data()[i0]; + } + + // This assumes a contiguous underlying memory (i.e. no padding, no striding... 
+ // AND a Trilinos/Sacado scalar type ) + template< typename iType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !std::is_same<typename drvtraits::value_type, typename drvtraits::scalar_array_type>::value && std::is_integral<iType>::value, reference_type>::type + operator[](const iType & i0) const + { +// auto map = implementation_map(); + const size_t dim_scalar = m_map.dimension_scalar(); + const size_t bytes = this->span() / dim_scalar; + + typedef Kokkos::View<DataType*, typename traits::array_layout, typename traits::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged | traits::memory_traits::RandomAccess | traits::memory_traits::Atomic> > tmp_view_type; + tmp_view_type rankone_view(this->data(), bytes, dim_scalar); + return rankone_view(i0); + } + + template< typename iType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType>::value), reference_type>::type + operator()(const iType & i0 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 1 , ( m_map , i0 ) ) + return m_map.reference(i0); + } + + template< typename iType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same<typename traits::specialize , void>::value && std::is_integral<iType>::value), reference_type>::type + operator()(const iType & i0 ) const + { + return m_map.reference(i0,0,0,0,0,0,0); + } + + // Rank 2 + template< typename iType0 , typename iType1 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 2 , ( m_map , i0 , i1 ) ) + return m_map.reference(i0,i1); + } + + template< typename iType0 , typename iType1 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && 
std::is_integral<iType0>::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 2 , ( m_map , i0 , i1 ) ) + return m_map.reference(i0,i1,0,0,0,0,0); + } + + // Rank 3 + template< typename iType0 , typename iType1 , typename iType2 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 3 , ( m_map , i0 , i1 , i2 ) ) + return m_map.reference(i0,i1,i2); + } + + template< typename iType0 , typename iType1 , typename iType2 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 3 , ( m_map , i0 , i1 , i2 ) ) + return m_map.reference(i0,i1,i2,0,0,0,0); + } + + // Rank 4 + template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 4 , ( m_map , i0 , i1 , i2 , i3 ) ) + return m_map.reference(i0,i1,i2,i3); + } + + template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), 
reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 4 , ( m_map , i0 , i1 , i2 , i3 ) ) + return m_map.reference(i0,i1,i2,i3,0,0,0); + } + + // Rank 5 + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 5 , ( m_map , i0 , i1 , i2 , i3 , i4 ) ) + return m_map.reference(i0,i1,i2,i3,i4); + } + + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 5 , ( m_map , i0 , i1 , i2 , i3 , i4 ) ) + return m_map.reference(i0,i1,i2,i3,i4,0,0); + } + + // Rank 6 + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const 
iType4 & i4 , const iType5 & i5 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 6 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 ) ) + return m_map.reference(i0,i1,i2,i3,i4,i5); + } + + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 6 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 ) ) + return m_map.reference(i0,i1,i2,i3,i4,i5,0); + } + + // Rank 7 + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 , typename iType6 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value && std::is_integral<iType6>::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 7 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 , i6 ) ) + return m_map.reference(i0,i1,i2,i3,i4,i5,i6); + } + +#undef KOKKOS_VIEW_OPERATOR_VERIFY + + //---------------------------------------- + // Standard constructor, destructor, and assignment operators... 
+ + KOKKOS_INLINE_FUNCTION + ~DynRankView() {} + + KOKKOS_INLINE_FUNCTION + DynRankView() : m_track(), m_map(), m_rank() {} //Default ctor + + KOKKOS_INLINE_FUNCTION + DynRankView( const DynRankView & rhs ) : m_track( rhs.m_track ), m_map( rhs.m_map ), m_rank(rhs.m_rank) {} + + KOKKOS_INLINE_FUNCTION + DynRankView( DynRankView && rhs ) : m_track( rhs.m_track ), m_map( rhs.m_map ), m_rank(rhs.m_rank) {} + + KOKKOS_INLINE_FUNCTION + DynRankView & operator = ( const DynRankView & rhs ) { m_track = rhs.m_track; m_map = rhs.m_map; m_rank = rhs.m_rank; return *this; } + + KOKKOS_INLINE_FUNCTION + DynRankView & operator = ( DynRankView && rhs ) { m_track = rhs.m_track; m_map = rhs.m_map; m_rank = rhs.m_rank; return *this; } + + //---------------------------------------- + // Compatible view copy constructor and assignment + // may assign unmanaged from managed. + template< class RT , class ... RP > + KOKKOS_INLINE_FUNCTION + DynRankView( const DynRankView<RT,RP...> & rhs ) + : m_track( rhs.m_track , traits::is_managed ) + , m_map() + , m_rank(rhs.m_rank) + { + typedef typename DynRankView<RT,RP...> ::traits SrcTraits ; + typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" ); + Mapping::assign( m_map , rhs.m_map , rhs.m_track ); + } + + template< class RT , class ... RP > + KOKKOS_INLINE_FUNCTION + DynRankView & operator = (const DynRankView<RT,RP...> & rhs ) + { + typedef typename DynRankView<RT,RP...> ::traits SrcTraits ; + typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" ); + Mapping::assign( m_map , rhs.m_map , rhs.m_track ); + m_track.assign( rhs.m_track , traits::is_managed ); + m_rank = rhs.rank(); + return *this; + } + +// Experimental +// Copy/Assign View to DynRankView + template< class RT , class ... 
RP > + KOKKOS_INLINE_FUNCTION + DynRankView( const View<RT,RP...> & rhs ) + : m_track() + , m_map() + , m_rank( rhs.Rank ) + { + typedef typename View<RT,RP...>::traits SrcTraits ; + typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , Kokkos::Experimental::Impl::ViewToDynRankViewTag > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" ); + Mapping::assign( *this , rhs ); + } + + template< class RT , class ... RP > + KOKKOS_INLINE_FUNCTION + DynRankView & operator = ( const View<RT,RP...> & rhs ) + { + typedef typename View<RT,RP...>::traits SrcTraits ; + typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , Kokkos::Experimental::Impl::ViewToDynRankViewTag > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible View to DynRankView copy assignment" ); + Mapping::assign( *this , rhs ); + return *this ; + } + + //---------------------------------------- + // Allocation tracking properties + + KOKKOS_INLINE_FUNCTION + int use_count() const + { return m_track.use_count(); } + + inline + const std::string label() const + { return m_track.template get_label< typename traits::memory_space >(); } + + //---------------------------------------- + // Allocation according to allocation properties and array layout + // unused arg_layout dimensions must be set to ~size_t(0) so that rank deduction can properly take place + template< class ... P > + explicit inline + DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< ! Impl::ViewCtorProp< P... >::has_pointer + , typename traits::array_layout + >::type const & arg_layout + ) + : m_track() + , m_map() + , m_rank( Impl::DynRankDimTraits<typename traits::specialize>::computeRank(arg_layout) ) + { + // Append layout and spaces if not input + typedef Impl::ViewCtorProp< P ... > alloc_prop_input ; + + // use 'std::integral_constant<unsigned,I>' for non-types + // to avoid duplicate class error. 
+ typedef Impl::ViewCtorProp + < P ... + , typename std::conditional + < alloc_prop_input::has_label + , std::integral_constant<unsigned,0> + , typename std::string + >::type + , typename std::conditional + < alloc_prop_input::has_memory_space + , std::integral_constant<unsigned,1> + , typename traits::device_type::memory_space + >::type + , typename std::conditional + < alloc_prop_input::has_execution_space + , std::integral_constant<unsigned,2> + , typename traits::device_type::execution_space + >::type + > alloc_prop ; + + static_assert( traits::is_managed + , "View allocation constructor requires managed memory" ); + + if ( alloc_prop::initialize && + ! alloc_prop::execution_space::is_initialized() ) { + // If initializing view data then + // the execution space must be initialized. + Kokkos::Impl::throw_runtime_exception("Constructing DynRankView and initializing data with uninitialized execution space"); + } + + // Copy the input allocation properties with possibly defaulted properties + alloc_prop prop( arg_prop ); + +//------------------------------------------------------------ +#if defined( KOKKOS_HAVE_CUDA ) + // If allocating in CudaUVMSpace must fence before and after + // the allocation to protect against possible concurrent access + // on the CPU and the GPU. + // Fence using the trait's executon space (which will be Kokkos::Cuda) + // to avoid incomplete type errors from usng Kokkos::Cuda directly. 
+ if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { + traits::device_type::memory_space::execution_space::fence(); + } +#endif +//------------------------------------------------------------ + + Kokkos::Experimental::Impl::SharedAllocationRecord<> * + record = m_map.allocate_shared( prop , Impl::DynRankDimTraits<typename traits::specialize>::createLayout(arg_layout) ); + +//------------------------------------------------------------ +#if defined( KOKKOS_HAVE_CUDA ) + if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { + traits::device_type::memory_space::execution_space::fence(); + } +#endif +//------------------------------------------------------------ + + // Setup and initialization complete, start tracking + m_track.assign_allocated_record_to_uninitialized( record ); + } + + + // Wrappers + template< class ... P > + explicit KOKKOS_INLINE_FUNCTION + DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< Impl::ViewCtorProp< P... >::has_pointer + , typename traits::array_layout + >::type const & arg_layout + ) + : m_track() // No memory tracking + , m_map( arg_prop , Impl::DynRankDimTraits<typename traits::specialize>::createLayout(arg_layout) ) + , m_rank( Impl::DynRankDimTraits<typename traits::specialize>::computeRank(arg_layout) ) + { + static_assert( + std::is_same< pointer_type + , typename Impl::ViewCtorProp< P... >::pointer_type + >::value , + "Constructing DynRankView to wrap user memory must supply matching pointer type" ); + } + + //---------------------------------------- + //Constructor(s) + + // Simple dimension-only layout + template< class ... P > + explicit inline + DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< ! Impl::ViewCtorProp< P... 
>::has_pointer + , size_t + >::type const arg_N0 = ~size_t(0) + , const size_t arg_N1 = ~size_t(0) + , const size_t arg_N2 = ~size_t(0) + , const size_t arg_N3 = ~size_t(0) + , const size_t arg_N4 = ~size_t(0) + , const size_t arg_N5 = ~size_t(0) + , const size_t arg_N6 = ~size_t(0) + , const size_t arg_N7 = ~size_t(0) + ) + : DynRankView( arg_prop + , typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) + ) + {} + + template< class ... P > + explicit KOKKOS_INLINE_FUNCTION + DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< Impl::ViewCtorProp< P... >::has_pointer + , size_t + >::type const arg_N0 = ~size_t(0) + , const size_t arg_N1 = ~size_t(0) + , const size_t arg_N2 = ~size_t(0) + , const size_t arg_N3 = ~size_t(0) + , const size_t arg_N4 = ~size_t(0) + , const size_t arg_N5 = ~size_t(0) + , const size_t arg_N6 = ~size_t(0) + , const size_t arg_N7 = ~size_t(0) + ) + : DynRankView( arg_prop + , typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) + ) + {} + + // Allocate with label and layout + template< typename Label > + explicit inline + DynRankView( const Label & arg_label + , typename std::enable_if< + Kokkos::Experimental::Impl::is_view_label<Label>::value , + typename traits::array_layout >::type const & arg_layout + ) + : DynRankView( Impl::ViewCtorProp< std::string >( arg_label ) , arg_layout ) + {} + + // Allocate label and layout, must disambiguate from subview constructor + template< typename Label > + explicit inline + DynRankView( const Label & arg_label + , typename std::enable_if< + Kokkos::Experimental::Impl::is_view_label<Label>::value , + const size_t >::type arg_N0 = ~size_t(0) + , const size_t arg_N1 = ~size_t(0) + , const size_t arg_N2 = ~size_t(0) + , const size_t arg_N3 = ~size_t(0) + , const size_t arg_N4 = ~size_t(0) + , const size_t arg_N5 = ~size_t(0) + , const size_t arg_N6 = ~size_t(0) + , 
const size_t arg_N7 = ~size_t(0) + ) + : DynRankView( Impl::ViewCtorProp< std::string >( arg_label ) + , typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) + ) + {} + + // For backward compatibility + explicit inline + DynRankView( const ViewAllocateWithoutInitializing & arg_prop + , const typename traits::array_layout & arg_layout + ) + : DynRankView( Impl::ViewCtorProp< std::string , Kokkos::Experimental::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::Experimental::WithoutInitializing ) + , Impl::DynRankDimTraits<typename traits::specialize>::createLayout(arg_layout) + ) + {} + + explicit inline + DynRankView( const ViewAllocateWithoutInitializing & arg_prop + , const size_t arg_N0 = ~size_t(0) + , const size_t arg_N1 = ~size_t(0) + , const size_t arg_N2 = ~size_t(0) + , const size_t arg_N3 = ~size_t(0) + , const size_t arg_N4 = ~size_t(0) + , const size_t arg_N5 = ~size_t(0) + , const size_t arg_N6 = ~size_t(0) + , const size_t arg_N7 = ~size_t(0) + ) + : DynRankView(Impl::ViewCtorProp< std::string , Kokkos::Experimental::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::Experimental::WithoutInitializing ), arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7 ) + {} + + //---------------------------------------- + // Memory span required to wrap these dimensions. 
+ static constexpr size_t required_allocation_size( + const size_t arg_N0 = 0 + , const size_t arg_N1 = 0 + , const size_t arg_N2 = 0 + , const size_t arg_N3 = 0 + , const size_t arg_N4 = 0 + , const size_t arg_N5 = 0 + , const size_t arg_N6 = 0 + , const size_t arg_N7 = 0 + ) + { + return map_type::memory_span( + typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 + , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) ); + } + + explicit KOKKOS_INLINE_FUNCTION + DynRankView( pointer_type arg_ptr + , const size_t arg_N0 = ~size_t(0) + , const size_t arg_N1 = ~size_t(0) + , const size_t arg_N2 = ~size_t(0) + , const size_t arg_N3 = ~size_t(0) + , const size_t arg_N4 = ~size_t(0) + , const size_t arg_N5 = ~size_t(0) + , const size_t arg_N6 = ~size_t(0) + , const size_t arg_N7 = ~size_t(0) + ) + : DynRankView( Impl::ViewCtorProp<pointer_type>(arg_ptr) , arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7 ) + {} + + explicit KOKKOS_INLINE_FUNCTION + DynRankView( pointer_type arg_ptr + , typename traits::array_layout & arg_layout + ) + : DynRankView( Impl::ViewCtorProp<pointer_type>(arg_ptr) , arg_layout ) + {} + + + //---------------------------------------- + // Shared scratch memory constructor + + static inline + size_t shmem_size( const size_t arg_N0 = ~size_t(0) , + const size_t arg_N1 = ~size_t(0) , + const size_t arg_N2 = ~size_t(0) , + const size_t arg_N3 = ~size_t(0) , + const size_t arg_N4 = ~size_t(0) , + const size_t arg_N5 = ~size_t(0) , + const size_t arg_N6 = ~size_t(0) , + const size_t arg_N7 = ~size_t(0) ) + { + const size_t num_passed_args = + ( arg_N0 != ~size_t(0) ) + ( arg_N1 != ~size_t(0) ) + ( arg_N2 != ~size_t(0) ) + + ( arg_N3 != ~size_t(0) ) + ( arg_N4 != ~size_t(0) ) + ( arg_N5 != ~size_t(0) ) + + ( arg_N6 != ~size_t(0) ) + ( arg_N7 != ~size_t(0) ); + + if ( std::is_same<typename traits::specialize , void>::value && num_passed_args != traits::rank_dynamic ) { + Kokkos::abort( "Kokkos::View::shmem_size() rank_dynamic != number of 
arguments.\n" ); + } + {} + + return map_type::memory_span( + typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 + , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) ); + } + + explicit KOKKOS_INLINE_FUNCTION + DynRankView( const typename traits::execution_space::scratch_memory_space & arg_space + , const typename traits::array_layout & arg_layout ) + : DynRankView( Impl::ViewCtorProp<pointer_type>( + reinterpret_cast<pointer_type>( + arg_space.get_shmem( map_type::memory_span( + Impl::DynRankDimTraits<typename traits::specialize>::createLayout( arg_layout ) //is this correct? + ) ) ) ) + , arg_layout ) + {} + + explicit KOKKOS_INLINE_FUNCTION + DynRankView( const typename traits::execution_space::scratch_memory_space & arg_space + , const size_t arg_N0 = ~size_t(0) + , const size_t arg_N1 = ~size_t(0) + , const size_t arg_N2 = ~size_t(0) + , const size_t arg_N3 = ~size_t(0) + , const size_t arg_N4 = ~size_t(0) + , const size_t arg_N5 = ~size_t(0) + , const size_t arg_N6 = ~size_t(0) + , const size_t arg_N7 = ~size_t(0) ) + + : DynRankView( Impl::ViewCtorProp<pointer_type>( + reinterpret_cast<pointer_type>( + arg_space.get_shmem( + map_type::memory_span( + Impl::DynRankDimTraits<typename traits::specialize>::createLayout( + typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 + , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) ) ) ) ) + ) + , typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 + , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) + ) + {} + +}; + + + template < typename D , class ... P > + KOKKOS_INLINE_FUNCTION + constexpr unsigned rank( const DynRankView<D , P...> & DRV ) { return DRV.rank(); } //needed for transition to common constexpr method in view and dynrankview to return rank + +//---------------------------------------------------------------------------- +// Subview mapping. 
+// Deduce destination view type from source view traits and subview arguments + +namespace Impl { + +struct DynRankSubviewTag {}; + +template< class SrcTraits , class ... Args > +struct ViewMapping + < typename std::enable_if<( + std::is_same< typename SrcTraits::specialize , void >::value + && + ( + std::is_same< typename SrcTraits::array_layout + , Kokkos::LayoutLeft >::value || + std::is_same< typename SrcTraits::array_layout + , Kokkos::LayoutRight >::value || + std::is_same< typename SrcTraits::array_layout + , Kokkos::LayoutStride >::value + ) + ), DynRankSubviewTag >::type + , SrcTraits + , Args ... > +{ +private: + + enum + { RZ = false + , R0 = bool(is_integral_extent<0,Args...>::value) + , R1 = bool(is_integral_extent<1,Args...>::value) + , R2 = bool(is_integral_extent<2,Args...>::value) + , R3 = bool(is_integral_extent<3,Args...>::value) + , R4 = bool(is_integral_extent<4,Args...>::value) + , R5 = bool(is_integral_extent<5,Args...>::value) + , R6 = bool(is_integral_extent<6,Args...>::value) + }; + + enum { rank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3) + + unsigned(R4) + unsigned(R5) + unsigned(R6) }; + + typedef Kokkos::LayoutStride array_layout ; + + typedef typename SrcTraits::value_type value_type ; + + typedef value_type******* data_type ; + +public: + + typedef Kokkos::Experimental::ViewTraits + < data_type + , array_layout + , typename SrcTraits::device_type + , typename SrcTraits::memory_traits > traits_type ; + + typedef Kokkos::Experimental::View + < data_type + , array_layout + , typename SrcTraits::device_type + , typename SrcTraits::memory_traits > type ; + + + template< class MemoryTraits > + struct apply { + + static_assert( Kokkos::Impl::is_memory_traits< MemoryTraits >::value , "" ); + + typedef Kokkos::Experimental::ViewTraits + < data_type + , array_layout + , typename SrcTraits::device_type + , MemoryTraits > traits_type ; + + typedef Kokkos::Experimental::View + < data_type + , array_layout + , typename 
SrcTraits::device_type + , MemoryTraits > type ; + }; + + + typedef typename SrcTraits::dimension dimension ; + + template < class Arg0 = int, class Arg1 = int, class Arg2 = int, class Arg3 = int, class Arg4 = int, class Arg5 = int, class Arg6 = int > + struct ExtentGenerator { + KOKKOS_INLINE_FUNCTION + static SubviewExtents< 7 , rank > generator ( const dimension & dim , Arg0 arg0 = Arg0(), Arg1 arg1 = Arg1(), Arg2 arg2 = Arg2(), Arg3 arg3 = Arg3(), Arg4 arg4 = Arg4(), Arg5 arg5 = Arg5(), Arg6 arg6 = Arg6() ) + { + return SubviewExtents< 7 , rank>( dim , arg0 , arg1 , arg2 , arg3 , arg4 , arg5 , arg6 ); + } + }; + + + typedef DynRankView< value_type , array_layout , typename SrcTraits::device_type , typename SrcTraits::memory_traits > ret_type; + + template < typename T , class ... P > + KOKKOS_INLINE_FUNCTION + static ret_type subview( const unsigned src_rank , Kokkos::Experimental::DynRankView< T , P...> const & src + , Args ... args ) + { + + typedef ViewMapping< traits_type, void > DstType ; + + typedef typename std::conditional< (rank==0) , ViewDimension<> + , typename std::conditional< (rank==1) , ViewDimension<0> + , typename std::conditional< (rank==2) , ViewDimension<0,0> + , typename std::conditional< (rank==3) , ViewDimension<0,0,0> + , typename std::conditional< (rank==4) , ViewDimension<0,0,0,0> + , typename std::conditional< (rank==5) , ViewDimension<0,0,0,0,0> + , typename std::conditional< (rank==6) , ViewDimension<0,0,0,0,0,0> + , ViewDimension<0,0,0,0,0,0,0> + >::type >::type >::type >::type >::type >::type >::type DstDimType ; + + typedef ViewOffset< DstDimType , Kokkos::LayoutStride > dst_offset_type ; + typedef typename DstType::handle_type dst_handle_type ; + + ret_type dst ; + + const SubviewExtents< 7 , rank > extents = + ExtentGenerator< Args ... >::generator( src.m_map.m_offset.m_dim , args... 
) ; + + dst_offset_type tempdst( src.m_map.m_offset , extents ) ; + + dst.m_track = src.m_track ; + + dst.m_map.m_offset.m_dim.N0 = tempdst.m_dim.N0 ; + dst.m_map.m_offset.m_dim.N1 = tempdst.m_dim.N1 ; + dst.m_map.m_offset.m_dim.N2 = tempdst.m_dim.N2 ; + dst.m_map.m_offset.m_dim.N3 = tempdst.m_dim.N3 ; + dst.m_map.m_offset.m_dim.N4 = tempdst.m_dim.N4 ; + dst.m_map.m_offset.m_dim.N5 = tempdst.m_dim.N5 ; + dst.m_map.m_offset.m_dim.N6 = tempdst.m_dim.N6 ; + + dst.m_map.m_offset.m_stride.S0 = tempdst.m_stride.S0 ; + dst.m_map.m_offset.m_stride.S1 = tempdst.m_stride.S1 ; + dst.m_map.m_offset.m_stride.S2 = tempdst.m_stride.S2 ; + dst.m_map.m_offset.m_stride.S3 = tempdst.m_stride.S3 ; + dst.m_map.m_offset.m_stride.S4 = tempdst.m_stride.S4 ; + dst.m_map.m_offset.m_stride.S5 = tempdst.m_stride.S5 ; + dst.m_map.m_offset.m_stride.S6 = tempdst.m_stride.S6 ; + + dst.m_map.m_handle = dst_handle_type( src.m_map.m_handle + + src.m_map.m_offset( extents.domain_offset(0) + , extents.domain_offset(1) + , extents.domain_offset(2) + , extents.domain_offset(3) + , extents.domain_offset(4) + , extents.domain_offset(5) + , extents.domain_offset(6) + ) ); + + dst.m_rank = ( src_rank > 0 ? unsigned(R0) : 0 ) + + ( src_rank > 1 ? unsigned(R1) : 0 ) + + ( src_rank > 2 ? unsigned(R2) : 0 ) + + ( src_rank > 3 ? unsigned(R3) : 0 ) + + ( src_rank > 4 ? unsigned(R4) : 0 ) + + ( src_rank > 5 ? unsigned(R5) : 0 ) + + ( src_rank > 6 ? unsigned(R6) : 0 ) ; + + return dst ; + } +}; + +} // end Impl + + +template< class V , class ... Args > +using Subdynrankview = typename Kokkos::Experimental::Impl::ViewMapping< Kokkos::Experimental::Impl::DynRankSubviewTag , V , Args... >::ret_type ; + +template< class D , class ... P , class ...Args > +KOKKOS_INLINE_FUNCTION +Subdynrankview< ViewTraits<D******* , P...> , Args... > +subdynrankview( const Kokkos::Experimental::DynRankView< D , P... 
> &src , Args...args) + { + if ( src.rank() > sizeof...(Args) ) //allow sizeof...(Args) >= src.rank(), ignore the remaining args + { Kokkos::abort("subdynrankview: num of args must be >= rank of the source DynRankView"); } + + typedef Kokkos::Experimental::Impl::ViewMapping< Kokkos::Experimental::Impl::DynRankSubviewTag , Kokkos::Experimental::ViewTraits< D*******, P... > , Args... > metafcn ; + + return metafcn::subview( src.rank() , src , args... ); + } + +//Wrapper to allow subview function name +template< class D , class ... P , class ...Args > +KOKKOS_INLINE_FUNCTION +Subdynrankview< ViewTraits<D******* , P...> , Args... > +subview( const Kokkos::Experimental::DynRankView< D , P... > &src , Args...args) + { + return subdynrankview( src , args... ); + } + +} // namespace Experimental +} // namespace Kokkos + +namespace Kokkos { +namespace Experimental { + +// overload == and != +template< class LT , class ... LP , class RT , class ... RP > +KOKKOS_INLINE_FUNCTION +bool operator == ( const DynRankView<LT,LP...> & lhs , + const DynRankView<RT,RP...> & rhs ) +{ + // Same data, layout, dimensions + typedef ViewTraits<LT,LP...> lhs_traits ; + typedef ViewTraits<RT,RP...> rhs_traits ; + + return + std::is_same< typename lhs_traits::const_value_type , + typename rhs_traits::const_value_type >::value && + std::is_same< typename lhs_traits::array_layout , + typename rhs_traits::array_layout >::value && + std::is_same< typename lhs_traits::memory_space , + typename rhs_traits::memory_space >::value && + lhs.rank() == rhs.rank() && + lhs.data() == rhs.data() && + lhs.span() == rhs.span() && + lhs.dimension(0) == rhs.dimension(0) && + lhs.dimension(1) == rhs.dimension(1) && + lhs.dimension(2) == rhs.dimension(2) && + lhs.dimension(3) == rhs.dimension(3) && + lhs.dimension(4) == rhs.dimension(4) && + lhs.dimension(5) == rhs.dimension(5) && + lhs.dimension(6) == rhs.dimension(6) && + lhs.dimension(7) == rhs.dimension(7); +} + +template< class LT , class ... 
LP , class RT , class ... RP > +KOKKOS_INLINE_FUNCTION +bool operator != ( const DynRankView<LT,LP...> & lhs , + const DynRankView<RT,RP...> & rhs ) +{ + return ! ( operator==(lhs,rhs) ); +} + +} //end Experimental +} //end Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template< class OutputView , typename Enable = void > +struct DynRankViewFill { + + typedef typename OutputView::traits::const_value_type const_value_type ; + + const OutputView output ; + const_value_type input ; + + KOKKOS_INLINE_FUNCTION + void operator()( const size_t i0 ) const + { + const size_t n1 = output.dimension_1(); + const size_t n2 = output.dimension_2(); + const size_t n3 = output.dimension_3(); + const size_t n4 = output.dimension_4(); + const size_t n5 = output.dimension_5(); + const size_t n6 = output.dimension_6(); + + for ( size_t i1 = 0 ; i1 < n1 ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < n2 ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < n3 ; ++i3 ) { + for ( size_t i4 = 0 ; i4 < n4 ; ++i4 ) { + for ( size_t i5 = 0 ; i5 < n5 ; ++i5 ) { + for ( size_t i6 = 0 ; i6 < n6 ; ++i6 ) { + output(i0,i1,i2,i3,i4,i5,i6) = input ; + }}}}}} + } + + DynRankViewFill( const OutputView & arg_out , const_value_type & arg_in ) + : output( arg_out ), input( arg_in ) + { + typedef typename OutputView::execution_space execution_space ; + typedef Kokkos::RangePolicy< execution_space > Policy ; + + const Kokkos::Impl::ParallelFor< DynRankViewFill , Policy > closure( *this , Policy( 0 , output.dimension_0() ) ); + + closure.execute(); + + execution_space::fence(); + } +}; + +template< class OutputView > +struct DynRankViewFill< OutputView , typename std::enable_if< OutputView::Rank == 0 >::type > { + DynRankViewFill( const OutputView & dst , const typename OutputView::const_value_type & src ) + { + Kokkos::Impl::DeepCopy< 
typename OutputView::memory_space , Kokkos::HostSpace > + ( dst.data() , & src , sizeof(typename OutputView::const_value_type) ); + } +}; + +template< class OutputView , class InputView , class ExecSpace = typename OutputView::execution_space > +struct DynRankViewRemap { + + const OutputView output ; + const InputView input ; + const size_t n0 ; + const size_t n1 ; + const size_t n2 ; + const size_t n3 ; + const size_t n4 ; + const size_t n5 ; + const size_t n6 ; + const size_t n7 ; + + DynRankViewRemap( const OutputView & arg_out , const InputView & arg_in ) + : output( arg_out ), input( arg_in ) + , n0( std::min( (size_t)arg_out.dimension_0() , (size_t)arg_in.dimension_0() ) ) + , n1( std::min( (size_t)arg_out.dimension_1() , (size_t)arg_in.dimension_1() ) ) + , n2( std::min( (size_t)arg_out.dimension_2() , (size_t)arg_in.dimension_2() ) ) + , n3( std::min( (size_t)arg_out.dimension_3() , (size_t)arg_in.dimension_3() ) ) + , n4( std::min( (size_t)arg_out.dimension_4() , (size_t)arg_in.dimension_4() ) ) + , n5( std::min( (size_t)arg_out.dimension_5() , (size_t)arg_in.dimension_5() ) ) + , n6( std::min( (size_t)arg_out.dimension_6() , (size_t)arg_in.dimension_6() ) ) + , n7( std::min( (size_t)arg_out.dimension_7() , (size_t)arg_in.dimension_7() ) ) + { + typedef Kokkos::RangePolicy< ExecSpace > Policy ; + const Kokkos::Impl::ParallelFor< DynRankViewRemap , Policy > closure( *this , Policy( 0 , n0 ) ); + closure.execute(); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_t i0 ) const + { + for ( size_t i1 = 0 ; i1 < n1 ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < n2 ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < n3 ; ++i3 ) { + for ( size_t i4 = 0 ; i4 < n4 ; ++i4 ) { + for ( size_t i5 = 0 ; i5 < n5 ; ++i5 ) { + for ( size_t i6 = 0 ; i6 < n6 ; ++i6 ) { + output(i0,i1,i2,i3,i4,i5,i6) = input(i0,i1,i2,i3,i4,i5,i6); + }}}}}} + } +}; + +} /* namespace Impl */ +} /* namespace Experimental */ +} /* namespace Kokkos */ + + +namespace Kokkos { +namespace Experimental { + 
+/** \brief Deep copy a value from Host memory into a view. */ +template< class DT , class ... DP > +inline +void deep_copy + ( const DynRankView<DT,DP...> & dst + , typename ViewTraits<DT,DP...>::const_value_type & value + , typename std::enable_if< + std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value + >::type * = 0 ) +{ + static_assert( + std::is_same< typename ViewTraits<DT,DP...>::non_const_value_type , + typename ViewTraits<DT,DP...>::value_type >::value + , "deep_copy requires non-const type" ); + + Kokkos::Experimental::Impl::DynRankViewFill< DynRankView<DT,DP...> >( dst , value ); +} + +/** \brief Deep copy into a value in Host memory from a view. */ +template< class ST , class ... SP > +inline +void deep_copy + ( typename ViewTraits<ST,SP...>::non_const_value_type & dst + , const DynRankView<ST,SP...> & src + , typename std::enable_if< + std::is_same< typename ViewTraits<ST,SP...>::specialize , void >::value + >::type * = 0 ) +{ + if ( src.rank() != 0 ) + { + Kokkos::abort(""); + } + + typedef ViewTraits<ST,SP...> src_traits ; + typedef typename src_traits::memory_space src_memory_space ; + Kokkos::Impl::DeepCopy< HostSpace , src_memory_space >( & dst , src.data() , sizeof(ST) ); +} + +//---------------------------------------------------------------------------- +/** \brief A deep copy between views of the default specialization, compatible type, + * same rank, same contiguous layout. 
+ */ +template< class DstType , class SrcType > +inline +void deep_copy + ( const DstType & dst + , const SrcType & src + , typename std::enable_if<( + std::is_same< typename DstType::traits::specialize , void >::value && + std::is_same< typename SrcType::traits::specialize , void >::value + && + ( Kokkos::Experimental::is_dyn_rank_view<DstType>::value || Kokkos::Experimental::is_dyn_rank_view<SrcType>::value) + )>::type * = 0 ) +{ + static_assert( + std::is_same< typename DstType::traits::value_type , + typename DstType::traits::non_const_value_type >::value + , "deep_copy requires non-const destination type" ); + + typedef DstType dst_type ; + typedef SrcType src_type ; + + typedef typename dst_type::execution_space dst_execution_space ; + typedef typename src_type::execution_space src_execution_space ; + typedef typename dst_type::memory_space dst_memory_space ; + typedef typename src_type::memory_space src_memory_space ; + + enum { DstExecCanAccessSrc = + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value }; + + enum { SrcExecCanAccessDst = + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename src_execution_space::memory_space , dst_memory_space >::value }; + + if ( (void *) dst.data() != (void*) src.data() ) { + + // Concern: If overlapping views then a parallel copy will be erroneous. + // ... 
+ + // If same type, equal layout, equal dimensions, equal span, and contiguous memory then can byte-wise copy + if ( rank(src) == 0 && rank(dst) == 0 ) + { + typedef typename dst_type::value_type value_type ; + Kokkos::Impl::DeepCopy< dst_memory_space , src_memory_space >( dst.data() , src.data() , sizeof(value_type) ); + } + else if ( std::is_same< typename DstType::traits::value_type , + typename SrcType::traits::non_const_value_type >::value && + ( + ( std::is_same< typename DstType::traits::array_layout , + typename SrcType::traits::array_layout >::value + && + ( std::is_same< typename DstType::traits::array_layout , + typename Kokkos::LayoutLeft>::value + || + std::is_same< typename DstType::traits::array_layout , + typename Kokkos::LayoutRight>::value + ) + ) + || + ( + rank(dst) == 1 + && + rank(src) == 1 + ) + ) && + dst.span_is_contiguous() && + src.span_is_contiguous() && + dst.span() == src.span() && + dst.dimension_0() == src.dimension_0() && + dst.dimension_1() == src.dimension_1() && + dst.dimension_2() == src.dimension_2() && + dst.dimension_3() == src.dimension_3() && + dst.dimension_4() == src.dimension_4() && + dst.dimension_5() == src.dimension_5() && + dst.dimension_6() == src.dimension_6() && + dst.dimension_7() == src.dimension_7() ) { + + const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span(); + + Kokkos::Impl::DeepCopy< dst_memory_space , src_memory_space >( dst.data() , src.data() , nbytes ); + } + else if ( std::is_same< typename DstType::traits::value_type , + typename SrcType::traits::non_const_value_type >::value && + ( + ( std::is_same< typename DstType::traits::array_layout , + typename SrcType::traits::array_layout >::value + && + std::is_same< typename DstType::traits::array_layout , + typename Kokkos::LayoutStride>::value + ) + || + ( + rank(dst) == 1 + && + rank(src) == 1 + ) + ) && + dst.span_is_contiguous() && + src.span_is_contiguous() && + dst.span() == src.span() && + dst.dimension_0() == src.dimension_0() 
&& + dst.dimension_1() == src.dimension_1() && + dst.dimension_2() == src.dimension_2() && + dst.dimension_3() == src.dimension_3() && + dst.dimension_4() == src.dimension_4() && + dst.dimension_5() == src.dimension_5() && + dst.dimension_6() == src.dimension_6() && + dst.dimension_7() == src.dimension_7() && + dst.stride_0() == src.stride_0() && + dst.stride_1() == src.stride_1() && + dst.stride_2() == src.stride_2() && + dst.stride_3() == src.stride_3() && + dst.stride_4() == src.stride_4() && + dst.stride_5() == src.stride_5() && + dst.stride_6() == src.stride_6() && + dst.stride_7() == src.stride_7() + ) { + + const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span(); + + Kokkos::Impl::DeepCopy< dst_memory_space , src_memory_space >( dst.data() , src.data() , nbytes ); + } + else if ( DstExecCanAccessSrc ) { + // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape. + Kokkos::Experimental::Impl::DynRankViewRemap< dst_type , src_type >( dst , src ); + } + else if ( SrcExecCanAccessDst ) { + // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape. + Kokkos::Experimental::Impl::DynRankViewRemap< dst_type , src_type , src_execution_space >( dst , src ); + } + else { + Kokkos::Impl::throw_runtime_exception("deep_copy given views that would require a temporary allocation"); + } + } +} + +} //end Experimental +} //end Kokkos + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { + +namespace Impl { + + +// Deduce Mirror Types +template<class Space, class T, class ... 
P> +struct MirrorDRViewType { + // The incoming view_type + typedef typename Kokkos::Experimental::DynRankView<T,P...> src_view_type; + // The memory space for the mirror view + typedef typename Space::memory_space memory_space; + // Check whether it is the same memory space + enum { is_same_memspace = std::is_same<memory_space,typename src_view_type::memory_space>::value }; + // The array_layout + typedef typename src_view_type::array_layout array_layout; + // The data type (we probably want it non-const since otherwise we can't even deep_copy to it. + typedef typename src_view_type::non_const_data_type data_type; + // The destination view type if it is not the same memory space + typedef Kokkos::Experimental::DynRankView<data_type,array_layout,Space> dest_view_type; + // If it is the same memory_space return the existsing view_type + // This will also keep the unmanaged trait if necessary + typedef typename std::conditional<is_same_memspace,src_view_type,dest_view_type>::type view_type; +}; + +template<class Space, class T, class ... P> +struct MirrorDRVType { + // The incoming view_type + typedef typename Kokkos::Experimental::DynRankView<T,P...> src_view_type; + // The memory space for the mirror view + typedef typename Space::memory_space memory_space; + // Check whether it is the same memory space + enum { is_same_memspace = std::is_same<memory_space,typename src_view_type::memory_space>::value }; + // The array_layout + typedef typename src_view_type::array_layout array_layout; + // The data type (we probably want it non-const since otherwise we can't even deep_copy to it. + typedef typename src_view_type::non_const_data_type data_type; + // The destination view type if it is not the same memory space + typedef Kokkos::Experimental::DynRankView<data_type,array_layout,Space> view_type; +}; + +} + + +template< class T , class ... 
P > +inline +typename DynRankView<T,P...>::HostMirror +create_mirror( const DynRankView<T,P...> & src + , typename std::enable_if< + ! std::is_same< typename Kokkos::Experimental::ViewTraits<T,P...>::array_layout + , Kokkos::LayoutStride >::value + >::type * = 0 + ) +{ + typedef DynRankView<T,P...> src_type ; + typedef typename src_type::HostMirror dst_type ; + + return dst_type( std::string( src.label() ).append("_mirror") + , Impl::reconstructLayout(src.layout(), src.rank()) ); +} + + +template< class T , class ... P > +inline +typename DynRankView<T,P...>::HostMirror +create_mirror( const DynRankView<T,P...> & src + , typename std::enable_if< + std::is_same< typename Kokkos::Experimental::ViewTraits<T,P...>::array_layout + , Kokkos::LayoutStride >::value + >::type * = 0 + ) +{ + typedef DynRankView<T,P...> src_type ; + typedef typename src_type::HostMirror dst_type ; + + return dst_type( std::string( src.label() ).append("_mirror") + , Impl::reconstructLayout(src.layout(), src.rank()) ); +} + + +// Create a mirror in a new space (specialization for different space) +template<class Space, class T, class ... P> +typename Impl::MirrorDRVType<Space,T,P ...>::view_type create_mirror(const Space& , const Kokkos::Experimental::DynRankView<T,P...> & src) { + return typename Impl::MirrorDRVType<Space,T,P ...>::view_type(src.label(), Impl::reconstructLayout(src.layout(), src.rank()) ); +} + +template< class T , class ... P > +inline +typename DynRankView<T,P...>::HostMirror +create_mirror_view( const DynRankView<T,P...> & src + , typename std::enable_if<( + std::is_same< typename DynRankView<T,P...>::memory_space + , typename DynRankView<T,P...>::HostMirror::memory_space + >::value + && + std::is_same< typename DynRankView<T,P...>::data_type + , typename DynRankView<T,P...>::HostMirror::data_type + >::value + )>::type * = 0 + ) +{ + return src ; +} + +template< class T , class ... 
P > +inline +typename DynRankView<T,P...>::HostMirror +create_mirror_view( const DynRankView<T,P...> & src + , typename std::enable_if< ! ( + std::is_same< typename DynRankView<T,P...>::memory_space + , typename DynRankView<T,P...>::HostMirror::memory_space + >::value + && + std::is_same< typename DynRankView<T,P...>::data_type + , typename DynRankView<T,P...>::HostMirror::data_type + >::value + )>::type * = 0 + ) +{ + return Kokkos::Experimental::create_mirror( src ); +} + +// Create a mirror view in a new space (specialization for same space) +template<class Space, class T, class ... P> +typename Impl::MirrorDRViewType<Space,T,P ...>::view_type +create_mirror_view(const Space& , const Kokkos::Experimental::DynRankView<T,P...> & src + , typename std::enable_if<Impl::MirrorDRViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { + return src; +} + +// Create a mirror view in a new space (specialization for different space) +template<class Space, class T, class ... P> +typename Impl::MirrorDRViewType<Space,T,P ...>::view_type +create_mirror_view(const Space& , const Kokkos::Experimental::DynRankView<T,P...> & src + , typename std::enable_if<!Impl::MirrorDRViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { + return typename Impl::MirrorDRViewType<Space,T,P ...>::view_type(src.label(), Impl::reconstructLayout(src.layout(), src.rank()) ); +} + +} //end Experimental +} //end Kokkos + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +/** \brief Resize a view with copying old data to new data at the corresponding indices. */ +template< class T , class ... 
P > +inline +void resize( DynRankView<T,P...> & v , + const size_t n0 = ~size_t(0) , + const size_t n1 = ~size_t(0) , + const size_t n2 = ~size_t(0) , + const size_t n3 = ~size_t(0) , + const size_t n4 = ~size_t(0) , + const size_t n5 = ~size_t(0) , + const size_t n6 = ~size_t(0) , + const size_t n7 = ~size_t(0) ) +{ + typedef DynRankView<T,P...> drview_type ; + + static_assert( Kokkos::Experimental::ViewTraits<T,P...>::is_managed , "Can only resize managed views" ); + + drview_type v_resized( v.label(), n0, n1, n2, n3, n4, n5, n6 ); + + Kokkos::Experimental::Impl::DynRankViewRemap< drview_type , drview_type >( v_resized, v ); + + v = v_resized ; +} + +/** \brief Resize a view with copying old data to new data at the corresponding indices. */ +template< class T , class ... P > +inline +void realloc( DynRankView<T,P...> & v , + const size_t n0 = ~size_t(0) , + const size_t n1 = ~size_t(0) , + const size_t n2 = ~size_t(0) , + const size_t n3 = ~size_t(0) , + const size_t n4 = ~size_t(0) , + const size_t n5 = ~size_t(0) , + const size_t n6 = ~size_t(0) , + const size_t n7 = ~size_t(0) ) +{ + typedef DynRankView<T,P...> drview_type ; + + static_assert( Kokkos::Experimental::ViewTraits<T,P...>::is_managed , "Can only realloc managed views" ); + + const std::string label = v.label(); + + v = drview_type(); // Deallocate first, if the only view to allocation + v = drview_type( label, n0, n1, n2, n3, n4, n5, n6 ); +} + +} //end Experimental + +} //end Kokkos + +using Kokkos::Experimental::is_dyn_rank_view ; + +namespace Kokkos { + +template< typename D , class ... P > +using DynRankView = Kokkos::Experimental::DynRankView< D , P... 
> ; + +using Kokkos::Experimental::deep_copy ; +using Kokkos::Experimental::create_mirror ; +using Kokkos::Experimental::create_mirror_view ; +using Kokkos::Experimental::subdynrankview ; +using Kokkos::Experimental::subview ; +using Kokkos::Experimental::resize ; +using Kokkos::Experimental::realloc ; + +} //end Kokkos +#endif diff --git a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..fb364f0bf252e2ccae8aa04544487bc8f3f1a74f --- /dev/null +++ b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp @@ -0,0 +1,494 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_DYNAMIC_VIEW_HPP +#define KOKKOS_DYNAMIC_VIEW_HPP + +#include <cstdio> + +#include <Kokkos_Core.hpp> +#include <impl/Kokkos_Error.hpp> + +namespace Kokkos { +namespace Experimental { + +/** \brief Dynamic views are restricted to rank-one and no layout. + * Subviews are not allowed. + */ +template< typename DataType , typename ... P > +class DynamicView : public Kokkos::Experimental::ViewTraits< DataType , P ... > +{ +public: + + typedef ViewTraits< DataType , P ... > traits ; + +private: + + template< class , class ... 
> friend class DynamicView ; + + typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ; + + static_assert( traits::rank == 1 && traits::rank_dynamic == 1 + , "DynamicView must be rank-one" ); + + static_assert( std::is_trivial< typename traits::value_type >::value && + std::is_same< typename traits::specialize , void >::value + , "DynamicView must have trivial data type" ); + +public: + + typedef Kokkos::Experimental::MemoryPool< typename traits::device_type > memory_pool ; + +private: + + memory_pool m_pool ; + track_type m_track ; + typename traits::value_type ** m_chunks ; + unsigned m_chunk_shift ; + unsigned m_chunk_mask ; + unsigned m_chunk_max ; + +public: + + //---------------------------------------------------------------------- + + /** \brief Compatible view of array of scalar types */ + typedef DynamicView< typename traits::data_type , + typename traits::device_type > + array_type ; + + /** \brief Compatible view of const data type */ + typedef DynamicView< typename traits::const_data_type , + typename traits::device_type > + const_type ; + + /** \brief Compatible view of non-const data type */ + typedef DynamicView< typename traits::non_const_data_type , + typename traits::device_type > + non_const_type ; + + /** \brief Must be accessible everywhere */ + typedef DynamicView HostMirror ; + + //---------------------------------------------------------------------- + + enum { Rank = 1 }; + + KOKKOS_INLINE_FUNCTION constexpr size_t size() const + { + return + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace + < Kokkos::Impl::ActiveExecutionMemorySpace + , typename traits::memory_space + >::value + ? // Runtime size is at the end of the chunk pointer array + (*reinterpret_cast<const uintptr_t*>( m_chunks + m_chunk_max )) + << m_chunk_shift + : 0 ; + } + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + size_t extent( const iType & r ) const + { return r == 0 ? 
size() : 1 ; } + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + size_t extent_int( const iType & r ) const + { return r == 0 ? size() : 1 ; } + + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return size(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return 1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return 1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return 1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return 1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return 1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return 1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return 1 ; } + + KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return 0 ; } + + template< typename iType > + KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { *s = 0 ; } + + //---------------------------------------------------------------------- + // Range span is the span which contains all members. 
+ + typedef typename traits::value_type & reference_type ; + typedef typename traits::value_type * pointer_type ; + + enum { reference_type_is_lvalue_reference = std::is_lvalue_reference< reference_type >::value }; + + KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return false ; } + KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { return 0 ; } + + //---------------------------------------- + + template< typename I0 , class ... Args > + KOKKOS_INLINE_FUNCTION + reference_type operator()( const I0 & i0 , const Args & ... args ) const + { + static_assert( Kokkos::Impl::are_integral<I0,Args...>::value + , "Indices must be integral type" ); + + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace + < Kokkos::Impl::ActiveExecutionMemorySpace + , typename traits::memory_space + >::verify(); + + // Which chunk is being indexed: the high bits of i0 select the chunk, the low bits ( i0 & m_chunk_mask ) select the entry within it. + const uintptr_t ic = uintptr_t( i0 >> m_chunk_shift ); + + typename traits::value_type * volatile * const ch = m_chunks + ic ; + + // Do bounds checking if enabled or if the chunk pointer is zero. + // If not bounds checking then we assume a non-zero pointer is valid. + +#if ! defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + if ( 0 == *ch ) +#endif + { + // Verify that allocation of the requested chunk is in progress. + + // The allocated chunk counter is m_chunks[ m_chunk_max ] + const uintptr_t n = + *reinterpret_cast<uintptr_t volatile *>( m_chunks + m_chunk_max ); + + if ( n <= ic ) { + Kokkos::abort("Kokkos::DynamicView array bounds error"); + } + + // Allocation of this chunk is in progress + // so wait for allocation to complete (spins on the volatile chunk pointer). + while ( 0 == *ch ); + } + + return (*ch)[ i0 & m_chunk_mask ]; + } + + //---------------------------------------- + /** \brief Resizing in parallel only increases the array size, + * never decreases it. 
+ */ + KOKKOS_INLINE_FUNCTION + void resize_parallel( size_t n ) const + { + typedef typename traits::value_type value_type ; + + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace + < Kokkos::Impl::ActiveExecutionMemorySpace + , typename traits::memory_space >::verify(); + + const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ; + + if ( m_chunk_max < NC ) { +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + printf("DynamicView::resize_parallel(%lu) m_chunk_max(%u) NC(%lu)\n" + , static_cast<unsigned long>(n) , m_chunk_max , static_cast<unsigned long>(NC) ); // m_chunk_max is 'unsigned', hence %u +#endif + Kokkos::abort("DynamicView::resize_parallel exceeded maximum size"); + } + + typename traits::value_type * volatile * const ch = m_chunks ; + + // The allocated chunk counter is m_chunks[ m_chunk_max ] + uintptr_t volatile * const pc = + reinterpret_cast<uintptr_t volatile*>( m_chunks + m_chunk_max ); + + // Potentially concurrent iteration of allocation to the required size. + + for ( uintptr_t jc = *pc ; jc < NC ; ) { + + // Claim the 'jc' chunk to-be-allocated index + + const uintptr_t jc_try = jc ; + + // Jump iteration to the chunk counter. 
+ + jc = atomic_compare_exchange( pc , jc_try , jc_try + 1 ); + + if ( jc_try == jc ) { + + ch[jc_try] = reinterpret_cast<value_type*>( + m_pool.allocate( sizeof(value_type) << m_chunk_shift )); + + Kokkos::memory_fence(); + } + } + } + + /** \brief Resizing in serial can grow or shrink the array to hold n entries (rounded up to whole chunks); aborts if that needs more than m_chunk_max chunks. */ + inline + void resize_serial( size_t n ) + { + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace + < Kokkos::Impl::ActiveExecutionMemorySpace + , typename traits::memory_space >::verify(); + + const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ; + + if ( m_chunk_max < NC ) { + Kokkos::abort("DynamicView::resize_serial exceeded maximum size"); + } + + uintptr_t * const pc = + reinterpret_cast<uintptr_t*>( m_chunks + m_chunk_max ); + + if ( *pc < NC ) { + while ( *pc < NC ) { + m_chunks[*pc] = reinterpret_cast<typename traits::value_type*>( // same cast as resize_parallel + m_pool.allocate( sizeof(typename traits::value_type) << m_chunk_shift )); + ++*pc ; + } + } + else { + while ( NC + 1 <= *pc ) { + --*pc ; + m_pool.deallocate( m_chunks[*pc] + , sizeof(typename traits::value_type) << m_chunk_shift ); + m_chunks[*pc] = 0 ; + } + } + } + + //---------------------------------------------------------------------- + + ~DynamicView() = default ; + DynamicView() = default ; + DynamicView( DynamicView && ) = default ; + DynamicView( const DynamicView & ) = default ; + DynamicView & operator = ( DynamicView && ) = default ; + DynamicView & operator = ( const DynamicView & ) = default ; + + template< class RT , class ... RP > + KOKKOS_INLINE_FUNCTION + DynamicView( const DynamicView<RT,RP...> & rhs ) + : m_pool( rhs.m_pool ) + , m_track( rhs.m_track ) + , m_chunks( rhs.m_chunks ) + , m_chunk_shift( rhs.m_chunk_shift ) + , m_chunk_mask( rhs.m_chunk_mask ) + , m_chunk_max( rhs.m_chunk_max ) + { + } + + //---------------------------------------------------------------------- + + struct Destroy { + memory_pool m_pool ; + typename traits::value_type ** m_chunks ; + unsigned m_chunk_max ; + bool m_destroy ; + + // Initialize or destroy array of chunk pointers. 
+ // Two entries beyond the max chunks are allocation counters. + + KOKKOS_INLINE_FUNCTION + void operator()( unsigned i ) const + { + if ( m_destroy && i < m_chunk_max && 0 != m_chunks[i] ) { + m_pool.deallocate( m_chunks[i] , m_pool.get_min_block_size() ); + } + m_chunks[i] = 0 ; + } + + void execute( bool arg_destroy ) + { + typedef Kokkos::RangePolicy< typename traits::execution_space > Range ; + + m_destroy = arg_destroy ; + + Kokkos::Impl::ParallelFor<Destroy,Range> + closure( *this , Range(0, m_chunk_max + 1) ); + + closure.execute(); + + traits::execution_space::fence(); + } + + void construct_shared_allocation() + { execute( false ); } + + void destroy_shared_allocation() + { execute( true ); } + + Destroy() = default ; + Destroy( Destroy && ) = default ; + Destroy( const Destroy & ) = default ; + Destroy & operator = ( Destroy && ) = default ; + Destroy & operator = ( const Destroy & ) = default ; + + Destroy( const memory_pool & arg_pool + , typename traits::value_type ** arg_chunk + , const unsigned arg_chunk_max ) + : m_pool( arg_pool ) + , m_chunks( arg_chunk ) + , m_chunk_max( arg_chunk_max ) + , m_destroy( false ) + {} + }; + + + /**\brief Allocation constructor + * + * Memory is allocated in chunks from the memory pool. + * The chunk size conforms to the memory pool's chunk size. + * A maximum size is required in order to allocate a + * chunk-pointer array. 
+ */ + explicit inline + DynamicView( const std::string & arg_label + , const memory_pool & arg_pool + , const size_t arg_size_max ) + : m_pool( arg_pool ) + , m_track() + , m_chunks(0) + // The memory pool chunk is guaranteed to be a power of two + , m_chunk_shift( + Kokkos::Impl::integral_power_of_two( + m_pool.get_min_block_size()/sizeof(typename traits::value_type)) ) + , m_chunk_mask( ( 1 << m_chunk_shift ) - 1 ) + , m_chunk_max( ( arg_size_max + m_chunk_mask ) >> m_chunk_shift ) + { + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace + < Kokkos::Impl::ActiveExecutionMemorySpace + , typename traits::memory_space >::verify(); + + // A functor to deallocate all of the chunks upon final destruction + + typedef typename traits::memory_space memory_space ; + typedef Kokkos::Experimental::Impl::SharedAllocationRecord< memory_space , Destroy > record_type ; + + // Allocate chunk pointers and allocation counter + record_type * const record = + record_type::allocate( memory_space() + , arg_label + , ( sizeof(pointer_type) * ( m_chunk_max + 1 ) ) ); + + m_chunks = reinterpret_cast<pointer_type*>( record->data() ); + + record->m_destroy = Destroy( m_pool , m_chunks , m_chunk_max ); + + // Initialize to zero + + record->m_destroy.construct_shared_allocation(); + + m_track.assign_allocated_record_to_uninitialized( record ); + } +}; + +} // namespace Experimental +} // namespace Kokkos + +namespace Kokkos { +namespace Experimental { + +template< class T , class ... P > +inline +typename Kokkos::Experimental::DynamicView<T,P...>::HostMirror +create_mirror_view( const Kokkos::Experimental::DynamicView<T,P...> & src ) +{ + return src ; +} + +template< class T , class ... DP , class ... 
SP > +inline +void deep_copy( const View<T,DP...> & dst + , const DynamicView<T,SP...> & src + ) +{ + typedef View<T,DP...> dst_type ; + typedef DynamicView<T,SP...> src_type ; + + typedef typename ViewTraits<T,DP...>::execution_space dst_execution_space ; + typedef typename ViewTraits<T,SP...>::memory_space src_memory_space ; + + enum { DstExecCanAccessSrc = + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value }; + + if ( DstExecCanAccessSrc ) { + // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape. + Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type >( dst , src ); + } + else { + Kokkos::Impl::throw_runtime_exception("deep_copy given views that would require a temporary allocation"); + } +} + +template< class T , class ... DP , class ... SP > +inline +void deep_copy( const DynamicView<T,DP...> & dst + , const View<T,SP...> & src + ) +{ + typedef DynamicView<T,DP...> dst_type ; // DP... are the destination's properties, matching the type of 'dst' + typedef View<T,SP...> src_type ; // SP... are the source's properties, matching the type of 'src' + + typedef typename ViewTraits<T,DP...>::execution_space dst_execution_space ; + typedef typename ViewTraits<T,SP...>::memory_space src_memory_space ; + + enum { DstExecCanAccessSrc = + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value }; + + if ( DstExecCanAccessSrc ) { + // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape. 
+ Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type >( dst , src ); + } + else { + Kokkos::Impl::throw_runtime_exception("deep_copy given views that would require a temporary allocation"); + } +} + +} // namespace Experimental +} // namespace Kokkos + +#endif /* #ifndef KOKKOS_DYNAMIC_VIEW_HPP */ + diff --git a/lib/kokkos/containers/src/Kokkos_Functional.hpp b/lib/kokkos/containers/src/Kokkos_Functional.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5c7350ef1cd3bb1ed68deff0c823ce3f7a5a3619 --- /dev/null +++ b/lib/kokkos/containers/src/Kokkos_Functional.hpp @@ -0,0 +1,173 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_FUNCTIONAL_HPP +#define KOKKOS_FUNCTIONAL_HPP + +#include <Kokkos_Macros.hpp> +#include <impl/Kokkos_Functional_impl.hpp> + +namespace Kokkos { + +// These should work for most types + +template <typename T> +struct pod_hash +{ + typedef T argument_type; + typedef T first_argument_type; + typedef uint32_t second_argument_type; + typedef uint32_t result_type; + + KOKKOS_FORCEINLINE_FUNCTION + uint32_t operator()(T const & t) const + { return Impl::MurmurHash3_x86_32( &t, sizeof(T), 0); } + + KOKKOS_FORCEINLINE_FUNCTION + uint32_t operator()(T const & t, uint32_t seed) const + { return Impl::MurmurHash3_x86_32( &t, sizeof(T), seed); } +}; + +template <typename T> +struct pod_equal_to +{ + typedef T first_argument_type; + typedef T second_argument_type; + typedef bool result_type; + + KOKKOS_FORCEINLINE_FUNCTION + bool operator()(T const & a, T const & b) const + { return Impl::bitwise_equal(&a,&b); } +}; + +template <typename T> +struct pod_not_equal_to +{ + typedef T first_argument_type; + typedef T second_argument_type; + typedef bool result_type; + + KOKKOS_FORCEINLINE_FUNCTION + bool operator()(T const & a, T const & b) const + { return !Impl::bitwise_equal(&a,&b); } +}; + +template <typename T> +struct equal_to +{ + typedef T first_argument_type; + 
typedef T second_argument_type; + typedef bool result_type; + + KOKKOS_FORCEINLINE_FUNCTION + bool operator()(T const & a, T const & b) const + { return a == b; } +}; + +template <typename T> +struct not_equal_to +{ + typedef T first_argument_type; + typedef T second_argument_type; + typedef bool result_type; + + KOKKOS_FORCEINLINE_FUNCTION + bool operator()(T const & a, T const & b) const + { return a != b; } +}; + + +template <typename T> +struct greater +{ + typedef T first_argument_type; + typedef T second_argument_type; + typedef bool result_type; + + KOKKOS_FORCEINLINE_FUNCTION + bool operator()(T const & a, T const & b) const + { return a > b; } +}; + + +template <typename T> +struct less +{ + typedef T first_argument_type; + typedef T second_argument_type; + typedef bool result_type; + + KOKKOS_FORCEINLINE_FUNCTION + bool operator()(T const & a, T const & b) const + { return a < b; } +}; + +template <typename T> +struct greater_equal +{ + typedef T first_argument_type; + typedef T second_argument_type; + typedef bool result_type; + + KOKKOS_FORCEINLINE_FUNCTION + bool operator()(T const & a, T const & b) const + { return a >= b; } +}; + + +template <typename T> +struct less_equal +{ + typedef T first_argument_type; + typedef T second_argument_type; + typedef bool result_type; + + KOKKOS_FORCEINLINE_FUNCTION + bool operator()(T const & a, T const & b) const + { return a <= b; } +}; + +} // namespace Kokkos + + +#endif //KOKKOS_FUNCTIONAL_HPP + + diff --git a/lib/kokkos/containers/src/Kokkos_SegmentedView.hpp b/lib/kokkos/containers/src/Kokkos_SegmentedView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5dd7a98b893f0418fb31c7ae6026ac30c886f84b --- /dev/null +++ b/lib/kokkos/containers/src/Kokkos_SegmentedView.hpp @@ -0,0 +1,531 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_SEGMENTED_VIEW_HPP_ +#define KOKKOS_SEGMENTED_VIEW_HPP_ + +#include <Kokkos_Core.hpp> +#include <impl/Kokkos_Error.hpp> +#include <cstdio> + +#if ! KOKKOS_USING_EXP_VIEW + +namespace Kokkos { +namespace Experimental { + +namespace Impl { + +template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type> +struct delete_segmented_view; + +template<class MemorySpace> +inline +void DeviceSetAllocatableMemorySize(size_t) {} + +#if defined( KOKKOS_HAVE_CUDA ) + +template<> +inline +void DeviceSetAllocatableMemorySize<Kokkos::CudaSpace>(size_t size) { +#ifdef __CUDACC__ + size_t size_limit; + cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize); + if(size_limit<size) + cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size); + cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize); +#endif +} + +template<> +inline +void DeviceSetAllocatableMemorySize<Kokkos::CudaUVMSpace>(size_t size) { +#ifdef __CUDACC__ + size_t size_limit; + cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize); + if(size_limit<size) + cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size); + cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize); +#endif +} + +#endif /* #if defined( KOKKOS_HAVE_CUDA ) */ + +} + +template< class DataType , + class Arg1Type = void , + class Arg2Type = void , + class Arg3Type = void> +class SegmentedView : public Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > +{ +public: + //! \name Typedefs for device types and various Kokkos::View specializations. + //@{ + typedef Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ; + + //! The type of a Kokkos::View on the device. 
+ typedef Kokkos::View< typename traits::data_type , + typename traits::array_layout , + typename traits::memory_space , + Kokkos::MemoryUnmanaged > t_dev ; + + +private: + Kokkos::View<t_dev*,typename traits::memory_space> segments_; + + Kokkos::View<int,typename traits::memory_space> realloc_lock; + Kokkos::View<int,typename traits::memory_space> nsegments_; + + size_t segment_length_; + size_t segment_length_m1_; + int max_segments_; + + int segment_length_log2; + + // Dimensions, cardinality, capacity, and offset computation for + // multidimensional array view of contiguous memory. + // Inherits from Impl::Shape + typedef Kokkos::Impl::ViewOffset< typename traits::shape_type + , typename traits::array_layout + > offset_map_type ; + + offset_map_type m_offset_map ; + + typedef Kokkos::View< typename traits::array_intrinsic_type , + typename traits::array_layout , + typename traits::memory_space , + typename traits::memory_traits > array_type ; + + typedef Kokkos::View< typename traits::const_data_type , + typename traits::array_layout , + typename traits::memory_space , + typename traits::memory_traits > const_type ; + + typedef Kokkos::View< typename traits::non_const_data_type , + typename traits::array_layout , + typename traits::memory_space , + typename traits::memory_traits > non_const_type ; + + typedef Kokkos::View< typename traits::non_const_data_type , + typename traits::array_layout , + HostSpace , + void > HostMirror ; + + template< bool Accessible > + KOKKOS_INLINE_FUNCTION + typename Kokkos::Impl::enable_if< Accessible , typename traits::size_type >::type + dimension_0_intern() const { return nsegments_() * segment_length_ ; } + + template< bool Accessible > + KOKKOS_INLINE_FUNCTION + typename Kokkos::Impl::enable_if< ! Accessible , typename traits::size_type >::type + dimension_0_intern() const + { + // In Host space + int n = 0 ; +#if ! 
defined( __CUDA_ARCH__ ) + Kokkos::Impl::DeepCopy< HostSpace , typename traits::memory_space >( & n , nsegments_.ptr_on_device() , sizeof(int) ); +#endif + + return n * segment_length_ ; + } + +public: + + enum { Rank = traits::rank }; + + KOKKOS_INLINE_FUNCTION offset_map_type shape() const { return m_offset_map ; } + + /* \brief return (current) size of dimension 0 */ + KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const { + enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< + Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value }; + int n = SegmentedView::dimension_0_intern< Accessible >(); + return n ; + } + + /* \brief return size of dimension 1 */ + KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_1() const { return m_offset_map.N1 ; } + /* \brief return size of dimension 2 */ + KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_2() const { return m_offset_map.N2 ; } + /* \brief return size of dimension 3 */ + KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_3() const { return m_offset_map.N3 ; } + /* \brief return size of dimension 4 */ + KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_4() const { return m_offset_map.N4 ; } + /* \brief return size of dimension 5 */ + KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_5() const { return m_offset_map.N5 ; } + /* \brief return size of dimension 6 */ + KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_6() const { return m_offset_map.N6 ; } + /* \brief return size of dimension 7 */ + KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_7() const { return m_offset_map.N7 ; } + + /* \brief return the product of the (current) dimension 0 and the extents N1 through N7 */ + KOKKOS_INLINE_FUNCTION typename traits::size_type size() const { + return dimension_0() * + m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 * + m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7 ; + } + + template< typename iType > + 
KOKKOS_INLINE_FUNCTION + typename traits::size_type dimension( const iType & i ) const { + if(i==0) + return dimension_0(); + else + return Kokkos::Impl::dimension( m_offset_map , i ); + } + + KOKKOS_INLINE_FUNCTION + typename traits::size_type capacity() { + return segments_.dimension_0() * + m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 * + m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7; + } + + KOKKOS_INLINE_FUNCTION + typename traits::size_type get_num_segments() { + enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< + Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value }; + int n = SegmentedView::dimension_0_intern< Accessible >(); + return n/segment_length_ ; + } + + KOKKOS_INLINE_FUNCTION + typename traits::size_type get_max_segments() { + return max_segments_; + } + + /// \brief Constructor that allocates View objects with an initial length of 0. + /// + /// This constructor works mostly like the analogous constructor of View. + /// The first argument is a string label, which is entirely for your + /// benefit. (Different SegmentedView objects may have the same label if + /// you like.) The second argument 'view_length' is the size of the segments. + /// This number must be a power of two. The third argument n0 is the maximum + /// value for the first dimension of the segmented view. The maximal allocatable + /// number of Segments is thus: (n0+view_length-1)/view_length. + /// The arguments that follow are the other dimensions of the (1-7) of the + /// View objects. For example, for a View with 3 runtime dimensions, + /// the first 4 integer arguments will be nonzero: + /// SegmentedView("Name",32768,10000000,8,4). This allocates a SegmentedView + /// with a maximum of 306 segments of dimension (32768,8,4). The logical size of + /// the segmented view is (n,8,4) with n between 0 and 10000000. + /// You may omit the integer arguments that follow. 
+  /// \brief Construct a segmented view that can grow up to \c n0 entries
+  ///        along its first dimension.
+  ///
+  /// \param label       Label given to the view that stores the per-segment views.
+  /// \param view_length Extent of the first dimension of every segment; must be
+  ///                    a nonzero power of two.
+  /// \param n0          Upper bound for the first dimension; it is rounded up to
+  ///                    a whole number of segments to obtain the segment limit.
+  /// \param n1          Extent of dimension 1 (\c n2 ... \c n7 likewise).
+  ///
+  /// Segment storage itself is allocated later by grow() or
+  /// grow_non_thread_safe().  Calls Kokkos::Impl::throw_runtime_exception if
+  /// \c view_length is zero or not a power of two.
+  template< class LabelType >
+  SegmentedView(const LabelType & label ,
+      const size_t view_length ,
+      const size_t n0 ,
+      const size_t n1 = 0 ,
+      const size_t n2 = 0 ,
+      const size_t n3 = 0 ,
+      const size_t n4 = 0 ,
+      const size_t n5 = 0 ,
+      const size_t n6 = 0 ,
+      const size_t n7 = 0
+      ): segment_length_(view_length),segment_length_m1_(view_length-1)
+  {
+    // segment_length_log2 = floor(log2(segment_length_)); stays -1 for length 0.
+    segment_length_log2 = -1;
+    size_t l = segment_length_;
+    while(l>0) {
+      l>>=1;
+      segment_length_log2++;
+    }
+
+    // Shift a size_t rather than an int so lengths >= 2^31 do not overflow,
+    // and never shift by the -1 that a zero length leaves behind (that would
+    // be undefined, as would the division by segment_length_ below).
+    const bool is_power_of_2 =
+      ( segment_length_log2 >= 0 ) &&
+      ( ( static_cast<size_t>(1) << segment_length_log2 ) == static_cast<size_t>(segment_length_) );
+    if(!is_power_of_2)
+      Kokkos::Impl::throw_runtime_exception("Kokkos::SegmentedView requires a 'power of 2' segment length");
+
+    // Number of segments needed to cover n0 entries (rounded up).
+    max_segments_ = (n0+segment_length_m1_)/segment_length_;
+
+    Impl::DeviceSetAllocatableMemorySize<typename traits::memory_space>(segment_length_*max_segments_*sizeof(typename traits::value_type));
+
+    segments_ = Kokkos::View<t_dev*,typename traits::execution_space>(label , max_segments_);
+    realloc_lock = Kokkos::View<int,typename traits::execution_space>("Lock");
+    nsegments_ = Kokkos::View<int,typename traits::execution_space>("nviews");
+    // NOTE(review): with the default n1..n7 = 0 the last argument (the product
+    // of all extents) is 0 -- confirm that this is what assign() expects.
+    m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7, n0*n1*n2*n3*n4*n5*n6*n7 );
+
+  }
+
+  /// \brief Copy constructor: member-wise copy of \c src.
+  KOKKOS_INLINE_FUNCTION
+  SegmentedView(const SegmentedView& src):
+    segments_(src.segments_),
+    realloc_lock (src.realloc_lock),
+    nsegments_ (src.nsegments_),
+    segment_length_(src.segment_length_),
+    segment_length_m1_(src.segment_length_m1_),
+    max_segments_ (src.max_segments_),
+    segment_length_log2(src.segment_length_log2),
+    m_offset_map (src.m_offset_map)
+  {}
+
+  /// \brief Copy assignment: member-wise assignment from \c src.
+  KOKKOS_INLINE_FUNCTION
+  SegmentedView& operator= (const SegmentedView& src) {
+    segments_ = src.segments_;
+    realloc_lock = src.realloc_lock;
+    nsegments_ = src.nsegments_;
+    segment_length_= src.segment_length_;
+    segment_length_m1_= src.segment_length_m1_;
+    max_segments_ = src.max_segments_;
+    segment_length_log2= src.segment_length_log2;
+    m_offset_map = src.m_offset_map;
+    return *this;
+  }
+
+  /// \brief Destructor.
+  ///
+  /// Does nothing unless the tracker of \c segments_ is reference counting.
+  /// When the reference count is 1, it fences, copies the segment count to the
+  /// host and launches a parallel_for that deletes every allocated segment.
+  ~SegmentedView() {
+    if ( !segments_.tracker().ref_counting()) { return; }
+    size_t ref_count = segments_.tracker().ref_count();
+    if(ref_count == 1u) {
+      Kokkos::fence();
+      typename Kokkos::View<int,typename traits::execution_space>::HostMirror h_nviews("h_nviews");
+      Kokkos::deep_copy(h_nviews,nsegments_);
+      Kokkos::parallel_for(h_nviews(),Impl::delete_segmented_view<DataType , Arg1Type , Arg2Type, Arg3Type>(*this));
+    }
+  }
+
+  /// \brief Return the view stored for segment \c i (no bounds check here).
+  KOKKOS_INLINE_FUNCTION
+  t_dev get_segment(const int& i) const {
+    return segments_[i];
+  }
+
+  /// \brief Grow the view from inside a team until \c growSize entries fit.
+  ///
+  /// Rank 0 of the team takes \c realloc_lock, re-checks the size and appends
+  /// segments until \c growSize is covered; every member then meets at
+  /// team_barrier().  A request beyond the segment limit prints a message and
+  /// returns without growing and without reaching the barrier.
+  ///
+  /// \param team_member Team handle providing team_rank() and team_barrier().
+  /// \param growSize    Required extent of the first dimension.
+  template< class MemberType>
+  KOKKOS_INLINE_FUNCTION
+  void grow (MemberType& team_member, const size_t& growSize) const {
+    if (growSize>max_segments_*segment_length_) {
+      // Cast explicitly: "%lu" expects unsigned long, which is not size_t on
+      // every target.
+      printf ("Exceeding maxSize: %lu %lu\n", static_cast<unsigned long>(growSize), static_cast<unsigned long>(max_segments_*segment_length_));
+      return;
+    }
+
+    if(team_member.team_rank()==0) {
+      bool too_small = growSize > segment_length_ * nsegments_();
+      if (too_small) {
+        while(Kokkos::atomic_compare_exchange(&realloc_lock(),0,1) )
+          ; // get the lock
+        too_small = growSize > segment_length_ * nsegments_(); // Recheck once we have the lock
+        if(too_small) {
+          while(too_small) {
+            // One segment holds segment_length_ entries of the first dimension
+            // times the full extent of every other dimension.
+            const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
+                m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
+            typename traits::non_const_value_type* const ptr = new typename traits::non_const_value_type[alloc_size];
+
+            segments_(nsegments_()) =
+                t_dev(ptr,segment_length_,m_offset_map.N1,m_offset_map.N2,m_offset_map.N3,m_offset_map.N4,m_offset_map.N5,m_offset_map.N6,m_offset_map.N7);
+            nsegments_()++;
+            too_small = growSize > segment_length_ * nsegments_();
+          }
+        }
+        realloc_lock() = 0; //release the lock
+      }
+    }
+    team_member.team_barrier();
+  }
+
+  /// \brief Grow the view until \c growSize entries fit, without taking
+  ///        \c realloc_lock and without any team synchronization.
+  ///
+  /// A request beyond the segment limit prints a message and returns without
+  /// growing.
+  KOKKOS_INLINE_FUNCTION
+  void grow_non_thread_safe (const size_t& growSize) const {
+    if (growSize>max_segments_*segment_length_) {
+      // Cast explicitly: "%lu" expects unsigned long, which is not size_t on
+      // every target.
+      printf ("Exceeding maxSize: %lu %lu\n", static_cast<unsigned long>(growSize), static_cast<unsigned long>(max_segments_*segment_length_));
+      return;
+    }
+    bool too_small = growSize > segment_length_ * nsegments_();
+    if(too_small) {
+      while(too_small) {
+        const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
+            m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
+        typename traits::non_const_value_type* const ptr =
+            new typename traits::non_const_value_type[alloc_size];
+
+        segments_(nsegments_()) =
+            t_dev (ptr, segment_length_, m_offset_map.N1, m_offset_map.N2,
+                   m_offset_map.N3, m_offset_map.N4, m_offset_map.N5,
+                   m_offset_map.N6, m_offset_map.N7);
+        nsegments_()++;
+        too_small = growSize > segment_length_ * nsegments_();
+      }
+    }
+  }
+
+  // Element access.  For every rank the first index is split in two:
+  // i0 >> segment_length_log2 selects the segment and i0 & segment_length_m1_
+  // is the offset inside that segment; the remaining indices are forwarded
+  // unchanged.  Each overload is enabled only for integral index types and
+  // for the matching traits::rank.
+
+  /// \brief Rank-1 element access.
+  template< typename iType0 >
+  KOKKOS_FORCEINLINE_FUNCTION
+  typename std::enable_if<( std::is_integral<iType0>::value && traits::rank == 1 )
+                         , typename traits::value_type &
+                         >::type
+  operator() ( const iType0 & i0 ) const
+  {
+    return segments_[i0>>segment_length_log2](i0&(segment_length_m1_));
+  }
+
+  /// \brief Rank-2 element access.
+  template< typename iType0 , typename iType1 >
+  KOKKOS_FORCEINLINE_FUNCTION
+  typename std::enable_if<( std::is_integral<iType0>::value &&
+                            std::is_integral<iType1>::value &&
+                            traits::rank == 2 )
+                         , typename traits::value_type &
+                         >::type
+  operator() ( const iType0 & i0 , const iType1 & i1 ) const
+  {
+    return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1);
+  }
+
+  /// \brief Rank-3 element access.
+  template< typename iType0 , typename iType1 , typename iType2 >
+  KOKKOS_FORCEINLINE_FUNCTION
+  typename std::enable_if<( std::is_integral<iType0>::value &&
+                            std::is_integral<iType1>::value &&
+                            std::is_integral<iType2>::value &&
+                            traits::rank == 3 )
+                         , typename traits::value_type &
+                         >::type
+  operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
+  {
+    return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2);
+  }
+
+  /// \brief Rank-4 element access.
+  template< typename iType0 , typename iType1 , typename iType2 , typename iType3 >
+  KOKKOS_FORCEINLINE_FUNCTION
+  typename std::enable_if<( std::is_integral<iType0>::value &&
+                            std::is_integral<iType1>::value &&
+                            std::is_integral<iType2>::value &&
+                            std::is_integral<iType3>::value &&
+                            traits::rank == 4 )
+                         , typename traits::value_type &
+                         >::type
+  operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
+  {
+    return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3);
+  }
+
+  /// \brief Rank-5 element access.
+  template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
+            typename iType4 >
+  KOKKOS_FORCEINLINE_FUNCTION
+  typename std::enable_if<( std::is_integral<iType0>::value &&
+                            std::is_integral<iType1>::value &&
+                            std::is_integral<iType2>::value &&
+                            std::is_integral<iType3>::value &&
+                            std::is_integral<iType4>::value &&
+                            traits::rank == 5 )
+                         , typename traits::value_type &
+                         >::type
+  operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
+               const iType4 & i4 ) const
+  {
+    return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4);
+  }
+
+  /// \brief Rank-6 element access.
+  template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
+            typename iType4 , typename iType5 >
+  KOKKOS_FORCEINLINE_FUNCTION
+  typename std::enable_if<( std::is_integral<iType0>::value &&
+                            std::is_integral<iType1>::value &&
+                            std::is_integral<iType2>::value &&
+                            std::is_integral<iType3>::value &&
+                            std::is_integral<iType4>::value &&
+                            std::is_integral<iType5>::value &&
+                            traits::rank == 6 )
+                         , typename traits::value_type &
+                         >::type
+  operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
+               const iType4 & i4 , const iType5 & i5 ) const
+  {
+    return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5);
+  }
+
+  /// \brief Rank-7 element access.
+  template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
+            typename iType4 , typename iType5 , typename iType6 >
+  KOKKOS_FORCEINLINE_FUNCTION
+  typename std::enable_if<( std::is_integral<iType0>::value &&
+                            std::is_integral<iType1>::value &&
+                            std::is_integral<iType2>::value &&
+                            std::is_integral<iType3>::value &&
+                            std::is_integral<iType4>::value &&
+                            std::is_integral<iType5>::value &&
+                            std::is_integral<iType6>::value &&
+                            traits::rank == 7 )
+                         , typename traits::value_type &
+                         >::type
+  operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
+               const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
+  {
+    return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6);
+  }
+
+  /// \brief Rank-8 element access.
+  template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
+            typename iType4 , typename iType5 , typename iType6 , typename iType7 >
+  KOKKOS_FORCEINLINE_FUNCTION
+  typename std::enable_if<( std::is_integral<iType0>::value &&
+                            std::is_integral<iType1>::value &&
+                            std::is_integral<iType2>::value &&
+                            std::is_integral<iType3>::value &&
+                            std::is_integral<iType4>::value &&
+                            std::is_integral<iType5>::value &&
+                            std::is_integral<iType6>::value &&
+                            std::is_integral<iType7>::value &&
+                            traits::rank == 8 )
+                         , typename traits::value_type &
+                         >::type
+  operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
+               const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const
+  {
+    return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6,i7);
+  }
+};
+
+namespace Impl {
+/// \brief Functor used by ~SegmentedView() to release segment storage:
+///        call \c i deletes the array owned by segment \c i.
+template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type>
+struct delete_segmented_view {
+  typedef SegmentedView<DataType , Arg1Type , Arg2Type, Arg3Type> view_type;
+  typedef typename view_type::execution_space execution_space;
+
+  view_type view_;
+  delete_segmented_view(view_type view):view_(view) {
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (int i) const {
+    delete [] view_.get_segment(i).ptr_on_device();
+  }
+};
+
+}
+}
+}
+
+#endif
+
+#endif
diff --git a/lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp b/lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp
new file mode 100644
index 
0000000000000000000000000000000000000000..1ce38638a2b6a107d1439f7feebb0c90c4a8068f --- /dev/null +++ b/lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp @@ -0,0 +1,226 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STATICCRSGRAPH_HPP +#define KOKKOS_STATICCRSGRAPH_HPP + +#include <string> +#include <vector> + +#include <Kokkos_Core.hpp> + +namespace Kokkos { + +/// \class StaticCrsGraph +/// \brief Compressed row storage array. +/// +/// \tparam DataType The type of stored entries. If a StaticCrsGraph is +/// used as the graph of a sparse matrix, then this is usually an +/// integer type, the type of the column indices in the sparse +/// matrix. +/// +/// \tparam Arg1Type The second template parameter, corresponding +/// either to the Device type (if there are no more template +/// parameters) or to the Layout type (if there is at least one more +/// template parameter). +/// +/// \tparam Arg2Type The third template parameter, which if provided +/// corresponds to the Device type. +/// +/// \tparam SizeType The type of row offsets. Usually the default +/// parameter suffices. However, setting a nondefault value is +/// necessary in some cases, for example, if you want to have a +/// sparse matrices with dimensions (and therefore column indices) +/// that fit in \c int, but want to store more than <tt>INT_MAX</tt> +/// entries in the sparse matrix. +/// +/// A row has a range of entries: +/// <ul> +/// <li> <tt> row_map[i0] <= entry < row_map[i0+1] </tt> </li> +/// <li> <tt> 0 <= i1 < row_map[i0+1] - row_map[i0] </tt> </li> +/// <li> <tt> entries( entry , i2 , i3 , ... ); </tt> </li> +/// <li> <tt> entries( row_map[i0] + i1 , i2 , i3 , ... 
); </tt> </li>
+/// </ul>
+template< class DataType,
+          class Arg1Type,
+          class Arg2Type = void,
+          typename SizeType = typename ViewTraits<DataType*, Arg1Type, Arg2Type, void >::size_type>
+class StaticCrsGraph {
+private:
+  typedef ViewTraits<DataType*, Arg1Type, Arg2Type, void> traits;
+
+public:
+  typedef DataType                                            data_type;
+  typedef typename traits::array_layout                       array_layout;
+  typedef typename traits::execution_space                    execution_space;
+  typedef typename traits::device_type                        device_type;
+  typedef SizeType                                            size_type;
+
+  typedef StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > staticcrsgraph_type;
+  typedef StaticCrsGraph< DataType , array_layout , typename traits::host_mirror_space , SizeType > HostMirror;
+  typedef View< const size_type* , array_layout, device_type >  row_map_type;
+  typedef View<       DataType*  , array_layout, device_type >  entries_type;
+
+  //! Stored entries, addressed through \c row_map.
+  entries_type entries;
+  //! Row offsets into \c entries (see numRows()).
+  row_map_type row_map;
+
+  //! Construct an empty view.
+  StaticCrsGraph () : entries(), row_map() {}
+
+  //! Copy constructor (shallow copy).
+  StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map)
+  {}
+
+  //! Construct from an existing entries view and row-map view.
+  template<class EntriesType, class RowMapType>
+  StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_)
+  {}
+
+  /** \brief  Assign to a view of the rhs array.
+   *          If the old view is the last view
+   *          then allocated memory is deallocated.
+   */
+  StaticCrsGraph& operator= (const StaticCrsGraph& rhs) {
+    entries = rhs.entries;
+    row_map = rhs.row_map;
+    return *this;
+  }
+
+  /**  \brief  Destroy this view of the array.
+   *           If the last view then allocated memory is deallocated.
+   */
+  ~StaticCrsGraph() {}
+
+  /// \brief Number of rows: the extent of \c row_map minus one, or zero when
+  ///        \c row_map is empty.
+  KOKKOS_INLINE_FUNCTION
+  size_type numRows() const {
+    return (row_map.dimension_0 () != 0) ?
+      row_map.dimension_0 () - static_cast<size_type> (1) :
+      static_cast<size_type> (0);
+  }
+};
+
+//----------------------------------------------------------------------------
+
+template< class StaticCrsGraphType , class InputSizeType >
+typename StaticCrsGraphType::staticcrsgraph_type
+create_staticcrsgraph( const std::string & label ,
+                       const std::vector< InputSizeType > & input );
+
+template< class StaticCrsGraphType , class InputSizeType >
+typename StaticCrsGraphType::staticcrsgraph_type
+create_staticcrsgraph( const std::string & label ,
+                       const std::vector< std::vector< InputSizeType > > & input );
+
+//----------------------------------------------------------------------------
+
+template< class DataType ,
+          class Arg1Type ,
+          class Arg2Type ,
+          typename SizeType >
+typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
+create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & input );
+
+template< class DataType ,
+          class Arg1Type ,
+          class Arg2Type ,
+          typename SizeType >
+typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
+create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & input );
+
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#include <impl/Kokkos_StaticCrsGraph_factory.hpp>
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+/// \brief Reduction functor that computes the largest value in the entries
+///        of a graph.  The reduction starts from 0, so the result is never
+///        smaller than 0.
+template< class GraphType >
+struct StaticCrsGraphMaximumEntry {
+
+  typedef typename GraphType::execution_space execution_space ;
+  typedef typename GraphType::data_type value_type ;
+
+  const typename GraphType::entries_type entries ;
+
+  StaticCrsGraphMaximumEntry( const GraphType & graph ) : entries( graph.entries ) {}
+
+  // The index uses the graph's size_type instead of 'unsigned' so that graphs
+  // holding more entries than fit in 32 bits are not indexed with a truncated
+  // value.
+  KOKKOS_INLINE_FUNCTION
+  void operator()( const typename GraphType::size_type i , value_type & update ) const
+    { if ( update < entries(i) ) update = entries(i); }
+
+  KOKKOS_INLINE_FUNCTION
+  void init( value_type & update ) const
+    { update = 0 ; }
+
+  KOKKOS_INLINE_FUNCTION
+  void join( volatile value_type & update ,
+             volatile const value_type & input ) const
+    { if ( update < input ) update = input ; }
+};
+
+}
+
+/// \brief Return the largest value stored in \c graph.entries, computed with
+///        a parallel_reduce over all entries; 0 if no entry is larger than 0.
+template< class DataType, class Arg1Type, class Arg2Type, typename SizeType >
+DataType maximum_entry( const StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > & graph )
+{
+  typedef StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType> GraphType ;
+  typedef Impl::StaticCrsGraphMaximumEntry< GraphType > FunctorType ;
+
+  DataType result = 0 ;
+  Kokkos::parallel_reduce( graph.entries.dimension_0(),
+                           FunctorType(graph), result );
+  return result ;
+}
+
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#endif /* #ifndef KOKKOS_STATICCRSGRAPH_HPP */
+
diff --git a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..7a916c6ef7c449a041d6d2014033e34c3342f185
--- /dev/null
+++ b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp
@@ -0,0 +1,848 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// \file Kokkos_UnorderedMap.hpp +/// \brief Declaration and definition of Kokkos::UnorderedMap. +/// +/// This header file declares and defines Kokkos::UnorderedMap and its +/// related nonmember functions. 
+
+#ifndef KOKKOS_UNORDERED_MAP_HPP
+#define KOKKOS_UNORDERED_MAP_HPP
+
+#include <Kokkos_Core.hpp>
+#include <Kokkos_Functional.hpp>
+
+#include <Kokkos_Bitset.hpp>
+
+#include <impl/Kokkos_Traits.hpp>
+#include <impl/Kokkos_UnorderedMap_impl.hpp>
+
+
+#include <iostream>
+
+#include <stdint.h>
+#include <stdexcept>
+
+
+namespace Kokkos {
+
+enum { UnorderedMapInvalidIndex = ~0u };
+
+/// \brief Return value of UnorderedMap::insert().
+///
+/// Inserting an element into an UnorderedMap is not guaranteed to
+/// succeed.  The outcome is reported through the query methods of
+/// this class:
+/// <ol>
+/// <li> <tt>failed()</tt>: The insert failed.  This usually
+///      means that the UnorderedMap ran out of space. </li>
+/// <li> <tt>success()</tt>: The insert succeeded, and the key
+///      did <i>not</i> exist in the table before. </li>
+/// <li> <tt>existing()</tt>: The key <i>did</i> exist in the
+///      table before.  The new value was ignored and the old
+///      value was left in place. </li>
+/// </ol>
+
+class UnorderedMapInsertResult
+{
+private:
+  // Layout of m_status: the three highest bits are outcome flags, the
+  // remaining low bits hold the list-position counter.
+  enum Status{
+     SUCCESS = 1u << 31
+   , EXISTING = 1u << 30
+   , FREED_EXISTING = 1u << 29
+   , LIST_LENGTH_MASK = ~(SUCCESS | EXISTING | FREED_EXISTING)
+  };
+
+public:
+  /// True when the key/value pair was newly inserted.
+  KOKKOS_FORCEINLINE_FUNCTION
+  bool success() const
+  {
+    return (m_status & SUCCESS) != 0u;
+  }
+
+  /// True when the key was already present in the map.
+  KOKKOS_FORCEINLINE_FUNCTION
+  bool existing() const
+  {
+    return (m_status & EXISTING) != 0u;
+  }
+
+  /// True when no index was recorded, i.e. the insert did not complete
+  /// (typically because of insufficient capacity).
+  KOKKOS_FORCEINLINE_FUNCTION
+  bool failed() const
+  {
+    return UnorderedMapInvalidIndex == m_index;
+  }
+
+  /// True when the insert lost a race to add a duplicate key and an
+  /// already claimed index had to be released again.
+  KOKKOS_FORCEINLINE_FUNCTION
+  bool freed_existing() const
+  {
+    return (m_status & FREED_EXISTING) != 0u;
+  }
+
+  /// Value of the list-position counter kept in the low bits of the
+  /// status word; it is advanced by increment_list_position().
+  KOKKOS_FORCEINLINE_FUNCTION
+  uint32_t list_position() const
+  {
+    const uint32_t position = m_status & LIST_LENGTH_MASK;
+    return position;
+  }
+
+  /// Index at which the key can be found, provided the insert did not fail.
+  KOKKOS_FORCEINLINE_FUNCTION
+  uint32_t index() const
+  {
+    return m_index;
+  }
+
+  /// Start in the "failed" state: invalid index, no flags, counter at zero.
+  KOKKOS_FORCEINLINE_FUNCTION
+  UnorderedMapInsertResult()
+    : m_index(UnorderedMapInvalidIndex)
+    , m_status(0)
+  {}
+
+  /// Advance the list-position counter by one; it saturates at
+  /// LIST_LENGTH_MASK so it never spills into the flag bits.
+  KOKKOS_FORCEINLINE_FUNCTION
+  void increment_list_position()
+  {
+    if (list_position() < LIST_LENGTH_MASK) {
+      m_status += 1u;
+    }
+  }
+
+  /// Record that the key already existed at index \c i, optionally noting
+  /// that a claimed index was freed.  The list-position counter is kept.
+  KOKKOS_FORCEINLINE_FUNCTION
+  void set_existing(uint32_t i, bool arg_freed_existing)
+  {
+    const uint32_t position = list_position();
+    uint32_t flags = EXISTING;
+    if (arg_freed_existing) {
+      flags |= FREED_EXISTING;
+    }
+    m_index  = i;
+    m_status = flags | position;
+  }
+
+  /// Record a successful insert at index \c i.  The list-position counter
+  /// is kept.
+  KOKKOS_FORCEINLINE_FUNCTION
+  void set_success(uint32_t i)
+  {
+    const uint32_t position = list_position();
+    m_index  = i;
+    m_status = SUCCESS | position;
+  }
+
+private:
+  uint32_t m_index;
+  uint32_t m_status;
+};
+
+/// \class UnorderedMap
+/// \brief Thread-safe, performance-portable lookup table.
+/// +/// This class provides a lookup table. In terms of functionality, +/// this class compares to std::unordered_map (new in C++11). +/// "Unordered" means that keys are not stored in any particular +/// order, unlike (for example) std::map. "Thread-safe" means that +/// lookups, insertion, and deletion are safe to call by multiple +/// threads in parallel. "Performance-portable" means that parallel +/// performance of these operations is reasonable, on multiple +/// hardware platforms. Platforms on which performance has been +/// tested include conventional Intel x86 multicore processors, Intel +/// Xeon Phi ("MIC"), and NVIDIA GPUs. +/// +/// Parallel performance portability entails design decisions that +/// might differ from one's expectation for a sequential interface. +/// This particularly affects insertion of single elements. In an +/// interface intended for sequential use, insertion might reallocate +/// memory if the original allocation did not suffice to hold the new +/// element. In this class, insertion does <i>not</i> reallocate +/// memory. This means that it might fail. insert() returns an enum +/// which indicates whether the insert failed. There are three +/// possible conditions: +/// <ol> +/// <li> <tt>INSERT_FAILED</tt>: The insert failed. This usually +/// means that the UnorderedMap ran out of space. </li> +/// <li> <tt>INSERT_SUCCESS</tt>: The insert succeeded, and the key +/// did <i>not</i> exist in the table before. </li> +/// <li> <tt>INSERT_EXISTING</tt>: The insert succeeded, and the key +/// <i>did</i> exist in the table before. The new value was +/// ignored and the old value was left in place. </li> +/// </ol> +/// +/// \tparam Key Type of keys of the lookup table. If \c const, users +/// are not allowed to add or remove keys, though they are allowed +/// to change values. In that case, the implementation may make +/// optimizations specific to the <tt>Device</tt>. 
For example, if +/// <tt>Device</tt> is \c Cuda, it may use texture fetches to access +/// keys. +/// +/// \tparam Value Type of values stored in the lookup table. You may use +/// \c void here, in which case the table will be a set of keys. If +/// \c const, users are not allowed to change entries. +/// In that case, the implementation may make +/// optimizations specific to the \c Device, such as using texture +/// fetches to access values. +/// +/// \tparam Device The Kokkos Device type. +/// +/// \tparam Hasher Definition of the hash function for instances of +/// <tt>Key</tt>. The default will calculate a bitwise hash. +/// +/// \tparam EqualTo Definition of the equality function for instances of +/// <tt>Key</tt>. The default will do a bitwise equality comparison. +/// +template < typename Key + , typename Value + , typename Device = Kokkos::DefaultExecutionSpace + , typename Hasher = pod_hash<typename Impl::remove_const<Key>::type> + , typename EqualTo = pod_equal_to<typename Impl::remove_const<Key>::type> + > +class UnorderedMap +{ +private: + typedef typename ViewTraits<Key,Device,void,void>::host_mirror_space host_mirror_space ; +public: + //! 
\name Public types and constants + //@{ + + //key_types + typedef Key declared_key_type; + typedef typename Impl::remove_const<declared_key_type>::type key_type; + typedef typename Impl::add_const<key_type>::type const_key_type; + + //value_types + typedef Value declared_value_type; + typedef typename Impl::remove_const<declared_value_type>::type value_type; + typedef typename Impl::add_const<value_type>::type const_value_type; + + typedef Device execution_space; + typedef Hasher hasher_type; + typedef EqualTo equal_to_type; + typedef uint32_t size_type; + + //map_types + typedef UnorderedMap<declared_key_type,declared_value_type,execution_space,hasher_type,equal_to_type> declared_map_type; + typedef UnorderedMap<key_type,value_type,execution_space,hasher_type,equal_to_type> insertable_map_type; + typedef UnorderedMap<const_key_type,value_type,execution_space,hasher_type,equal_to_type> modifiable_map_type; + typedef UnorderedMap<const_key_type,const_value_type,execution_space,hasher_type,equal_to_type> const_map_type; + + static const bool is_set = Impl::is_same<void,value_type>::value; + static const bool has_const_key = Impl::is_same<const_key_type,declared_key_type>::value; + static const bool has_const_value = is_set || Impl::is_same<const_value_type,declared_value_type>::value; + + static const bool is_insertable_map = !has_const_key && (is_set || !has_const_value); + static const bool is_modifiable_map = has_const_key && !has_const_value; + static const bool is_const_map = has_const_key && has_const_value; + + + typedef UnorderedMapInsertResult insert_result; + + typedef UnorderedMap<Key,Value,host_mirror_space,Hasher,EqualTo> HostMirror; + + typedef Impl::UnorderedMapHistogram<const_map_type> histogram_type; + + //@} + +private: + enum { invalid_index = ~static_cast<size_type>(0) }; + + typedef typename Impl::if_c< is_set, int, declared_value_type>::type impl_value_type; + + typedef typename Impl::if_c< is_insertable_map + , View< key_type *, 
execution_space> + , View< const key_type *, execution_space, MemoryTraits<RandomAccess> > + >::type key_type_view; + + typedef typename Impl::if_c< is_insertable_map || is_modifiable_map + , View< impl_value_type *, execution_space> + , View< const impl_value_type *, execution_space, MemoryTraits<RandomAccess> > + >::type value_type_view; + + typedef typename Impl::if_c< is_insertable_map + , View< size_type *, execution_space> + , View< const size_type *, execution_space, MemoryTraits<RandomAccess> > + >::type size_type_view; + + typedef typename Impl::if_c< is_insertable_map + , Bitset< execution_space > + , ConstBitset< execution_space> + >::type bitset_type; + + enum { modified_idx = 0, erasable_idx = 1, failed_insert_idx = 2 }; + enum { num_scalars = 3 }; + typedef View< int[num_scalars], LayoutLeft, execution_space> scalars_view; + +public: + //! \name Public member functions + //@{ + + UnorderedMap() + : m_bounded_insert() + , m_hasher() + , m_equal_to() + , m_size() + , m_available_indexes() + , m_hash_lists() + , m_next_index() + , m_keys() + , m_values() + , m_scalars() + {} + + /// \brief Constructor + /// + /// \param capacity_hint [in] Initial guess of how many unique keys will be inserted into the map + /// \param hash [in] Hasher function for \c Key instances. The + /// default value usually suffices. + UnorderedMap( size_type capacity_hint, hasher_type hasher = hasher_type(), equal_to_type equal_to = equal_to_type() ) + : m_bounded_insert(true) + , m_hasher(hasher) + , m_equal_to(equal_to) + , m_size() + , m_available_indexes(calculate_capacity(capacity_hint)) + , m_hash_lists(ViewAllocateWithoutInitializing("UnorderedMap hash list"), Impl::find_hash_size(capacity())) + , m_next_index(ViewAllocateWithoutInitializing("UnorderedMap next index"), capacity()+1) // +1 so that the *_at functions can always return a valid reference + , m_keys("UnorderedMap keys",capacity()+1) + , m_values("UnorderedMap values",(is_set? 
1 : capacity()+1)) + , m_scalars("UnorderedMap scalars") + { + if (!is_insertable_map) { + throw std::runtime_error("Cannot construct a non-insertable (i.e. const key_type) unordered_map"); + } + + Kokkos::deep_copy(m_hash_lists, invalid_index); + Kokkos::deep_copy(m_next_index, invalid_index); + } + + void reset_failed_insert_flag() + { + reset_flag(failed_insert_idx); + } + + histogram_type get_histogram() + { + return histogram_type(*this); + } + + //! Clear all entries in the table. + void clear() + { + m_bounded_insert = true; + + if (capacity() == 0) return; + + m_available_indexes.clear(); + + Kokkos::deep_copy(m_hash_lists, invalid_index); + Kokkos::deep_copy(m_next_index, invalid_index); + { + const key_type tmp = key_type(); + Kokkos::deep_copy(m_keys,tmp); + } + if (is_set){ + const impl_value_type tmp = impl_value_type(); + Kokkos::deep_copy(m_values,tmp); + } + { + Kokkos::deep_copy(m_scalars, 0); + } + } + + /// \brief Change the capacity of the the map + /// + /// If there are no failed inserts the current size of the map will + /// be used as a lower bound for the input capacity. + /// If the map is not empty and does not have failed inserts + /// and the capacity changes then the current data is copied + /// into the resized / rehashed map. + /// + /// This is <i>not</i> a device function; it may <i>not</i> be + /// called in a parallel kernel. + bool rehash(size_type requested_capacity = 0) + { + const bool bounded_insert = (capacity() == 0) || (size() == 0u); + return rehash(requested_capacity, bounded_insert ); + } + + bool rehash(size_type requested_capacity, bool bounded_insert) + { + if(!is_insertable_map) return false; + + const size_type curr_size = size(); + requested_capacity = (requested_capacity < curr_size) ? 
curr_size : requested_capacity; + + insertable_map_type tmp(requested_capacity, m_hasher, m_equal_to); + + if (curr_size) { + tmp.m_bounded_insert = false; + Impl::UnorderedMapRehash<insertable_map_type> f(tmp,*this); + f.apply(); + } + tmp.m_bounded_insert = bounded_insert; + + *this = tmp; + + return true; + } + + /// \brief The number of entries in the table. + /// + /// This method has undefined behavior when erasable() is true. + /// + /// Note that this is not a device function; it cannot be called in + /// a parallel kernel. The value is not stored as a variable; it + /// must be computed. + size_type size() const + { + if( capacity() == 0u ) return 0u; + if (modified()) { + m_size = m_available_indexes.count(); + reset_flag(modified_idx); + } + return m_size; + } + + /// \brief The current number of failed insert() calls. + /// + /// This is <i>not</i> a device function; it may <i>not</i> be + /// called in a parallel kernel. The value is not stored as a + /// variable; it must be computed. + bool failed_insert() const + { + return get_flag(failed_insert_idx); + } + + bool erasable() const + { + return is_insertable_map ? get_flag(erasable_idx) : false; + } + + bool begin_erase() + { + bool result = !erasable(); + if (is_insertable_map && result) { + execution_space::fence(); + set_flag(erasable_idx); + execution_space::fence(); + } + return result; + } + + bool end_erase() + { + bool result = erasable(); + if (is_insertable_map && result) { + execution_space::fence(); + Impl::UnorderedMapErase<declared_map_type> f(*this); + f.apply(); + execution_space::fence(); + reset_flag(erasable_idx); + } + return result; + } + + /// \brief The maximum number of entries that the table can hold. + /// + /// This <i>is</i> a device function; it may be called in a parallel + /// kernel. + KOKKOS_FORCEINLINE_FUNCTION + size_type capacity() const + { return m_available_indexes.size(); } + + /// \brief The number of hash table "buckets." 
+ /// + /// This is different than the number of entries that the table can + /// hold. Each key hashes to an index in [0, hash_capacity() - 1]. + /// That index can hold zero or more entries. This class decides + /// what hash_capacity() should be, given the user's upper bound on + /// the number of entries the table must be able to hold. + /// + /// This <i>is</i> a device function; it may be called in a parallel + /// kernel. + KOKKOS_INLINE_FUNCTION + size_type hash_capacity() const + { return m_hash_lists.dimension_0(); } + + //--------------------------------------------------------------------------- + //--------------------------------------------------------------------------- + + + /// This <i>is</i> a device function; it may be called in a parallel + /// kernel. As discussed in the class documentation, it need not + /// succeed. The return value tells you if it did. + /// + /// \param k [in] The key to attempt to insert. + /// \param v [in] The corresponding value to attempt to insert. If + /// using this class as a set (with Value = void), then you need not + /// provide this value. 
+ KOKKOS_INLINE_FUNCTION + insert_result insert(key_type const& k, impl_value_type const&v = impl_value_type()) const + { + insert_result result; + + if ( !is_insertable_map || capacity() == 0u || m_scalars((int)erasable_idx) ) { + return result; + } + + if ( !m_scalars((int)modified_idx) ) { + m_scalars((int)modified_idx) = true; + } + + int volatile & failed_insert_ref = m_scalars((int)failed_insert_idx) ; + + const size_type hash_value = m_hasher(k); + const size_type hash_list = hash_value % m_hash_lists.dimension_0(); + + size_type * curr_ptr = & m_hash_lists[ hash_list ]; + size_type new_index = invalid_index ; + + // Force integer multiply to long + size_type index_hint = static_cast<size_type>( (static_cast<double>(hash_list) * capacity()) / m_hash_lists.dimension_0()); + + size_type find_attempts = 0; + + enum { bounded_find_attempts = 32u }; + const size_type max_attempts = (m_bounded_insert && (bounded_find_attempts < m_available_indexes.max_hint()) ) ? + bounded_find_attempts : + m_available_indexes.max_hint(); + + bool not_done = true ; + +#if defined( __MIC__ ) + #pragma noprefetch +#endif + while ( not_done ) { + + // Continue searching the unordered list for this key, + // list will only be appended during insert phase. + // Need volatile_load as other threads may be appending. + size_type curr = volatile_load(curr_ptr); + + KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]); +#if defined( __MIC__ ) + #pragma noprefetch +#endif + while ( curr != invalid_index && ! m_equal_to( volatile_load(&m_keys[curr]), k) ) { + result.increment_list_position(); + index_hint = curr; + curr_ptr = &m_next_index[curr]; + curr = volatile_load(curr_ptr); + KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]); + } + + //------------------------------------------------------------ + // If key already present then return that index. 
+ if ( curr != invalid_index ) { + + const bool free_existing = new_index != invalid_index; + if ( free_existing ) { + // Previously claimed an unused entry that was not inserted. + // Release this unused entry immediately. + if (!m_available_indexes.reset(new_index) ) { + printf("Unable to free existing\n"); + } + + } + + result.set_existing(curr, free_existing); + not_done = false ; + } + //------------------------------------------------------------ + // Key is not currently in the map. + // If the thread has claimed an entry try to insert now. + else { + + //------------------------------------------------------------ + // If have not already claimed an unused entry then do so now. + if (new_index == invalid_index) { + + bool found = false; + // use the hash_list as the flag for the search direction + Kokkos::tie(found, index_hint) = m_available_indexes.find_any_unset_near( index_hint, hash_list ); + + // found and index and this thread set it + if ( !found && ++find_attempts >= max_attempts ) { + failed_insert_ref = true; + not_done = false ; + } + else if (m_available_indexes.set(index_hint) ) { + new_index = index_hint; + // Set key and value + KOKKOS_NONTEMPORAL_PREFETCH_STORE(&m_keys[new_index]); + m_keys[new_index] = k ; + + if (!is_set) { + KOKKOS_NONTEMPORAL_PREFETCH_STORE(&m_values[new_index]); + m_values[new_index] = v ; + } + + // Do not proceed until key and value are updated in global memory + memory_fence(); + } + } + else if (failed_insert_ref) { + not_done = false; + } + + // Attempt to append claimed entry into the list. + // Another thread may also be trying to append the same list so protect with atomic. 
+ if ( new_index != invalid_index && + curr == atomic_compare_exchange(curr_ptr, static_cast<size_type>(invalid_index), new_index) ) { + // Succeeded in appending + result.set_success(new_index); + not_done = false ; + } + } + } // while ( not_done ) + + return result ; + } + + KOKKOS_INLINE_FUNCTION + bool erase(key_type const& k) const + { + bool result = false; + + if(is_insertable_map && 0u < capacity() && m_scalars((int)erasable_idx)) { + + if ( ! m_scalars((int)modified_idx) ) { + m_scalars((int)modified_idx) = true; + } + + size_type index = find(k); + if (valid_at(index)) { + m_available_indexes.reset(index); + result = true; + } + } + + return result; + } + + /// \brief Find the given key \c k, if it exists in the table. + /// + /// \return If the key exists in the table, the index of the + /// value corresponding to that key; otherwise, an invalid index. + /// + /// This <i>is</i> a device function; it may be called in a parallel + /// kernel. + KOKKOS_INLINE_FUNCTION + size_type find( const key_type & k) const + { + size_type curr = 0u < capacity() ? m_hash_lists( m_hasher(k) % m_hash_lists.dimension_0() ) : invalid_index ; + + KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]); + while (curr != invalid_index && !m_equal_to( m_keys[curr], k) ) { + KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]); + curr = m_next_index[curr]; + } + + return curr; + } + + /// \brief Does the key exist in the map + /// + /// This <i>is</i> a device function; it may be called in a parallel + /// kernel. + KOKKOS_INLINE_FUNCTION + bool exists( const key_type & k) const + { + return valid_at(find(k)); + } + + + /// \brief Get the value with \c i as its direct index. + /// + /// \param i [in] Index directly into the array of entries. + /// + /// This <i>is</i> a device function; it may be called in a parallel + /// kernel. + /// + /// 'const value_type' via Cuda texture fetch must return by value. 
+  // An index >= capacity() is clamped to capacity(); for a set (is_set)
+  // element 0 is always read.
+  KOKKOS_FORCEINLINE_FUNCTION
+  typename Impl::if_c< (is_set || has_const_value), impl_value_type, impl_value_type &>::type
+  value_at(size_type i) const
+  {
+    return m_values[ is_set ? 0 : (i < capacity() ? i : capacity()) ];
+  }
+
+  /// \brief Get the key with \c i as its direct index.
+  ///
+  /// \param i [in] Index directly into the array of entries.  An index
+  ///   >= capacity() is clamped to capacity().
+  ///
+  /// This <i>is</i> a device function; it may be called in a parallel
+  /// kernel.
+  KOKKOS_FORCEINLINE_FUNCTION
+  key_type key_at(size_type i) const
+  {
+    return m_keys[ i < capacity() ? i : capacity() ];
+  }
+
+  /// \brief Whether entry \c i is marked in the bitset of available indexes.
+  KOKKOS_FORCEINLINE_FUNCTION
+  bool valid_at(size_type i) const
+  {
+    return m_available_indexes.test(i);
+  }
+
+  /// \brief Converting constructor; copies every member from \c src.
+  ///
+  /// Enabled only when Impl::UnorderedMapCanAssign allows the key/value
+  /// type combination.
+  template <typename SKey, typename SValue>
+  UnorderedMap( UnorderedMap<SKey,SValue,Device,Hasher,EqualTo> const& src,
+                typename Impl::enable_if< Impl::UnorderedMapCanAssign<declared_key_type,declared_value_type,SKey,SValue>::value,int>::type = 0
+              )
+    : m_bounded_insert(src.m_bounded_insert)
+    , m_hasher(src.m_hasher)
+    , m_equal_to(src.m_equal_to)
+    , m_size(src.m_size)
+    , m_available_indexes(src.m_available_indexes)
+    , m_hash_lists(src.m_hash_lists)
+    , m_next_index(src.m_next_index)
+    , m_keys(src.m_keys)
+    , m_values(src.m_values)
+    , m_scalars(src.m_scalars)
+  {}
+
+
+  /// \brief Converting assignment; member-wise counterpart of the
+  ///   converting constructor, under the same UnorderedMapCanAssign test.
+  template <typename SKey, typename SValue>
+  typename Impl::enable_if< Impl::UnorderedMapCanAssign<declared_key_type,declared_value_type,SKey,SValue>::value
+                           ,declared_map_type & >::type
+  operator=( UnorderedMap<SKey,SValue,Device,Hasher,EqualTo> const& src)
+  {
+    m_bounded_insert = src.m_bounded_insert;
+    m_hasher = src.m_hasher;
+    m_equal_to = src.m_equal_to;
+    m_size = src.m_size;
+    m_available_indexes = src.m_available_indexes;
+    m_hash_lists = src.m_hash_lists;
+    m_next_index = src.m_next_index;
+    m_keys = src.m_keys;
+    m_values = src.m_values;
+    m_scalars = src.m_scalars;
+    return *this;
+  }
+
+  /// \brief Replace this map with a freshly allocated copy of \c src,
+  ///   which may use a different device type.
+  ///
+  /// Does nothing when both maps already share the same hash-list
+  /// storage.  Enabled only when the key and value types match after
+  /// removing const.
+  template <typename SKey, typename SValue, typename SDevice>
+  typename Impl::enable_if< Impl::is_same< typename Impl::remove_const<SKey>::type, key_type>::value &&
+                            Impl::is_same< typename Impl::remove_const<SValue>::type, value_type>::value
+                          >::type
+  create_copy_view( UnorderedMap<SKey, SValue, SDevice, Hasher,EqualTo> const& src)
+  {
+    if (m_hash_lists.ptr_on_device() != src.m_hash_lists.ptr_on_device()) {
+
+      insertable_map_type tmp;
+
+      tmp.m_bounded_insert = src.m_bounded_insert;
+      tmp.m_hasher = src.m_hasher;
+      tmp.m_equal_to = src.m_equal_to;
+      tmp.m_size = src.size();
+      // Allocate storage with the same extents as src; the views are not
+      // initialized because every byte is overwritten below.
+      tmp.m_available_indexes = bitset_type( src.capacity() );
+      tmp.m_hash_lists = size_type_view( ViewAllocateWithoutInitializing("UnorderedMap hash list"), src.m_hash_lists.dimension_0() );
+      tmp.m_next_index = size_type_view( ViewAllocateWithoutInitializing("UnorderedMap next index"), src.m_next_index.dimension_0() );
+      tmp.m_keys = key_type_view( ViewAllocateWithoutInitializing("UnorderedMap keys"), src.m_keys.dimension_0() );
+      tmp.m_values = value_type_view( ViewAllocateWithoutInitializing("UnorderedMap values"), src.m_values.dimension_0() );
+      tmp.m_scalars = scalars_view("UnorderedMap scalars");
+
+      Kokkos::deep_copy(tmp.m_available_indexes, src.m_available_indexes);
+
+      // Byte-wise copies from src's memory space into this map's memory
+      // space; each statement constructs a temporary DeepCopy object.
+      typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, typename SDevice::memory_space > raw_deep_copy;
+
+      raw_deep_copy(tmp.m_hash_lists.ptr_on_device(), src.m_hash_lists.ptr_on_device(), sizeof(size_type)*src.m_hash_lists.dimension_0());
+      raw_deep_copy(tmp.m_next_index.ptr_on_device(), src.m_next_index.ptr_on_device(), sizeof(size_type)*src.m_next_index.dimension_0());
+      raw_deep_copy(tmp.m_keys.ptr_on_device(), src.m_keys.ptr_on_device(), sizeof(key_type)*src.m_keys.dimension_0());
+      if (!is_set) {
+        raw_deep_copy(tmp.m_values.ptr_on_device(), src.m_values.ptr_on_device(), sizeof(impl_value_type)*src.m_values.dimension_0());
+      }
+      raw_deep_copy(tmp.m_scalars.ptr_on_device(), src.m_scalars.ptr_on_device(), sizeof(int)*num_scalars );
+
+      *this = tmp;
+    }
+  }
+
+  //@}
+private: // private member functions
+
+  // Host-side read of the "modified" scalar flag.
+  bool modified() const
+  {
+    return get_flag(modified_idx);
+  }
+
+  // Write 1 into scalar slot `flag`, copying from host memory into the
+  // map's memory space.
+  void set_flag(int flag) const
+  {
+    typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
+    const int true_ = true;
+    raw_deep_copy(m_scalars.ptr_on_device() + flag, &true_, sizeof(int));
+  }
+
+  // Write 0 into scalar slot `flag`; counterpart of set_flag().
+  void reset_flag(int flag) const
+  {
+    typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
+    const int false_ = false;
+    raw_deep_copy(m_scalars.ptr_on_device() + flag, &false_, sizeof(int));
+  }
+
+  // Copy scalar slot `flag` back to the host and return it as a bool.
+  bool get_flag(int flag) const
+  {
+    typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename execution_space::memory_space > raw_deep_copy;
+    int result = false;
+    raw_deep_copy(&result, m_scalars.ptr_on_device() + flag, sizeof(int));
+    return result;
+  }
+
+  // Turn a user capacity hint into the capacity actually used.
+  static uint32_t calculate_capacity(uint32_t capacity_hint)
+  {
+    // Scale the hint by 7/6 (about 16.7% extra) and round up to the next
+    // multiple of 128; a hint of 0 yields 128.
+    return capacity_hint ? ((static_cast<uint32_t>(7ull*capacity_hint/6u) + 127u)/128u)*128u : 128u;
+  }
+
+private: // private members
+  bool m_bounded_insert;
+  hasher_type m_hasher;
+  equal_to_type m_equal_to;
+  mutable size_type m_size;
+  bitset_type m_available_indexes;
+  size_type_view m_hash_lists;
+  size_type_view m_next_index;
+  key_type_view m_keys;
+  value_type_view m_values;
+  scalars_view m_scalars;
+
+  // Every UnorderedMap instantiation may access the members of every
+  // other one (needed by the converting copy operations above).
+  template <typename KKey, typename VValue, typename DDevice, typename HHash, typename EEqualTo>
+  friend class UnorderedMap;
+
+  template <typename UMap>
+  friend struct Impl::UnorderedMapErase;
+
+  template <typename UMap>
+  friend struct Impl::UnorderedMapHistogram;
+
+  template <typename UMap>
+  friend struct Impl::UnorderedMapPrint;
+};
+
+// Overload of deep_copy for two UnorderedMap objects.
+template < typename DKey, typename DT, typename DDevice + , typename SKey, typename ST, typename SDevice + , typename Hasher, typename EqualTo > +inline void deep_copy( UnorderedMap<DKey, DT, DDevice, Hasher, EqualTo> & dst + , const UnorderedMap<SKey, ST, SDevice, Hasher, EqualTo> & src ) +{ + dst.create_copy_view(src); +} + + +} // namespace Kokkos + +#endif //KOKKOS_UNORDERED_MAP_HPP diff --git a/lib/kokkos/containers/src/Kokkos_Vector.hpp b/lib/kokkos/containers/src/Kokkos_Vector.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6a360e8d19df8ef1330b43222d602bed58e28a9f --- /dev/null +++ b/lib/kokkos/containers/src/Kokkos_Vector.hpp @@ -0,0 +1,283 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_VECTOR_HPP +#define KOKKOS_VECTOR_HPP + +#include <Kokkos_Core_fwd.hpp> +#include <Kokkos_DualView.hpp> + +/* Drop in replacement for std::vector based on Kokkos::DualView + * Most functions only work on the host (it will not compile if called from device kernel) + * + */ + namespace Kokkos { + +template< class Scalar, class Arg1Type = void> +class vector : public DualView<Scalar*,LayoutLeft,Arg1Type> { + + typedef Scalar value_type; + typedef Scalar* pointer; + typedef const Scalar* const_pointer; + typedef Scalar* reference; + typedef const Scalar* const_reference; + typedef Scalar* iterator; + typedef const Scalar* const_iterator; + +private: + size_t _size; + typedef size_t size_type; + float _extra_storage; + typedef DualView<Scalar*,LayoutLeft,Arg1Type> DV; + + +public: +#ifdef KOKKOS_CUDA_USE_UVM + KOKKOS_INLINE_FUNCTION Scalar& operator() (int i) const {return DV::h_view(i);}; + KOKKOS_INLINE_FUNCTION Scalar& operator[] (int i) const {return DV::h_view(i);}; +#else + inline Scalar& operator() (int i) const {return DV::h_view(i);}; + inline Scalar& operator[] (int i) const {return DV::h_view(i);}; +#endif + + /* Member functions which behave like std::vector functions */ + + vector():DV() { + _size = 0; + _extra_storage = 1.1; + 
DV::modified_host() = 1; + }; + + + vector(int n, Scalar val=Scalar()):DualView<Scalar*,LayoutLeft,Arg1Type>("Vector",size_t(n*(1.1))) { + _size = n; + _extra_storage = 1.1; + DV::modified_host() = 1; + + assign(n,val); + } + + + void resize(size_t n) { + if(n>=capacity()) + DV::resize(size_t (n*_extra_storage)); + _size = n; + } + + void resize(size_t n, const Scalar& val) { + assign(n,val); + } + + void assign (size_t n, const Scalar& val) { + + /* Resize if necessary (behavour of std:vector) */ + + if(n>capacity()) + DV::resize(size_t (n*_extra_storage)); + _size = n; + + /* Assign value either on host or on device */ + + if( DV::modified_host() >= DV::modified_device() ) { + set_functor_host f(DV::h_view,val); + parallel_for(n,f); + DV::t_host::execution_space::fence(); + DV::modified_host()++; + } else { + set_functor f(DV::d_view,val); + parallel_for(n,f); + DV::t_dev::execution_space::fence(); + DV::modified_device()++; + } + } + + void reserve(size_t n) { + DV::resize(size_t (n*_extra_storage)); + } + + void push_back(Scalar val) { + DV::modified_host()++; + if(_size == capacity()) { + size_t new_size = _size*_extra_storage; + if(new_size == _size) new_size++; + DV::resize(new_size); + } + + DV::h_view(_size) = val; + _size++; + + }; + + void pop_back() { + _size--; + }; + + void clear() { + _size = 0; + } + + size_type size() const {return _size;}; + size_type max_size() const {return 2000000000;} + size_type capacity() const {return DV::capacity();}; + bool empty() const {return _size==0;}; + + iterator begin() const {return &DV::h_view(0);}; + + iterator end() const {return &DV::h_view(_size);}; + + + /* std::algorithms wich work originally with iterators, here they are implemented as member functions */ + + size_t + lower_bound (const size_t& start, + const size_t& theEnd, + const Scalar& comp_val) const + { + int lower = start; // FIXME (mfh 24 Apr 2014) narrowing conversion + int upper = _size > theEnd? 
theEnd : _size-1; // FIXME (mfh 24 Apr 2014) narrowing conversion + if (upper <= lower) { + return theEnd; + } + + Scalar lower_val = DV::h_view(lower); + Scalar upper_val = DV::h_view(upper); + size_t idx = (upper+lower)/2; + Scalar val = DV::h_view(idx); + if(val>upper_val) return upper; + if(val<lower_val) return start; + + while(upper>lower) { + if(comp_val>val) { + lower = ++idx; + } else { + upper = idx; + } + idx = (upper+lower)/2; + val = DV::h_view(idx); + } + return idx; + } + + bool is_sorted() { + for(int i=0;i<_size-1;i++) { + if(DV::h_view(i)>DV::h_view(i+1)) return false; + } + return true; + } + + iterator find(Scalar val) const { + if(_size == 0) return end(); + + int upper,lower,current; + current = _size/2; + upper = _size-1; + lower = 0; + + if((val<DV::h_view(0)) || (val>DV::h_view(_size-1)) ) return end(); + + while(upper>lower) + { + if(val>DV::h_view(current)) lower = current+1; + else upper = current; + current = (upper+lower)/2; + } + + if(val==DV::h_view(current)) return &DV::h_view(current); + else return end(); + } + + /* Additional functions for data management */ + + void device_to_host(){ + deep_copy(DV::h_view,DV::d_view); + } + void host_to_device() const { + deep_copy(DV::d_view,DV::h_view); + } + + void on_host() { + DV::modified_host() = DV::modified_device() + 1; + } + void on_device() { + DV::modified_device() = DV::modified_host() + 1; + } + + void set_overallocation(float extra) { + _extra_storage = 1.0 + extra; + } + + +public: + struct set_functor { + typedef typename DV::t_dev::execution_space execution_space; + typename DV::t_dev _data; + Scalar _val; + + set_functor(typename DV::t_dev data, Scalar val) : + _data(data),_val(val) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int &i) const { + _data(i) = _val; + } + }; + + struct set_functor_host { + typedef typename DV::t_host::execution_space execution_space; + typename DV::t_host _data; + Scalar _val; + + set_functor_host(typename DV::t_host data, Scalar val) : 
+ _data(data),_val(val) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int &i) const { + _data(i) = _val; + } + }; + +}; + + +} +#endif diff --git a/lib/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp b/lib/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..df2fbed5a6709ca74edc0628fb45d39238da0ade --- /dev/null +++ b/lib/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp @@ -0,0 +1,109 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_BITSET_IMPL_HPP
+#define KOKKOS_BITSET_IMPL_HPP
+
+#include <Kokkos_Macros.hpp>
+#include <impl/Kokkos_BitOps.hpp>
+#include <stdint.h>
+
+#include <cstdio>
+#include <climits>
+#include <iostream>
+#include <iomanip>
+
+namespace Kokkos {
+namespace Impl {
+
+// Rotate the bits of i right by r positions.  r == 0 is special-cased
+// because shifting by the full width (size - 0) is not allowed.
+// NOTE(review): r is presumably expected to lie in [0, size) -- confirm
+// against the callers.
+KOKKOS_FORCEINLINE_FUNCTION
+unsigned rotate_right( unsigned i, int r )
+{
+  enum { size = static_cast<int>( sizeof(unsigned) * CHAR_BIT ) };
+  return r ? ( ( i >> r ) | ( i << ( size - r ) ) ) : i ;
+}
+
+// Reduction functor that sums bit_count() over all blocks of a bitset.
+template < typename Bitset >
+struct BitsetCount
+{
+  typedef Bitset bitset_type;
+  typedef typename bitset_type::execution_space::execution_space execution_space;
+  typedef typename bitset_type::size_type size_type;
+  typedef size_type value_type;
+
+  bitset_type m_bitset;
+
+  BitsetCount( bitset_type const& bitset )
+    : m_bitset(bitset)
+  {}
+
+  // Run the reduction over every block and return the total.
+  size_type apply() const
+  {
+    size_type count = 0u;
+    parallel_reduce( m_bitset.m_blocks.dimension_0(), *this, count );
+    return count;
+  }
+
+  // Reduction identity.
+  KOKKOS_INLINE_FUNCTION
+  void init( value_type & count ) const
+  {
+    count = 0u;
+  }
+
+  // Combine two partial sums.
+  KOKKOS_INLINE_FUNCTION
+  void join( volatile value_type & count, const volatile size_type & incr ) const
+  {
+    count += incr;
+  }
+
+  // Add the bit count of block i to the running sum.
+  KOKKOS_INLINE_FUNCTION
+  void operator()( size_type i, value_type & count ) const
+  {
+    count += bit_count( m_bitset.m_blocks[i] );
+  }
+};
+
+} // namespace Impl
+} // namespace Kokkos
+
+#endif // KOKKOS_BITSET_IMPL_HPP
diff --git a/lib/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp b/lib/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c87bb8a3a37cb6820d31bdd691cf447b20bbd185
--- /dev/null
+++ b/lib/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp
@@ -0,0 +1,195 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2.
Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+
+#ifndef KOKKOS_FUNCTIONAL_IMPL_HPP
+#define KOKKOS_FUNCTIONAL_IMPL_HPP
+
+#include <Kokkos_Macros.hpp>
+#include <stdint.h>
+
+namespace Kokkos { namespace Impl {
+
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Assemble the i-th 32-bit block of p from four bytes, least significant
+// byte first.
+KOKKOS_FORCEINLINE_FUNCTION
+uint32_t getblock32 ( const uint8_t * p, int i )
+{
+// used to avoid aliasing error which could cause errors with
+// forced inlining
+  return ((uint32_t)p[i*4+0])
+       | ((uint32_t)p[i*4+1] << 8)
+       | ((uint32_t)p[i*4+2] << 16)
+       | ((uint32_t)p[i*4+3] << 24);
+}
+
+// Rotate x left by r bits.  The callers in this file pass r = 13 and 15;
+// r = 0 would shift by the full width and must not be used.
+KOKKOS_FORCEINLINE_FUNCTION
+uint32_t rotl32 ( uint32_t x, int8_t r )
+{ return (x << r) | (x >> (32 - r)); }
+
+// Final mixing step applied to the hash state.
+KOKKOS_FORCEINLINE_FUNCTION
+uint32_t fmix32 ( uint32_t h )
+{
+  h ^= h >> 16;
+  h *= 0x85ebca6b;
+  h ^= h >> 13;
+  h *= 0xc2b2ae35;
+  h ^= h >> 16;
+
+  return h;
+}
+
+// 32-bit MurmurHash3 of the len bytes starting at key, seeded with seed.
+KOKKOS_INLINE_FUNCTION
+uint32_t MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 4;
+
+  uint32_t h1 = seed;
+
+  const uint32_t c1 = 0xcc9e2d51;
+  const uint32_t c2 = 0x1b873593;
+
+  //----------
+  // body
+
+  for(int i=0; i<nblocks; ++i)
+  {
+    uint32_t k1 = getblock32(data,i);
+
+    k1 *= c1;
+    k1 = rotl32(k1,15);
+    k1 *= c2;
+
+    h1 ^= k1;
+    h1 = rotl32(h1,13);
+    h1 = h1*5+0xe6546b64;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
+
+  uint32_t k1 = 0;
+
+  // The cases fall through on purpose: each one adds one of the 1-3
+  // remaining bytes before the shared mixing statements under "case 1".
+  switch(len & 3)
+  {
+  case 3: k1 ^= tail[2] << 16;
+  case 2: k1 ^= tail[1] << 8;
+  case 1: k1 ^= tail[0];
+          k1 *= c1; k1 = rotl32(k1,15); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len;
+
+  h1 = fmix32(h1);
+
+  return h1;
+}
+
+
+#if defined( __GNUC__ ) /* GNU C */ || \
+    defined( __GNUG__ ) /* GNU C++ */ || \
+    defined( __clang__ )
+
+#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
+
+#else
+
+#define KOKKOS_MAY_ALIAS
+
+#endif
+
+// Compare the object representations of *a_ptr and *b_ptr.  All whole
+// 64-bit words are compared first; the remaining bytes (at most 7) are
+// then covered by at most one 32-bit, one 16-bit and one 8-bit compare.
+template <typename T>
+KOKKOS_FORCEINLINE_FUNCTION
+bool bitwise_equal(T const * const a_ptr, T const * const b_ptr)
+{
+  typedef uint64_t KOKKOS_MAY_ALIAS T64;
+  typedef uint32_t KOKKOS_MAY_ALIAS T32;
+  typedef uint16_t KOKKOS_MAY_ALIAS T16;
+  typedef uint8_t KOKKOS_MAY_ALIAS T8;
+
+  // Number of whole 8/16/32/64-bit units that fit in sizeof(T).
+  enum {
+    NUM_8 = sizeof(T),
+    NUM_16 = NUM_8 / 2,
+    NUM_32 = NUM_8 / 4,
+    NUM_64 = NUM_8 / 8
+  };
+
+  // View the same address through pointers of each unit width.
+  union {
+    T const * const ptr;
+    T64 const * const ptr64;
+    T32 const * const ptr32;
+    T16 const * const ptr16;
+    T8 const * const ptr8;
+  } a = {a_ptr}, b = {b_ptr};
+
+  bool result = true;
+
+  for (int i=0; i < NUM_64; ++i) {
+    result = result && a.ptr64[i] == b.ptr64[i];
+  }
+
+  // One 32-bit unit left over after the 64-bit words.
+  if ( NUM_64*2 < NUM_32 ) {
+    result = result && a.ptr32[NUM_64*2] == b.ptr32[NUM_64*2];
+  }
+
+  // One 16-bit unit left over after the 32-bit units.
+  if ( NUM_32*2 < NUM_16 ) {
+    result = result && a.ptr16[NUM_32*2] == b.ptr16[NUM_32*2];
+  }
+
+  // One final byte left over after the 16-bit units.
+  if ( NUM_16*2 < NUM_8 ) {
+    result = result && a.ptr8[NUM_16*2] == b.ptr8[NUM_16*2];
+  }
+
+  return result;
+}
+
+
+
+#undef KOKKOS_MAY_ALIAS
+
+}} // namespace Kokkos::Impl
+
+#endif //KOKKOS_FUNCTIONAL_IMPL_HPP
diff --git a/lib/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp b/lib/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c52fc24359b8f7bd34489d94914ea304f7bc3425
--- /dev/null
+++ b/lib/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp
@@ -0,0 +1,208 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3.
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP
+#define KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+
+// Overload selected when the graph's ViewTraits report is_hostspace:
+// the graph itself is returned, no copy is made.
+template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
+inline
+typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
+create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view ,
+                    typename Impl::enable_if< ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace >::type * = 0 )
+{
+  return view ;
+}
+
+// Always allocates a new host mirror of the graph and deep-copies both
+// the row map and the entries into it.
+template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
+inline
+typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
+create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view )
+{
+  // Force copy:
+  //typedef Impl::ViewAssignment< Impl::ViewDefault > alloc ; // unused
+  typedef StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > staticcrsgraph_type ;
+
+  typename staticcrsgraph_type::HostMirror tmp ;
+  typename staticcrsgraph_type::row_map_type::HostMirror tmp_row_map = create_mirror( view.row_map);
+
+  // Allocation to match:
+  tmp.row_map = tmp_row_map ; // Assignment of 'const' from 'non-const'
+  tmp.entries = create_mirror( view.entries );
+
+
+  // Deep copy:
+  // The row map is filled through the non-const tmp_row_map, which was
+  // assigned to tmp.row_map above.
+  deep_copy( tmp_row_map , view.row_map );
+  deep_copy( tmp.entries , view.entries );
+
+  return tmp ;
+}
+
+// Overload selected when the graph is not in host space: falls back to
+// create_mirror(), i.e. allocate and copy.
+template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
+inline
+typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
+create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view ,
+                    typename Impl::enable_if< ! ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace >::type * = 0 )
+{
+  return create_mirror( view );
+}
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+
+// Build a graph from per-row entry counts: input[i] is the number of
+// entries in row i.  The row map is the running sum of the counts; the
+// entries view is allocated with the total but not filled here.
+template< class StaticCrsGraphType , class InputSizeType >
+inline
+typename StaticCrsGraphType::staticcrsgraph_type
+create_staticcrsgraph( const std::string & label ,
+                       const std::vector< InputSizeType > & input )
+{
+  typedef StaticCrsGraphType output_type ;
+  //typedef std::vector< InputSizeType > input_type ; // unused
+
+  typedef typename output_type::entries_type entries_type ;
+
+  typedef View< typename output_type::size_type [] ,
+                typename output_type::array_layout ,
+                typename output_type::execution_space > work_type ;
+
+  output_type output ;
+
+  // Create the row map:
+
+  const size_t length = input.size();
+
+  {
+    work_type row_work( "tmp" , length + 1 );
+
+    typename work_type::HostMirror row_work_host =
+      create_mirror_view( row_work );
+
+    // row_work_host[i+1] holds the number of entries in rows 0..i.
+    size_t sum = 0 ;
+    row_work_host[0] = 0 ;
+    for ( size_t i = 0 ; i < length ; ++i ) {
+      row_work_host[i+1] = sum += input[i];
+    }
+
+    deep_copy( row_work , row_work_host );
+
+    output.entries = entries_type( label , sum );
+    output.row_map = row_work ;
+  }
+
+  return output ;
+}
+
+//----------------------------------------------------------------------------
+
+// Build a graph from explicit rows: input[i] lists the entries of row i.
+// The row map is built from the row lengths and the entries are copied
+// row after row.
+template< class StaticCrsGraphType , class InputSizeType >
+inline
+typename StaticCrsGraphType::staticcrsgraph_type
+create_staticcrsgraph( const std::string & label ,
+                       const std::vector< std::vector< InputSizeType > > & input )
+{
+  typedef StaticCrsGraphType output_type ;
+  typedef typename output_type::entries_type entries_type ;
+
+  static_assert( entries_type::rank == 1
+               , "Graph entries view must be rank one" );
+
+  typedef View< typename output_type::size_type [] ,
+                typename output_type::array_layout ,
+                typename output_type::execution_space > work_type ;
+
+  output_type output ;
+
+  // Create the row map:
+
+  const size_t length = input.size();
+
+  {
+    work_type row_work( "tmp" , length + 1 );
+
+    typename work_type::HostMirror row_work_host =
+      create_mirror_view( row_work );
+
+    // row_work_host[i+1] holds the number of entries in rows 0..i.
+    size_t sum = 0 ;
+    row_work_host[0] = 0 ;
+    for ( size_t i = 0 ; i < length ; ++i ) {
+      row_work_host[i+1] = sum += input[i].size();
+    }
+
+    deep_copy( row_work , row_work_host );
+
+    output.entries = entries_type( label , sum );
+    output.row_map = row_work ;
+  }
+
+  // Fill in the entries:
+  {
+    typename entries_type::HostMirror host_entries =
+      create_mirror_view( output.entries );
+
+    // sum is the flat position in the entries view; it advances with j.
+    size_t sum = 0 ;
+    for ( size_t i = 0 ; i < length ; ++i ) {
+      for ( size_t j = 0 ; j < input[i].size() ; ++j , ++sum ) {
+        host_entries( sum ) = input[i][j] ;
+      }
+    }
+
+    deep_copy( output.entries , host_entries );
+  }
+
+  return output ;
+}
+
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#endif /* #ifndef KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP */
+
diff --git a/lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.cpp b/lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..843fd3a8089999ab80b23506c2206e7a5de325e9
--- /dev/null
+++ b/lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
@@ -0,0 +1,101 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1.
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_UnorderedMap.hpp> + +namespace Kokkos { namespace Impl { + +uint32_t find_hash_size(uint32_t size) +{ + if (size == 0u) return 0u; + + // these primes try to preserve randomness of hash + static const uint32_t primes [] = { + 3, 7, 13, 23, 53, 97, 193, 389, 769, 1543 + , 2237, 2423, 2617, 2797, 2999, 3167, 3359, 3539 + , 3727, 3911, 4441 , 4787 , 5119 , 5471 , 5801 , 6143 , 6521 , 6827 + , 7177 , 7517 , 7853 , 8887 , 9587 , 10243 , 10937 , 11617 , 12289 + , 12967 , 13649 , 14341 , 15013 , 15727 + , 17749 , 19121 , 20479 , 21859 , 23209 , 24593 , 25939 , 27329 + , 28669 , 30047 , 31469 , 35507 , 38231 , 40961 , 43711 , 46439 + , 49157 , 51893 , 54617 , 57347 , 60077 , 62801 , 70583 , 75619 + , 80669 , 85703 , 90749 , 95783 , 100823 , 105871 , 110909 , 115963 + , 120997 , 126031 , 141157 , 151237 , 161323 , 171401 , 181499 , 191579 + , 201653 , 211741 , 221813 , 231893 , 241979 , 252079 + , 282311 , 302483 , 322649 , 342803 , 362969 , 383143 , 403301 , 423457 + , 443629 , 463787 , 483953 , 504121 , 564617 , 604949 , 645313 , 685609 + , 725939 , 766273 , 806609 , 846931 , 887261 , 927587 , 967919 , 1008239 + , 1123477 , 1198397 , 1273289 , 1348177 , 1423067 , 1497983 , 1572869 + , 1647761 , 1722667 , 1797581 , 1872461 , 1947359 , 2022253 + , 2246953 , 2396759 , 2546543 , 2696363 , 2846161 , 2995973 , 3145739 + , 3295541 , 3445357 , 3595117 , 3744941 , 3894707 , 4044503 + , 4493921 , 4793501 , 5093089 , 5392679 , 5692279 , 5991883 , 6291469 + , 6591059 , 6890641 , 7190243 , 7489829 , 7789447 , 8089033 + , 8987807 , 9586981 , 10186177 , 10785371 , 11384539 , 11983729 + , 12582917 , 13182109 , 13781291 , 14380469 , 14979667 , 15578861 + , 16178053 , 17895707 , 19014187 , 20132683 , 21251141 , 22369661 + , 23488103 , 24606583 , 25725083 , 26843549 , 27962027 , 29080529 + , 30198989 , 31317469 , 32435981 , 35791397 , 
38028379 , 40265327 + , 42502283 , 44739259 , 46976221 , 49213237 , 51450131 , 53687099 + , 55924061 , 58161041 , 60397993 , 62634959 , 64871921 + , 71582857 , 76056727 , 80530643 , 85004567 , 89478503 , 93952427 + , 98426347 , 102900263 , 107374217 , 111848111 , 116322053 , 120795971 + , 125269877 , 129743807 , 143165587 , 152113427 , 161061283 , 170009141 + , 178956983 , 187904819 , 196852693 , 205800547 , 214748383 , 223696237 + , 232644089 , 241591943 , 250539763 , 259487603 , 268435399 + }; + + const uint32_t num_primes = sizeof(primes)/sizeof(uint32_t); + + uint32_t hsize = primes[num_primes-1] ; + for (uint32_t i = 0; i < num_primes; ++i) { + if (size <= primes[i]) { + hsize = primes[i]; + break; + } + } + return hsize; +} + +}} // namespace Kokkos::Impl + diff --git a/lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp b/lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b788c966e9c5a04d0ce4ca626190d241ec273008 --- /dev/null +++ b/lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp @@ -0,0 +1,297 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_UNORDERED_MAP_IMPL_HPP +#define KOKKOS_UNORDERED_MAP_IMPL_HPP + +#include <Kokkos_Core_fwd.hpp> +#include <stdint.h> + +#include <cstdio> +#include <climits> +#include <iostream> +#include <iomanip> + +namespace Kokkos { namespace Impl { + +uint32_t find_hash_size( uint32_t size ); + +template <typename Map> +struct UnorderedMapRehash +{ + typedef Map map_type; + typedef typename map_type::const_map_type const_map_type; + typedef typename map_type::execution_space execution_space; + typedef typename map_type::size_type size_type; + + map_type m_dst; + const_map_type m_src; + + UnorderedMapRehash( map_type const& dst, const_map_type const& src) + : m_dst(dst), m_src(src) + {} + + void apply() const + { + parallel_for(m_src.capacity(), *this); + } + + KOKKOS_INLINE_FUNCTION + void operator()(size_type i) const + { + if ( m_src.valid_at(i) ) + m_dst.insert(m_src.key_at(i), m_src.value_at(i)); + } + +}; + +template <typename UMap> +struct UnorderedMapErase +{ + typedef UMap map_type; + typedef typename map_type::execution_space execution_space; + typedef typename map_type::size_type size_type; + typedef typename map_type::key_type key_type; + typedef typename map_type::impl_value_type value_type; + + map_type m_map; + + UnorderedMapErase( map_type const& map) + : m_map(map) + {} + + void apply() const + { + parallel_for(m_map.m_hash_lists.dimension_0(), *this); + } + + KOKKOS_INLINE_FUNCTION + void operator()( size_type i ) const + { + const size_type invalid_index = map_type::invalid_index; + + size_type curr = m_map.m_hash_lists(i); + size_type next = invalid_index; + + // remove erased head of the linked-list + while (curr != invalid_index && !m_map.valid_at(curr)) { + next = m_map.m_next_index[curr]; + m_map.m_next_index[curr] = invalid_index; + m_map.m_keys[curr] = key_type(); + if (m_map.is_set) m_map.m_values[curr] 
= value_type(); + curr = next; + m_map.m_hash_lists(i) = next; + } + + // if the list is non-empty and the head is valid + if (curr != invalid_index && m_map.valid_at(curr) ) { + size_type prev = curr; + curr = m_map.m_next_index[prev]; + + while (curr != invalid_index) { + next = m_map.m_next_index[curr]; + if (m_map.valid_at(curr)) { + prev = curr; + } + else { + // remove curr from list + m_map.m_next_index[prev] = next; + m_map.m_next_index[curr] = invalid_index; + m_map.m_keys[curr] = key_type(); + if (map_type::is_set) m_map.m_values[curr] = value_type(); + } + curr = next; + } + } + } +}; + +template <typename UMap> +struct UnorderedMapHistogram +{ + typedef UMap map_type; + typedef typename map_type::execution_space execution_space; + typedef typename map_type::size_type size_type; + + typedef View<int[100], execution_space> histogram_view; + typedef typename histogram_view::HostMirror host_histogram_view; + + map_type m_map; + histogram_view m_length; + histogram_view m_distance; + histogram_view m_block_distance; + + UnorderedMapHistogram( map_type const& map) + : m_map(map) + , m_length("UnorderedMap Histogram") + , m_distance("UnorderedMap Histogram") + , m_block_distance("UnorderedMap Histogram") + {} + + void calculate() + { + parallel_for(m_map.m_hash_lists.dimension_0(), *this); + } + + void clear() + { + Kokkos::deep_copy(m_length, 0); + Kokkos::deep_copy(m_distance, 0); + Kokkos::deep_copy(m_block_distance, 0); + } + + void print_length(std::ostream &out) + { + host_histogram_view host_copy = create_mirror_view(m_length); + Kokkos::deep_copy(host_copy, m_length); + + for (int i=0, size = host_copy.dimension_0(); i<size; ++i) + { + out << host_copy[i] << " , "; + } + out << "\b\b\b " << std::endl; + } + + void print_distance(std::ostream &out) + { + host_histogram_view host_copy = create_mirror_view(m_distance); + Kokkos::deep_copy(host_copy, m_distance); + + for (int i=0, size = host_copy.dimension_0(); i<size; ++i) + { + out << host_copy[i] << " 
, "; + } + out << "\b\b\b " << std::endl; + } + + void print_block_distance(std::ostream &out) + { + host_histogram_view host_copy = create_mirror_view(m_block_distance); + Kokkos::deep_copy(host_copy, m_block_distance); + + for (int i=0, size = host_copy.dimension_0(); i<size; ++i) + { + out << host_copy[i] << " , "; + } + out << "\b\b\b " << std::endl; + } + + KOKKOS_INLINE_FUNCTION + void operator()( size_type i ) const + { + const size_type invalid_index = map_type::invalid_index; + + uint32_t length = 0; + size_type min_index = ~0u, max_index = 0; + for (size_type curr = m_map.m_hash_lists(i); curr != invalid_index; curr = m_map.m_next_index[curr]) { + ++length; + min_index = (curr < min_index) ? curr : min_index; + max_index = (max_index < curr) ? curr : max_index; + } + + size_type distance = (0u < length) ? max_index - min_index : 0u; + size_type blocks = (0u < length) ? max_index/32u - min_index/32u : 0u; + + // normalize data + length = length < 100u ? length : 99u; + distance = distance < 100u ? distance : 99u; + blocks = blocks < 100u ? 
blocks : 99u; + + if (0u < length) + { + atomic_fetch_add( &m_length(length), 1); + atomic_fetch_add( &m_distance(distance), 1); + atomic_fetch_add( &m_block_distance(blocks), 1); + } + } +}; + +template <typename UMap> +struct UnorderedMapPrint +{ + typedef UMap map_type; + typedef typename map_type::execution_space execution_space; + typedef typename map_type::size_type size_type; + + map_type m_map; + + UnorderedMapPrint( map_type const& map) + : m_map(map) + {} + + void apply() + { + parallel_for(m_map.m_hash_lists.dimension_0(), *this); + } + + KOKKOS_INLINE_FUNCTION + void operator()( size_type i ) const + { + const size_type invalid_index = map_type::invalid_index; + + uint32_t list = m_map.m_hash_lists(i); + for (size_type curr = list, ii=0; curr != invalid_index; curr = m_map.m_next_index[curr], ++ii) { + printf("%d[%d]: %d->%d\n", list, ii, m_map.key_at(curr), m_map.value_at(curr)); + } + } +}; + +template <typename DKey, typename DValue, typename SKey, typename SValue> +struct UnorderedMapCanAssign : public false_ {}; + +template <typename Key, typename Value> +struct UnorderedMapCanAssign<Key,Value,Key,Value> : public true_ {}; + +template <typename Key, typename Value> +struct UnorderedMapCanAssign<const Key,Value,Key,Value> : public true_ {}; + +template <typename Key, typename Value> +struct UnorderedMapCanAssign<const Key,const Value,Key,Value> : public true_ {}; + +template <typename Key, typename Value> +struct UnorderedMapCanAssign<const Key,const Value,const Key,Value> : public true_ {}; + + +}} //Kokkos::Impl + +#endif // KOKKOS_UNORDERED_MAP_IMPL_HPP diff --git a/lib/kokkos/containers/unit_tests/CMakeLists.txt b/lib/kokkos/containers/unit_tests/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7fff0f835bb2e704914fe5df16556d6c4199a916 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/CMakeLists.txt @@ -0,0 +1,40 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) 
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) + +# Sources that are always part of the unit-test executable; backend-specific drivers are appended below. +# NOTE(review): TestCuda.cpp is listed unconditionally while the other backends are guarded by Kokkos_ENABLE_* - confirm the file guards its own contents. +SET(SOURCES + UnitTestMain.cpp + TestCuda.cpp + ) + +SET(LIBRARIES kokkoscore) + +IF(Kokkos_ENABLE_Pthread) + LIST( APPEND SOURCES + TestThreads.cpp + ) +ENDIF() + +IF(Kokkos_ENABLE_Serial) + LIST( APPEND SOURCES + TestSerial.cpp + ) +ENDIF() + +IF(Kokkos_ENABLE_OpenMP) + LIST( APPEND SOURCES + TestOpenMP.cpp + ) +ENDIF() + + +# One executable named UnitTest is built from the collected sources and run with a single MPI process. +TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest + SOURCES ${SOURCES} + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest + ) + diff --git a/lib/kokkos/containers/unit_tests/Makefile b/lib/kokkos/containers/unit_tests/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..48e3ff61d04b9de210a7f1976217f4d1aca9e8e8 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/Makefile @@ -0,0 +1,92 @@ +KOKKOS_PATH = ../.. + +GTEST_PATH = ../../TPL/gtest + +vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests + +default: build_all + echo "End Build" + + +include $(KOKKOS_PATH)/Makefile.kokkos + +# CUDA builds force CXX and LINK to $(NVCC_WRAPPER); other builds only supply defaults (?=) that an existing value overrides. +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + CXX = $(NVCC_WRAPPER) + CXXFLAGS ?= -O3 + LINK = $(CXX) + LDFLAGS ?= -lpthread +else + CXX ?= g++ + CXXFLAGS ?= -O3 + LINK ?= $(CXX) + LDFLAGS ?= -lpthread +endif + +KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/unit_tests + +# Each enabled backend below contributes one executable to TARGETS and one run rule to TEST_TARGETS. +TEST_TARGETS = +TARGETS = + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o + TARGETS += KokkosContainers_UnitTest_Cuda + TEST_TARGETS += test-cuda +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o + TARGETS += KokkosContainers_UnitTest_Threads + TEST_TARGETS += test-threads +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o + TARGETS += KokkosContainers_UnitTest_OpenMP + TEST_TARGETS += test-openmp +endif + +ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + OBJ_SERIAL = 
TestSerial.o UnitTestMain.o gtest-all.o + TARGETS += KokkosContainers_UnitTest_Serial + TEST_TARGETS += test-serial +endif + +# Link rules: one executable per backend, built from that backend's object list. +KokkosContainers_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Cuda + +KokkosContainers_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Threads + +KokkosContainers_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_OpenMP + +KokkosContainers_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Serial + +# Run rules: each test-<backend> target executes the matching executable. +test-cuda: KokkosContainers_UnitTest_Cuda + ./KokkosContainers_UnitTest_Cuda + +test-threads: KokkosContainers_UnitTest_Threads + ./KokkosContainers_UnitTest_Threads + +test-openmp: KokkosContainers_UnitTest_OpenMP + ./KokkosContainers_UnitTest_OpenMP + +test-serial: KokkosContainers_UnitTest_Serial + ./KokkosContainers_UnitTest_Serial + +build_all: $(TARGETS) + +test: $(TEST_TARGETS) + +clean: kokkos-clean + rm -f *.o $(TARGETS) + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + +# gtest is compiled from its single amalgamated source file. +gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc + diff --git a/lib/kokkos/containers/unit_tests/TestBitset.hpp b/lib/kokkos/containers/unit_tests/TestBitset.hpp new file mode 100644 index 0000000000000000000000000000000000000000..76fb30edcb68aa37f7beb55352212211bcf586c3 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/TestBitset.hpp @@ -0,0 +1,285 @@ +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_TEST_BITSET_HPP +#define KOKKOS_TEST_BITSET_HPP + +#include <gtest/gtest.h> +#include <iostream> + + +namespace Test { + +namespace Impl { + +template <typename Bitset, bool Set> +struct TestBitset +{ + typedef Bitset bitset_type; + typedef typename bitset_type::execution_space execution_space; + typedef uint32_t value_type; + + bitset_type m_bitset; + + TestBitset( bitset_type const& bitset) + : m_bitset(bitset) + {} + + unsigned testit(unsigned collisions) + { + execution_space::fence(); + + unsigned count = 0; + Kokkos::parallel_reduce( m_bitset.size()*collisions, *this, count); + return count; + } + + + KOKKOS_INLINE_FUNCTION + void init( value_type & v ) const { v = 0; } + + KOKKOS_INLINE_FUNCTION + void join( volatile value_type & dst, const volatile value_type & src ) const + { dst += src; } + + KOKKOS_INLINE_FUNCTION + void operator()(uint32_t i, value_type & v) const + { + i = i % m_bitset.size(); + if (Set) { + if (m_bitset.set(i)) { + if (m_bitset.test(i)) ++v; + } + } + else { + if (m_bitset.reset(i)) { + if (!m_bitset.test(i)) ++v; + } + } + } + +}; + +template <typename Bitset> +struct TestBitsetTest +{ + typedef Bitset bitset_type; + typedef typename bitset_type::execution_space execution_space; + typedef uint32_t value_type; + + bitset_type m_bitset; + + TestBitsetTest( bitset_type const& bitset) + : m_bitset(bitset) + {} + + unsigned testit() + { + execution_space::fence(); + + unsigned count = 0; + Kokkos::parallel_reduce( m_bitset.size(), *this, count); + return count; + } + + + KOKKOS_INLINE_FUNCTION + void init( value_type & v ) const { v = 0; } + + KOKKOS_INLINE_FUNCTION + void join( volatile value_type & dst, const volatile value_type & src ) const + { dst += src; } + + KOKKOS_INLINE_FUNCTION + void operator()(uint32_t i, value_type & v) const + { + if (m_bitset.test( i )) ++v; + } +}; + +template 
<typename Bitset, bool Set> +struct TestBitsetAny +{ + typedef Bitset bitset_type; + typedef typename bitset_type::execution_space execution_space; + typedef uint32_t value_type; + + bitset_type m_bitset; + + TestBitsetAny( bitset_type const& bitset) + : m_bitset(bitset) + {} + + unsigned testit() + { + execution_space::fence(); + + unsigned count = 0; + Kokkos::parallel_reduce( m_bitset.size(), *this, count); + return count; + } + + + KOKKOS_INLINE_FUNCTION + void init( value_type & v ) const { v = 0; } + + KOKKOS_INLINE_FUNCTION + void join( volatile value_type & dst, const volatile value_type & src ) const + { dst += src; } + + KOKKOS_INLINE_FUNCTION + void operator()(uint32_t i, value_type & v) const + { + bool result = false; + unsigned attempts = 0; + uint32_t hint = (i >> 4) << 4; + while (attempts < m_bitset.max_hint()) { + if (Set) { + Kokkos::tie(result, hint) = m_bitset.find_any_unset_near(hint, i); + if (result && m_bitset.set(hint)) { + ++v; + break; + } + else if (!result) { + ++attempts; + } + } + else { + Kokkos::tie(result, hint) = m_bitset.find_any_set_near(hint, i); + if (result && m_bitset.reset(hint)) { + ++v; + break; + } + else if (!result) { + ++attempts; + } + } + } + } + +}; +} // namespace Impl + + + +template <typename Device> +void test_bitset() +{ + typedef Kokkos::Bitset< Device > bitset_type; + typedef Kokkos::ConstBitset< Device > const_bitset_type; + + //unsigned test_sizes[] = { 0u, 1000u, 1u<<14, 1u<<16, 10000001 }; + unsigned test_sizes[] = { 1000u, 1u<<14, 1u<<16, 10000001 }; + + for (int i=0, end = sizeof(test_sizes)/sizeof(unsigned); i<end; ++i) { + + //std::cout << "Bitset " << test_sizes[i] << std::endl; + + bitset_type bitset(test_sizes[i]); + + //std::cout << " Check inital count " << std::endl; + // nothing should be set + { + Impl::TestBitsetTest< bitset_type > f(bitset); + uint32_t count = f.testit(); + EXPECT_EQ(0u, count); + EXPECT_EQ(count, bitset.count()); + } + + //std::cout << " Check set() " << std::endl; + 
bitset.set(); + // everything should be set + { + Impl::TestBitsetTest< const_bitset_type > f(bitset); + uint32_t count = f.testit(); + EXPECT_EQ(bitset.size(), count); + EXPECT_EQ(count, bitset.count()); + } + + //std::cout << " Check reset() " << std::endl; + bitset.reset(); + EXPECT_EQ(0u, bitset.count()); + + //std::cout << " Check set(i) " << std::endl; + // test setting bits + { + Impl::TestBitset< bitset_type, true > f(bitset); + uint32_t count = f.testit(10u); + EXPECT_EQ( bitset.size(), bitset.count()); + EXPECT_EQ( bitset.size(), count ); + } + + //std::cout << " Check reset(i) " << std::endl; + // test resetting bits + { + Impl::TestBitset< bitset_type, false > f(bitset); + uint32_t count = f.testit(10u); + EXPECT_EQ( bitset.size(), count); + EXPECT_EQ( 0u, bitset.count() ); + } + + + //std::cout << " Check find_any_set(i) " << std::endl; + // test setting any bits + { + Impl::TestBitsetAny< bitset_type, true > f(bitset); + uint32_t count = f.testit(); + EXPECT_EQ( bitset.size(), bitset.count()); + EXPECT_EQ( bitset.size(), count ); + } + + //std::cout << " Check find_any_unset(i) " << std::endl; + // test resetting any bits + { + Impl::TestBitsetAny< bitset_type, false > f(bitset); + uint32_t count = f.testit(); + EXPECT_EQ( bitset.size(), count); + EXPECT_EQ( 0u, bitset.count() ); + } + + } + +} + +} // namespace Test + +#endif //KOKKOS_TEST_BITSET_HPP + diff --git a/lib/kokkos/containers/unit_tests/TestComplex.hpp b/lib/kokkos/containers/unit_tests/TestComplex.hpp new file mode 100644 index 0000000000000000000000000000000000000000..94c04b61f46759d91f0738723d487980c8cb2a83 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/TestComplex.hpp @@ -0,0 +1,263 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + + +#ifndef KOKKOS_TEST_COMPLEX_HPP +#define KOKKOS_TEST_COMPLEX_HPP + +#include <Kokkos_Complex.hpp> +#include <gtest/gtest.h> +#include <iostream> + +namespace Test { + +namespace Impl { + template <typename RealType> + void testComplexConstructors () { + typedef Kokkos::complex<RealType> complex_type; + + complex_type z1; + complex_type z2 (0.0, 0.0); + complex_type z3 (1.0, 0.0); + complex_type z4 (0.0, 1.0); + complex_type z5 (-1.0, -2.0); + + ASSERT_TRUE( z1 == z2 ); + ASSERT_TRUE( z1 != z3 ); + ASSERT_TRUE( z1 != z4 ); + ASSERT_TRUE( z1 != z5 ); + + ASSERT_TRUE( z2 != z3 ); + ASSERT_TRUE( z2 != z4 ); + ASSERT_TRUE( z2 != z5 ); + + ASSERT_TRUE( z3 != z4 ); + ASSERT_TRUE( z3 != z5 ); + + complex_type z6 (-1.0, -2.0); + ASSERT_TRUE( z5 == z6 ); + + // Make sure that complex has value semantics, in particular, that + // equality tests use values and not pointers, so that + // reassignment actually changes the value. + z1 = complex_type (-3.0, -4.0); + ASSERT_TRUE( z1.real () == -3.0 ); + ASSERT_TRUE( z1.imag () == -4.0 ); + ASSERT_TRUE( z1 != z2 ); + + complex_type z7 (1.0); + ASSERT_TRUE( z3 == z7 ); + ASSERT_TRUE( z7 == 1.0 ); + ASSERT_TRUE( z7 != -1.0 ); + + z7 = complex_type (5.0); + ASSERT_TRUE( z7.real () == 5.0 ); + ASSERT_TRUE( z7.imag () == 0.0 ); + } + + template <typename RealType> + void testPlus () { + typedef Kokkos::complex<RealType> complex_type; + + complex_type z1 (1.0, -1.0); + complex_type z2 (-1.0, 1.0); + complex_type z3 = z1 + z2; + ASSERT_TRUE( z3 == complex_type (0.0, 0.0) ); + } + + template <typename RealType> + void testMinus () { + typedef Kokkos::complex<RealType> complex_type; + + // Test binary minus. + complex_type z1 (1.0, -1.0); + complex_type z2 (-1.0, 1.0); + complex_type z3 = z1 - z2; + ASSERT_TRUE( z3 == complex_type (2.0, -2.0) ); + + // Test unary minus. 
+ complex_type z4 (3.0, -4.0); + ASSERT_TRUE( -z1 == complex_type (-3.0, 4.0) ); + } + + template <typename RealType> + void testTimes () { + typedef Kokkos::complex<RealType> complex_type; + + complex_type z1 (1.0, -1.0); + complex_type z2 (-1.0, 1.0); + complex_type z3 = z1 * z2; + ASSERT_TRUE( z3 == complex_type (0.0, 2.0) ); + + // Make sure that std::complex * Kokkos::complex works too. + std::complex<RealType> z4 (-1.0, 1.0); + complex_type z5 = z4 * z1; + ASSERT_TRUE( z5 == complex_type (0.0, 2.0) ); + } + + template <typename RealType> + void testDivide () { + typedef Kokkos::complex<RealType> complex_type; + + // Test division of a complex number by a real number. + complex_type z1 (1.0, -1.0); + complex_type z2 (1.0 / 2.0, -1.0 / 2.0); + ASSERT_TRUE( z1 / 2.0 == z2 ); + + // (-1+2i)/(1-i) == ((-1+2i)(1+i)) / ((1-i)(1+i)) + // (-1+2i)(1+i) == -3 + i + complex_type z3 (-1.0, 2.0); + complex_type z4 (1.0, -1.0); + complex_type z5 (-3.0, 1.0); + ASSERT_TRUE(z3 * Kokkos::conj (z4) == z5 ); + + // Test division of a complex number by a complex number. + // This assumes that RealType is a floating-point type. + complex_type z6 (Kokkos::real (z5) / 2.0, + Kokkos::imag (z5) / 2.0); + + complex_type z7 = z3 / z4; + ASSERT_TRUE( z7 == z6 ); + } + + template <typename RealType> + void testOutsideKernel () { + testComplexConstructors<RealType> (); + testPlus<RealType> (); + testTimes<RealType> (); + testDivide<RealType> (); + } + + + template<typename RealType, typename Device> + void testCreateView () { + typedef Kokkos::complex<RealType> complex_type; + Kokkos::View<complex_type*, Device> x ("x", 10); + ASSERT_TRUE( x.dimension_0 () == 10 ); + + // Test that View assignment works. 
+ Kokkos::View<complex_type*, Device> x_nonconst = x; + Kokkos::View<const complex_type*, Device> x_const = x; + } + + template<typename RealType, typename Device> + class Fill { + public: + typedef typename Device::execution_space execution_space; + + typedef Kokkos::View<Kokkos::complex<RealType>*, Device> view_type; + typedef typename view_type::size_type size_type; + + KOKKOS_INLINE_FUNCTION + void operator () (const size_type i) const { + x_(i) = val_; + } + + Fill (const view_type& x, const Kokkos::complex<RealType>& val) : + x_ (x), val_ (val) + {} + + private: + view_type x_; + const Kokkos::complex<RealType> val_; + }; + + template<typename RealType, typename Device> + class Sum { + public: + typedef typename Device::execution_space execution_space; + + typedef Kokkos::View<const Kokkos::complex<RealType>*, Device> view_type; + typedef typename view_type::size_type size_type; + typedef Kokkos::complex<RealType> value_type; + + KOKKOS_INLINE_FUNCTION + void operator () (const size_type i, Kokkos::complex<RealType>& sum) const { + sum += x_(i); + } + + Sum (const view_type& x) : x_ (x) {} + + private: + view_type x_; + }; + + template<typename RealType, typename Device> + void testInsideKernel () { + typedef Kokkos::complex<RealType> complex_type; + typedef Kokkos::View<complex_type*, Device> view_type; + typedef typename view_type::size_type size_type; + + const size_type N = 1000; + view_type x ("x", N); + ASSERT_TRUE( x.dimension_0 () == N ); + + // Kokkos::parallel_reduce (N, [=] (const size_type i, complex_type& result) { + // result += x[i]; + // }); + + Kokkos::parallel_for (N, Fill<RealType, Device> (x, complex_type (1.0, -1.0))); + + complex_type sum; + Kokkos::parallel_reduce (N, Sum<RealType, Device> (x), sum); + + ASSERT_TRUE( sum.real () == 1000.0 && sum.imag () == -1000.0 ); + } +} // namespace Impl + + +template <typename Device> +void testComplex () +{ + Impl::testOutsideKernel<float> (); + Impl::testOutsideKernel<double> (); + + 
Impl::testCreateView<float, Device> (); + Impl::testCreateView<double, Device> (); + + Impl::testInsideKernel<float, Device> (); + Impl::testInsideKernel<double, Device> (); +} + + +} // namespace Test + +#endif // KOKKOS_TEST_COMPLEX_HPP diff --git a/lib/kokkos/containers/unit_tests/TestCuda.cpp b/lib/kokkos/containers/unit_tests/TestCuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e30160b24e3a57d927924067d171ee8b49540357 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/TestCuda.cpp @@ -0,0 +1,227 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <iostream> +#include <iomanip> +#include <stdint.h> + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +#include <Kokkos_Bitset.hpp> +#include <Kokkos_UnorderedMap.hpp> +#include <Kokkos_Vector.hpp> + +#include <TestBitset.hpp> +#include <TestUnorderedMap.hpp> +#include <TestStaticCrsGraph.hpp> +#include <TestVector.hpp> +#include <TestDualView.hpp> +#include <TestDynamicView.hpp> +#include <TestSegmentedView.hpp> + +#include <Kokkos_DynRankView.hpp> +#include <TestDynViewAPI.hpp> + +//---------------------------------------------------------------------------- + + +#ifdef KOKKOS_HAVE_CUDA + +namespace Test { + +class cuda : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + Kokkos::HostSpace::execution_space::initialize(); + Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) ); + } + static void TearDownTestCase() + { + Kokkos::Cuda::finalize(); + Kokkos::HostSpace::execution_space::finalize(); + } +}; + +TEST_F( cuda , dyn_view_api) { + TestDynViewAPI< double , Kokkos::Cuda >(); +} + +TEST_F( cuda , staticcrsgraph ) +{ + TestStaticCrsGraph::run_test_graph< Kokkos::Cuda >(); + TestStaticCrsGraph::run_test_graph2< Kokkos::Cuda >(); +} + + +void cuda_test_insert_close( 
uint32_t num_nodes + , uint32_t num_inserts + , uint32_t num_duplicates + ) +{ + test_insert< Kokkos::Cuda >( num_nodes, num_inserts, num_duplicates, true); +} + +void cuda_test_insert_far( uint32_t num_nodes + , uint32_t num_inserts + , uint32_t num_duplicates + ) +{ + test_insert< Kokkos::Cuda >( num_nodes, num_inserts, num_duplicates, false); +} + +void cuda_test_failed_insert( uint32_t num_nodes ) +{ + test_failed_insert< Kokkos::Cuda >( num_nodes ); +} + +void cuda_test_deep_copy( uint32_t num_nodes ) +{ + test_deep_copy< Kokkos::Cuda >( num_nodes ); +} + +void cuda_test_vector_combinations(unsigned int size) +{ + test_vector_combinations<int,Kokkos::Cuda>(size); +} + +void cuda_test_dualview_combinations(unsigned int size) +{ + test_dualview_combinations<int,Kokkos::Cuda>(size); +} + +void cuda_test_segmented_view(unsigned int size) +{ + test_segmented_view<double,Kokkos::Cuda>(size); +} + +void cuda_test_bitset() +{ + test_bitset<Kokkos::Cuda>(); +} + + + +/*TEST_F( cuda, bitset ) +{ + cuda_test_bitset(); +}*/ + +#define CUDA_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat ) \ + TEST_F( cuda, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \ + for (int i=0; i<repeat; ++i) \ + cuda_test_insert_##name(num_nodes,num_inserts,num_duplicates); \ + } + +#define CUDA_FAILED_INSERT_TEST( num_nodes, repeat ) \ + TEST_F( cuda, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \ + for (int i=0; i<repeat; ++i) \ + cuda_test_failed_insert(num_nodes); \ + } + +#define CUDA_ASSIGNEMENT_TEST( num_nodes, repeat ) \ + TEST_F( cuda, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \ + for (int i=0; i<repeat; ++i) \ + cuda_test_assignment_operators(num_nodes); \ + } + +#define CUDA_DEEP_COPY( num_nodes, repeat ) \ + TEST_F( cuda, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \ + for (int i=0; i<repeat; ++i) \ + cuda_test_deep_copy(num_nodes); \ + } + +#define CUDA_VECTOR_COMBINE_TEST( size ) 
\ + TEST_F( cuda, vector_combination##size##x) { \ + cuda_test_vector_combinations(size); \ + } + +#define CUDA_DUALVIEW_COMBINE_TEST( size ) \ + TEST_F( cuda, dualview_combination##size##x) { \ + cuda_test_dualview_combinations(size); \ + } + +#define CUDA_SEGMENTEDVIEW_TEST( size ) \ + TEST_F( cuda, segmentedview_##size##x) { \ + cuda_test_segmented_view(size); \ + } + +CUDA_DUALVIEW_COMBINE_TEST( 10 ) +CUDA_VECTOR_COMBINE_TEST( 10 ) +CUDA_VECTOR_COMBINE_TEST( 3057 ) + + +CUDA_INSERT_TEST(close, 100000, 90000, 100, 500) +CUDA_INSERT_TEST(far, 100000, 90000, 100, 500) +CUDA_DEEP_COPY( 10000, 1 ) +CUDA_FAILED_INSERT_TEST( 10000, 1000 ) +CUDA_SEGMENTEDVIEW_TEST( 200 ) + + +#undef CUDA_INSERT_TEST +#undef CUDA_FAILED_INSERT_TEST +#undef CUDA_ASSIGNEMENT_TEST +#undef CUDA_DEEP_COPY +#undef CUDA_VECTOR_COMBINE_TEST +#undef CUDA_DUALVIEW_COMBINE_TEST +#undef CUDA_SEGMENTEDVIEW_TEST + + +TEST_F( cuda , dynamic_view ) +{ + typedef TestDynamicView< double , Kokkos::CudaUVMSpace > + TestDynView ; + + for ( int i = 0 ; i < 10 ; ++i ) { + TestDynView::run( 100000 + 100 * i ); + } +} + + +} + +#endif /* #ifdef KOKKOS_HAVE_CUDA */ + diff --git a/lib/kokkos/containers/unit_tests/TestDualView.hpp b/lib/kokkos/containers/unit_tests/TestDualView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e72c69f7d41cf7d493becfcbb863e5f1d9f6679f --- /dev/null +++ b/lib/kokkos/containers/unit_tests/TestDualView.hpp @@ -0,0 +1,121 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_DUALVIEW_HPP +#define KOKKOS_TEST_DUALVIEW_HPP + +#include <gtest/gtest.h> +#include <iostream> +#include <cstdlib> +#include <cstdio> +#include <impl/Kokkos_Timer.hpp> + +namespace Test { + +namespace Impl { + + template <typename Scalar, class Device> + struct test_dualview_combinations + { + typedef test_dualview_combinations<Scalar,Device> self_type; + + typedef Scalar scalar_type; + typedef Device execution_space; + + Scalar reference; + Scalar result; + + template <typename ViewType> + Scalar run_me(unsigned int n,unsigned int m){ + if(n<10) n = 10; + if(m<3) m = 3; + ViewType a("A",n,m); + + Kokkos::deep_copy( a.d_view , 1 ); + + a.template modify<typename ViewType::execution_space>(); + a.template sync<typename ViewType::host_mirror_space>(); + + a.h_view(5,1) = 3; + a.h_view(6,1) = 4; + a.h_view(7,2) = 5; + a.template modify<typename ViewType::host_mirror_space>(); + ViewType b = Kokkos::subview(a,std::pair<unsigned int, unsigned int>(6,9),std::pair<unsigned int, unsigned int>(0,1)); + a.template sync<typename ViewType::execution_space>(); + b.template modify<typename ViewType::execution_space>(); + + Kokkos::deep_copy( b.d_view , 2 ); + + a.template sync<typename ViewType::host_mirror_space>(); + Scalar count = 0; + for(unsigned int i = 0; i<a.d_view.dimension_0(); i++) + for(unsigned int j = 0; j<a.d_view.dimension_1(); j++) + count += a.h_view(i,j); + return count - a.d_view.dimension_0()*a.d_view.dimension_1()-2-4-3*2; + } + + + test_dualview_combinations(unsigned int size) + { + result = run_me< Kokkos::DualView<Scalar**,Kokkos::LayoutLeft,Device> >(size,3); + } + + }; + +} // namespace Impl + + + + +template <typename Scalar, typename Device> +void test_dualview_combinations(unsigned int size) +{ + Impl::test_dualview_combinations<Scalar,Device> test(size); + ASSERT_EQ( test.result,0); + +} + + +} 
// namespace Test + +#endif //KOKKOS_TEST_UNORDERED_MAP_HPP diff --git a/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp b/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e71ccc0091f0ad8c67de46fe91b4b08e43dcc27d --- /dev/null +++ b/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp @@ -0,0 +1,1559 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> +#include <stdexcept> +#include <sstream> +#include <iostream> + +/*--------------------------------------------------------------------------*/ + + +/*--------------------------------------------------------------------------*/ + +namespace Test { + +template< class T , class ... P > +size_t allocation_count( const Kokkos::Experimental::DynRankView<T,P...> & view ) +{ + const size_t card = view.size(); + const size_t alloc = view.span(); + + return card <= alloc ? 
alloc : 0 ; +} + +/*--------------------------------------------------------------------------*/ + +template< typename T, class DeviceType> +struct TestViewOperator +{ + typedef DeviceType execution_space ; + + static const unsigned N = 100 ; + static const unsigned D = 3 ; + + typedef Kokkos::Experimental::DynRankView< T , execution_space > view_type ; + + const view_type v1 ; + const view_type v2 ; + + TestViewOperator() + : v1( "v1" , N , D ) + , v2( "v2" , N , D ) + {} + + static void testit() + { + Kokkos::parallel_for( N , TestViewOperator() ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const unsigned i ) const + { + const unsigned X = 0 ; + const unsigned Y = 1 ; + const unsigned Z = 2 ; + + v2(i,X) = v1(i,X); + v2(i,Y) = v1(i,Y); + v2(i,Z) = v1(i,Z); + } +}; + +/*--------------------------------------------------------------------------*/ + +template< class DataType , + class DeviceType , + unsigned Rank > +struct TestViewOperator_LeftAndRight ; + +template< class DataType , class DeviceType > +struct TestViewOperator_LeftAndRight< DataType , DeviceType , 7 > +{ + typedef DeviceType execution_space ; + typedef typename execution_space::memory_space memory_space ; + typedef typename execution_space::size_type size_type ; + + typedef int value_type ; + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + const volatile value_type & input ) + { update |= input ; } + + KOKKOS_INLINE_FUNCTION + static void init( value_type & update ) + { update = 0 ; } + + + typedef Kokkos:: + Experimental::DynRankView< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + + typedef Kokkos:: + Experimental::DynRankView< DataType, Kokkos::LayoutRight, execution_space > right_view ; + + left_view left ; + right_view right ; + long left_alloc ; + long right_alloc ; + + TestViewOperator_LeftAndRight(unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4, unsigned N5, unsigned N6 ) + : left( "left" , N0, N1, N2, N3, N4, N5, N6 ) + , 
right( "right" , N0, N1, N2, N3, N4, N5, N6 ) + , left_alloc( allocation_count( left ) ) + , right_alloc( allocation_count( right ) ) + {} + + static void testit(unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4, unsigned N5, unsigned N6 ) + { + TestViewOperator_LeftAndRight driver(N0, N1, N2, N3, N4, N5, N6 ); + + int error_flag = 0 ; + + Kokkos::parallel_reduce( 1 , driver , error_flag ); + + ASSERT_EQ( error_flag , 0 ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type , value_type & update ) const + { + long offset ; + + offset = -1 ; + for ( unsigned i6 = 0 ; i6 < unsigned(left.dimension_6()) ; ++i6 ) + for ( unsigned i5 = 0 ; i5 < unsigned(left.dimension_5()) ; ++i5 ) + for ( unsigned i4 = 0 ; i4 < unsigned(left.dimension_4()) ; ++i4 ) + for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) + for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) + for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + { + const long j = & left( i0, i1, i2, i3, i4, i5, i6 ) - + & left( 0, 0, 0, 0, 0, 0, 0 ); + if ( j <= offset || left_alloc <= j ) { update |= 1 ; } + offset = j ; + } + + offset = -1 ; + for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) + for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) + for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) + for ( unsigned i4 = 0 ; i4 < unsigned(right.dimension_4()) ; ++i4 ) + for ( unsigned i5 = 0 ; i5 < unsigned(right.dimension_5()) ; ++i5 ) + for ( unsigned i6 = 0 ; i6 < unsigned(right.dimension_6()) ; ++i6 ) + { + const long j = & right( i0, i1, i2, i3, i4, i5, i6 ) - + & right( 0, 0, 0, 0, 0, 0, 0 ); + if ( j <= offset || right_alloc <= j ) { update |= 2 ; } + offset = j ; + } + } +}; + +template< class DataType , class DeviceType > +struct 
TestViewOperator_LeftAndRight< DataType , DeviceType , 6 > +{ + typedef DeviceType execution_space ; + typedef typename execution_space::memory_space memory_space ; + typedef typename execution_space::size_type size_type ; + + typedef int value_type ; + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + const volatile value_type & input ) + { update |= input ; } + + KOKKOS_INLINE_FUNCTION + static void init( value_type & update ) + { update = 0 ; } + + + typedef Kokkos:: + Experimental::DynRankView< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + + typedef Kokkos:: + Experimental::DynRankView< DataType, Kokkos::LayoutRight, execution_space > right_view ; + + left_view left ; + right_view right ; + long left_alloc ; + long right_alloc ; + + TestViewOperator_LeftAndRight(unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4, unsigned N5 ) + : left( "left" , N0, N1, N2, N3, N4, N5 ) + , right( "right" , N0, N1, N2, N3, N4, N5 ) + , left_alloc( allocation_count( left ) ) + , right_alloc( allocation_count( right ) ) + {} + + static void testit(unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4, unsigned N5) + { + TestViewOperator_LeftAndRight driver (N0, N1, N2, N3, N4, N5); + + int error_flag = 0 ; + + Kokkos::parallel_reduce( 1 , driver , error_flag ); + + ASSERT_EQ( error_flag , 0 ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type , value_type & update ) const + { + long offset ; + + offset = -1 ; + for ( unsigned i5 = 0 ; i5 < unsigned(left.dimension_5()) ; ++i5 ) + for ( unsigned i4 = 0 ; i4 < unsigned(left.dimension_4()) ; ++i4 ) + for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) + for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) + for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + { + const long j = & left( i0, i1, i2, i3, i4, i5 ) - + & left( 0, 0, 0, 0, 0, 0 
); + if ( j <= offset || left_alloc <= j ) { update |= 1 ; } + offset = j ; + } + + offset = -1 ; + for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) + for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) + for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) + for ( unsigned i4 = 0 ; i4 < unsigned(right.dimension_4()) ; ++i4 ) + for ( unsigned i5 = 0 ; i5 < unsigned(right.dimension_5()) ; ++i5 ) + { + const long j = & right( i0, i1, i2, i3, i4, i5 ) - + & right( 0, 0, 0, 0, 0, 0 ); + if ( j <= offset || right_alloc <= j ) { update |= 2 ; } + offset = j ; + } + } +}; + +template< class DataType , class DeviceType > +struct TestViewOperator_LeftAndRight< DataType , DeviceType , 5 > +{ + typedef DeviceType execution_space ; + typedef typename execution_space::memory_space memory_space ; + typedef typename execution_space::size_type size_type ; + + typedef int value_type ; + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + const volatile value_type & input ) + { update |= input ; } + + KOKKOS_INLINE_FUNCTION + static void init( value_type & update ) + { update = 0 ; } + + + typedef Kokkos:: + Experimental::DynRankView< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + + typedef Kokkos:: + Experimental::DynRankView< DataType, Kokkos::LayoutRight, execution_space > right_view ; + + typedef Kokkos:: + Experimental::DynRankView< DataType, Kokkos::LayoutStride, execution_space > stride_view ; + + left_view left ; + right_view right ; + stride_view left_stride ; + stride_view right_stride ; + long left_alloc ; + long right_alloc ; + + TestViewOperator_LeftAndRight(unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4 ) + : left( "left" , N0, N1, N2, N3, N4 ) + , right( "right" , N0, N1, N2, N3, N4 ) + , left_stride( left ) + , right_stride( right ) + , left_alloc( allocation_count( left ) ) + , right_alloc( 
allocation_count( right ) ) + {} + + static void testit(unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4) + { + TestViewOperator_LeftAndRight driver(N0, N1, N2, N3, N4); + + int error_flag = 0 ; + + Kokkos::parallel_reduce( 1 , driver , error_flag ); + + ASSERT_EQ( error_flag , 0 ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type , value_type & update ) const + { + long offset ; + + offset = -1 ; + for ( unsigned i4 = 0 ; i4 < unsigned(left.dimension_4()) ; ++i4 ) + for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) + for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) + for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + { + const long j = & left( i0, i1, i2, i3, i4 ) - + & left( 0, 0, 0, 0, 0 ); + if ( j <= offset || left_alloc <= j ) { update |= 1 ; } + offset = j ; + + if ( & left( i0, i1, i2, i3, i4 ) != + & left_stride( i0, i1, i2, i3, i4 ) ) { update |= 4 ; } + } + + offset = -1 ; + for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) + for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) + for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) + for ( unsigned i4 = 0 ; i4 < unsigned(right.dimension_4()) ; ++i4 ) + { + const long j = & right( i0, i1, i2, i3, i4 ) - + & right( 0, 0, 0, 0, 0 ); + if ( j <= offset || right_alloc <= j ) { update |= 2 ; } + offset = j ; + + if ( & right( i0, i1, i2, i3, i4 ) != + & right_stride( i0, i1, i2, i3, i4 ) ) { update |= 8 ; } + } + } +}; + +template< class DataType , class DeviceType > +struct TestViewOperator_LeftAndRight< DataType , DeviceType , 4 > +{ + typedef DeviceType execution_space ; + typedef typename execution_space::memory_space memory_space ; + typedef typename execution_space::size_type size_type ; + + typedef int value_type ; + + 
KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + const volatile value_type & input ) + { update |= input ; } + + KOKKOS_INLINE_FUNCTION + static void init( value_type & update ) + { update = 0 ; } + + + typedef Kokkos:: + Experimental::DynRankView< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + + typedef Kokkos:: + Experimental::DynRankView< DataType, Kokkos::LayoutRight, execution_space > right_view ; + + left_view left ; + right_view right ; + long left_alloc ; + long right_alloc ; + + TestViewOperator_LeftAndRight(unsigned N0, unsigned N1, unsigned N2, unsigned N3) + : left( "left" , N0, N1, N2, N3 ) + , right( "right" , N0, N1, N2, N3 ) + , left_alloc( allocation_count( left ) ) + , right_alloc( allocation_count( right ) ) + {} + + static void testit(unsigned N0, unsigned N1, unsigned N2, unsigned N3) + { + TestViewOperator_LeftAndRight driver (N0, N1, N2, N3); + + int error_flag = 0 ; + + Kokkos::parallel_reduce( 1 , driver , error_flag ); + + ASSERT_EQ( error_flag , 0 ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type , value_type & update ) const + { + long offset ; + + offset = -1 ; + for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) + for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) + for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + { + const long j = & left( i0, i1, i2, i3 ) - + & left( 0, 0, 0, 0 ); + if ( j <= offset || left_alloc <= j ) { update |= 1 ; } + offset = j ; + } + + offset = -1 ; + for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) + for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) + for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) + { + const long j = & right( i0, i1, i2, i3 ) - + & right( 0, 0, 0, 0 ); + if ( j <= offset || right_alloc 
<= j ) { update |= 2 ; } + offset = j ; + } + } +}; + +template< class DataType , class DeviceType > +struct TestViewOperator_LeftAndRight< DataType , DeviceType , 3 > +{ + typedef DeviceType execution_space ; + typedef typename execution_space::memory_space memory_space ; + typedef typename execution_space::size_type size_type ; + + typedef int value_type ; + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + const volatile value_type & input ) + { update |= input ; } + + KOKKOS_INLINE_FUNCTION + static void init( value_type & update ) + { update = 0 ; } + + + typedef Kokkos:: + Experimental::DynRankView< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + + typedef Kokkos:: + Experimental::DynRankView< DataType, Kokkos::LayoutRight, execution_space > right_view ; + + typedef Kokkos:: + Experimental::DynRankView< DataType, Kokkos::LayoutStride, execution_space > stride_view ; + + left_view left ; + right_view right ; + stride_view left_stride ; + stride_view right_stride ; + long left_alloc ; + long right_alloc ; + + TestViewOperator_LeftAndRight(unsigned N0, unsigned N1, unsigned N2) + : left( std::string("left") , N0, N1, N2 ) + , right( std::string("right") , N0, N1, N2 ) + , left_stride( left ) + , right_stride( right ) + , left_alloc( allocation_count( left ) ) + , right_alloc( allocation_count( right ) ) + {} + + static void testit(unsigned N0, unsigned N1, unsigned N2) + { + TestViewOperator_LeftAndRight driver (N0, N1, N2); + + int error_flag = 0 ; + + Kokkos::parallel_reduce( 1 , driver , error_flag ); + + ASSERT_EQ( error_flag , 0 ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type , value_type & update ) const + { + long offset ; + + offset = -1 ; + for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) + for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + { + const long j = & left( i0, i1, i2 ) - + & 
left( 0, 0, 0 ); + if ( j <= offset || left_alloc <= j ) { update |= 1 ; } + offset = j ; + + if ( & left(i0,i1,i2) != & left_stride(i0,i1,i2) ) { update |= 4 ; } + } + + offset = -1 ; + for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) + for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) + { + const long j = & right( i0, i1, i2 ) - + & right( 0, 0, 0 ); + if ( j <= offset || right_alloc <= j ) { update |= 2 ; } + offset = j ; + + if ( & right(i0,i1,i2) != & right_stride(i0,i1,i2) ) { update |= 8 ; } + } + + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) + { + if ( & left(i0,i1,i2) != & left(i0,i1,i2,0,0,0,0) ) { update |= 3 ; } + if ( & right(i0,i1,i2) != & right(i0,i1,i2,0,0,0,0) ) { update |= 3 ; } + } + } +}; + +template< class DataType , class DeviceType > +struct TestViewOperator_LeftAndRight< DataType , DeviceType , 2 > +{ + typedef DeviceType execution_space ; + typedef typename execution_space::memory_space memory_space ; + typedef typename execution_space::size_type size_type ; + + typedef int value_type ; + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + const volatile value_type & input ) + { update |= input ; } + + KOKKOS_INLINE_FUNCTION + static void init( value_type & update ) + { update = 0 ; } + + + typedef Kokkos:: + Experimental::DynRankView< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + + typedef Kokkos:: + Experimental::DynRankView< DataType, Kokkos::LayoutRight, execution_space > right_view ; + + left_view left ; + right_view right ; + long left_alloc ; + long right_alloc ; + + TestViewOperator_LeftAndRight(unsigned N0, unsigned N1) + : left( "left" , N0, N1 ) + , right( "right" , N0, N1 ) + , left_alloc( allocation_count( left ) ) + , 
right_alloc( allocation_count( right ) ) + {} + + static void testit(unsigned N0, unsigned N1) + { + TestViewOperator_LeftAndRight driver(N0, N1); + + int error_flag = 0 ; + + Kokkos::parallel_reduce( 1 , driver , error_flag ); + + ASSERT_EQ( error_flag , 0 ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type , value_type & update ) const + { + long offset ; + + offset = -1 ; + for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + { + const long j = & left( i0, i1 ) - + & left( 0, 0 ); + if ( j <= offset || left_alloc <= j ) { update |= 1 ; } + offset = j ; + } + + offset = -1 ; + for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) + { + const long j = & right( i0, i1 ) - + & right( 0, 0 ); + if ( j <= offset || right_alloc <= j ) { update |= 2 ; } + offset = j ; + } + + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + { + if ( & left(i0,i1) != & left(i0,i1,0,0,0,0,0) ) { update |= 3 ; } + if ( & right(i0,i1) != & right(i0,i1,0,0,0,0,0) ) { update |= 3 ; } + } + } +}; + +template< class DataType , class DeviceType > +struct TestViewOperator_LeftAndRight< DataType , DeviceType , 1 > +{ + typedef DeviceType execution_space ; + typedef typename execution_space::memory_space memory_space ; + typedef typename execution_space::size_type size_type ; + + typedef int value_type ; + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + const volatile value_type & input ) + { update |= input ; } + + KOKKOS_INLINE_FUNCTION + static void init( value_type & update ) + { update = 0 ; } + + + typedef Kokkos:: + Experimental::DynRankView< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + + typedef Kokkos:: + Experimental::DynRankView< DataType, Kokkos::LayoutRight, 
execution_space > right_view ; + + typedef Kokkos:: + Experimental::DynRankView< DataType, Kokkos::LayoutStride, execution_space > stride_view ; + + left_view left ; + right_view right ; + stride_view left_stride ; + stride_view right_stride ; + long left_alloc ; + long right_alloc ; + + TestViewOperator_LeftAndRight(unsigned N0) + : left( "left" , N0 ) + , right( "right" , N0 ) + , left_stride( left ) + , right_stride( right ) + , left_alloc( allocation_count( left ) ) + , right_alloc( allocation_count( right ) ) + {} +/* Builds a driver of extent N0, reduces it over a single work item, and asserts that no error bit was set. */ + static void testit(unsigned N0) + { + TestViewOperator_LeftAndRight driver (N0) ; + + int error_flag = 0 ; + + Kokkos::parallel_reduce( 1 , driver , error_flag ); + + ASSERT_EQ( error_flag , 0 ); + } +/* For every index: bits 1 and 2 flag a mismatch between the 1-index access and the zero-padded 7-index access; bit 4 and bit 8 flag a LayoutLeft or LayoutRight element whose address differs from the matching LayoutStride view. */ + KOKKOS_INLINE_FUNCTION + void operator()( const size_type , value_type & update ) const + { + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + { + if ( & left(i0) != & left(i0,0,0,0,0,0,0) ) { update |= 3 ; } + if ( & right(i0) != & right(i0,0,0,0,0,0,0) ) { update |= 3 ; } + if ( & left(i0) != & left_stride(i0) ) { update |= 4 ; } + if ( & right(i0) != & right_stride(i0) ) { update |= 8 ; } + } + } +}; + +/*--------------------------------------------------------------------------*/ +/* Test driver for DynRankView: constructing an instance runs every run_test_* routine of this class and then the TestViewOperator and TestViewOperator_LeftAndRight checks for ranks 7 down to 1. */ +template< typename T, class DeviceType > +class TestDynViewAPI +{ +public: + typedef DeviceType device ; + + enum { N0 = 1000 , + N1 = 3 , + N2 = 5 , + N3 = 7 }; + + typedef Kokkos::Experimental::DynRankView< T , device > dView0 ; + typedef Kokkos::Experimental::DynRankView< const T , device > const_dView0 ; + + typedef Kokkos::Experimental::DynRankView< T, device, Kokkos::MemoryUnmanaged > dView0_unmanaged ; + typedef typename dView0::host_mirror_space host_drv_space ; + + typedef Kokkos::Experimental::View< T , device > View0 ; + typedef Kokkos::Experimental::View< T* , device > View1 ; + typedef Kokkos::Experimental::View< T******* , device > View7 ; + + typedef typename View0::host_mirror_space host_view_space ; + + TestDynViewAPI() + { +
run_test_resize_realloc(); + run_test_mirror(); + run_test_scalar(); + run_test(); + run_test_const(); + run_test_subview(); + run_test_subview_strided(); + run_test_vector(); + + TestViewOperator< T , device >::testit(); + TestViewOperator_LeftAndRight< int , device , 7 >::testit(2,3,4,2,3,4,2); + TestViewOperator_LeftAndRight< int , device , 6 >::testit(2,3,4,2,3,4); + TestViewOperator_LeftAndRight< int , device , 5 >::testit(2,3,4,2,3); + TestViewOperator_LeftAndRight< int , device , 4 >::testit(2,3,4,2); + TestViewOperator_LeftAndRight< int , device , 3 >::testit(2,3,4); + TestViewOperator_LeftAndRight< int , device , 2 >::testit(2,3); + TestViewOperator_LeftAndRight< int , device , 1 >::testit(2); + } +/* Checks that resize() and realloc() applied to a rank-3 view with two extents leave a rank-2 view with the requested extents and dimension_2() equal to 1. */ + static void run_test_resize_realloc() + { + dView0 drv0("drv0", 10, 20, 30); + ASSERT_EQ( drv0.rank(), 3); + + Kokkos::Experimental::resize(drv0, 5, 10); + ASSERT_EQ( drv0.rank(), 2); + ASSERT_EQ( drv0.dimension_0(), 5); + ASSERT_EQ( drv0.dimension_1(), 10); + ASSERT_EQ( drv0.dimension_2(), 1); + + Kokkos::Experimental::realloc(drv0, 10, 20); + ASSERT_EQ( drv0.rank(), 2); + ASSERT_EQ( drv0.dimension_0(), 10); + ASSERT_EQ( drv0.dimension_1(), 20); + ASSERT_EQ( drv0.dimension_2(), 1); + + } +/* Exercises create_mirror and create_mirror_view on DynRankViews: results of create_mirror are asserted to have a different data pointer than the source, while results of create_mirror_view are asserted to share it when the memory spaces are the same; extents and rank must match in every case. */ + static void run_test_mirror() + { + typedef Kokkos::Experimental::DynRankView< int , host_drv_space > view_type ; + typedef typename view_type::HostMirror mirror_type ; + view_type a("a"); + mirror_type am = Kokkos::Experimental::create_mirror_view(a); + mirror_type ax = Kokkos::Experimental::create_mirror(a); + ASSERT_EQ( & a() , & am() ); + ASSERT_EQ( a.rank() , am.rank() ); + ASSERT_EQ( ax.rank() , am.rank() ); + + if (Kokkos::HostSpace::execution_space::is_initialized() ) + { + Kokkos::DynRankView<double, Kokkos::LayoutLeft, Kokkos::HostSpace> a_h("A",1000); + auto a_h2 = Kokkos::create_mirror(Kokkos::HostSpace(),a_h); + auto a_d = Kokkos::create_mirror(typename device::memory_space(),a_h); + + int equal_ptr_h_h2 = (a_h.data() ==a_h2.data())?1:0; + int equal_ptr_h_d
= (a_h.data() ==a_d. data())?1:0; + int equal_ptr_h2_d = (a_h2.data()==a_d. data())?1:0; + + ASSERT_EQ(equal_ptr_h_h2,0); + ASSERT_EQ(equal_ptr_h_d ,0); + ASSERT_EQ(equal_ptr_h2_d,0); + + ASSERT_EQ(a_h.dimension_0(),a_h2.dimension_0()); + ASSERT_EQ(a_h.dimension_0(),a_d .dimension_0()); + + ASSERT_EQ(a_h.rank(),a_h2.rank()); + ASSERT_EQ(a_h.rank(),a_d.rank()); + } + if (Kokkos::HostSpace::execution_space::is_initialized() ) + { + Kokkos::DynRankView<double, Kokkos::LayoutRight, Kokkos::HostSpace> a_h("A",1000); + auto a_h2 = Kokkos::create_mirror(Kokkos::HostSpace(),a_h); + auto a_d = Kokkos::create_mirror(typename device::memory_space(),a_h); + + int equal_ptr_h_h2 = (a_h.data() ==a_h2.data())?1:0; + int equal_ptr_h_d = (a_h.data() ==a_d. data())?1:0; + int equal_ptr_h2_d = (a_h2.data()==a_d. data())?1:0; + + ASSERT_EQ(equal_ptr_h_h2,0); + ASSERT_EQ(equal_ptr_h_d ,0); + ASSERT_EQ(equal_ptr_h2_d,0); + + ASSERT_EQ(a_h.dimension_0(),a_h2.dimension_0()); + ASSERT_EQ(a_h.dimension_0(),a_d .dimension_0()); + + ASSERT_EQ(a_h.rank(),a_h2.rank()); + ASSERT_EQ(a_h.rank(),a_d.rank()); + } + + if (Kokkos::HostSpace::execution_space::is_initialized() ) + { + Kokkos::DynRankView<double, Kokkos::LayoutLeft, Kokkos::HostSpace> a_h("A",1000); + auto a_h2 = Kokkos::create_mirror_view(Kokkos::HostSpace(),a_h); + auto a_d = Kokkos::create_mirror_view(typename device::memory_space(),a_h); + + int equal_ptr_h_h2 = a_h.data() ==a_h2.data()?1:0; + int equal_ptr_h_d = a_h.data() ==a_d. data()?1:0; + int equal_ptr_h2_d = a_h2.data()==a_d. 
data()?1:0; + + int is_same_memspace = std::is_same<Kokkos::HostSpace,typename device::memory_space>::value?1:0; + ASSERT_EQ(equal_ptr_h_h2,1); + ASSERT_EQ(equal_ptr_h_d ,is_same_memspace); + ASSERT_EQ(equal_ptr_h2_d ,is_same_memspace); + + ASSERT_EQ(a_h.dimension_0(),a_h2.dimension_0()); + ASSERT_EQ(a_h.dimension_0(),a_d .dimension_0()); + + ASSERT_EQ(a_h.rank(),a_h2.rank()); + ASSERT_EQ(a_h.rank(),a_d.rank()); + } + if (Kokkos::HostSpace::execution_space::is_initialized() ) + { + Kokkos::DynRankView<double, Kokkos::LayoutRight, Kokkos::HostSpace> a_h("A",1000); + auto a_h2 = Kokkos::create_mirror_view(Kokkos::HostSpace(),a_h); + auto a_d = Kokkos::create_mirror_view(typename device::memory_space(),a_h); + + int equal_ptr_h_h2 = a_h.data() ==a_h2.data()?1:0; + int equal_ptr_h_d = a_h.data() ==a_d. data()?1:0; + int equal_ptr_h2_d = a_h2.data()==a_d. data()?1:0; + + int is_same_memspace = std::is_same<Kokkos::HostSpace,typename device::memory_space>::value?1:0; + ASSERT_EQ(equal_ptr_h_h2,1); + ASSERT_EQ(equal_ptr_h_d ,is_same_memspace); + ASSERT_EQ(equal_ptr_h2_d ,is_same_memspace); + + ASSERT_EQ(a_h.dimension_0(),a_h2.dimension_0()); + ASSERT_EQ(a_h.dimension_0(),a_d .dimension_0()); + + ASSERT_EQ(a_h.rank(),a_h2.rank()); + ASSERT_EQ(a_h.rank(),a_d.rank()); + } + if (Kokkos::HostSpace::execution_space::is_initialized() ) + { + typedef Kokkos::DynRankView< int , Kokkos::LayoutStride , Kokkos::HostSpace > view_stride_type ; + unsigned order[] = { 6,5,4,3,2,1,0 }, dimen[] = { N0, N1, N2, 2, 2, 2, 2 }; //LayoutRight equivalent + view_stride_type a_h( "a" , Kokkos::LayoutStride::order_dimensions(7, order, dimen) ); + auto a_h2 = Kokkos::create_mirror_view(Kokkos::HostSpace(),a_h); + auto a_d = Kokkos::create_mirror_view(typename device::memory_space(),a_h); + + int equal_ptr_h_h2 = a_h.data() ==a_h2.data()?1:0; + int equal_ptr_h_d = a_h.data() ==a_d. data()?1:0; + int equal_ptr_h2_d = a_h2.data()==a_d. 
data()?1:0; + + int is_same_memspace = std::is_same<Kokkos::HostSpace,typename device::memory_space>::value?1:0; + ASSERT_EQ(equal_ptr_h_h2,1); + ASSERT_EQ(equal_ptr_h_d ,is_same_memspace); + ASSERT_EQ(equal_ptr_h2_d ,is_same_memspace); + + ASSERT_EQ(a_h.dimension_0(),a_h2.dimension_0()); + ASSERT_EQ(a_h.dimension_0(),a_d .dimension_0()); + + ASSERT_EQ(a_h.rank(),a_h2.rank()); + ASSERT_EQ(a_h.rank(),a_d.rank()); + } + } +/* Rank-0 test: writes 1 through a host mirror, deep-copies host to device, device to device and back to host, and compares the values; then exercises deep_copy between View and DynRankView, ConstDownCast() into a rank-7 View, and building a DynRankView from a View. */ + static void run_test_scalar() + { + typedef typename dView0::HostMirror hView0 ; //HostMirror of DynRankView is a DynRankView + + dView0 dx , dy ; + hView0 hx , hy ; + + dx = dView0( "dx" ); + dy = dView0( "dy" ); + + hx = Kokkos::Experimental::create_mirror( dx ); + hy = Kokkos::Experimental::create_mirror( dy ); + + hx() = 1 ; + + Kokkos::Experimental::deep_copy( dx , hx ); + Kokkos::Experimental::deep_copy( dy , dx ); + Kokkos::Experimental::deep_copy( hy , dy ); + + ASSERT_EQ( hx(), hy() ); + ASSERT_EQ( dx.rank() , hx.rank() ); + ASSERT_EQ( dy.rank() , hy.rank() ); + + //View - DynRankView Interoperability tests + // deep_copy DynRankView to View + View0 vx("vx"); + Kokkos::deep_copy( vx , dx ); + ASSERT_EQ( rank(dx) , rank(vx) ); + + View0 vy("vy"); + Kokkos::deep_copy( vy , dy ); + ASSERT_EQ( rank(dy) , rank(vy) ); + + // deep_copy View to DynRankView + dView0 dxx("dxx"); + Kokkos::deep_copy( dxx , vx ); + ASSERT_EQ( rank(dxx) , rank(vx) ); + + + View7 vcast = dx.ConstDownCast(); + ASSERT_EQ( dx.dimension_0() , vcast.dimension_0() ); + ASSERT_EQ( dx.dimension_1() , vcast.dimension_1() ); + ASSERT_EQ( dx.dimension_2() , vcast.dimension_2() ); + ASSERT_EQ( dx.dimension_3() , vcast.dimension_3() ); + ASSERT_EQ( dx.dimension_4() , vcast.dimension_4() ); + + View7 vcast1( dy.ConstDownCast() ); + ASSERT_EQ( dy.dimension_0() , vcast1.dimension_0() ); + ASSERT_EQ( dy.dimension_1() , vcast1.dimension_1() ); + ASSERT_EQ( dy.dimension_2() , vcast1.dimension_2() ); + ASSERT_EQ( dy.dimension_3() , vcast1.dimension_3() ); + ASSERT_EQ( dy.dimension_4() ,
vcast1.dimension_4() ); + + //View - DynRankView Interoperability tests + // copy View to DynRankView + dView0 dfromvx( vx ); + auto hmx = Kokkos::create_mirror_view(dfromvx) ; + Kokkos::deep_copy(hmx , dfromvx); + auto hvx = Kokkos::create_mirror_view(vx) ; + Kokkos::deep_copy(hvx , vx); + ASSERT_EQ( rank(hvx) , rank(hmx) ); + ASSERT_EQ( hvx.dimension_0() , hmx.dimension_0() ); + ASSERT_EQ( hvx.dimension_1() , hmx.dimension_1() ); + + // copy-assign View to DynRankView + dView0 dfromvy = vy ; + auto hmy = Kokkos::create_mirror_view(dfromvy) ; + Kokkos::deep_copy(hmy , dfromvy); + auto hvy = Kokkos::create_mirror_view(vy) ; + Kokkos::deep_copy(hvy , vy); + ASSERT_EQ( rank(hvy) , rank(hmy) ); + ASSERT_EQ( hvy.dimension_0() , hmy.dimension_0() ); + ASSERT_EQ( hvy.dimension_1() , hmy.dimension_1() ); + + + View7 vtest1("vtest1",2,2,2,2,2,2,2); + dView0 dfromv1( vtest1 ); + ASSERT_EQ( dfromv1.rank() , vtest1.Rank ); + ASSERT_EQ( dfromv1.dimension_0() , vtest1.dimension_0() ); + ASSERT_EQ( dfromv1.dimension_1() , vtest1.dimension_1() ); + ASSERT_EQ( dfromv1.use_count() , vtest1.use_count() ); + + dView0 dfromv2( vcast ); + ASSERT_EQ( dfromv2.rank() , vcast.Rank ); + ASSERT_EQ( dfromv2.dimension_0() , vcast.dimension_0() ); + ASSERT_EQ( dfromv2.dimension_1() , vcast.dimension_1() ); + ASSERT_EQ( dfromv2.use_count() , vcast.use_count() ); + + dView0 dfromv3 = vcast1; + ASSERT_EQ( dfromv3.rank() , vcast1.Rank ); + ASSERT_EQ( dfromv3.dimension_0() , vcast1.dimension_0() ); + ASSERT_EQ( dfromv3.dimension_1() , vcast1.dimension_1() ); + ASSERT_EQ( dfromv3.use_count() , vcast1.use_count() ); + } +/* Main API test: default-constructed views must have a null pointer, extent 0 and rank 0; allocated views must report the requested extents and rank; use_count must grow with managed copies but not with unmanaged ones; synchronous deep_copy must round-trip data between host and device, including between View and DynRankView. */ + static void run_test() + { + // mfh 14 Feb 2014: This test doesn't actually create instances of + // these types. In order to avoid "declared but unused typedef" + // warnings, we declare empty instances of these types, with the + // usual "(void)" marker to avoid compiler warnings for unused + // variables.
+ + typedef typename dView0::HostMirror hView0 ; + + { + hView0 thing; + (void) thing; + } + + dView0 d_uninitialized(Kokkos::ViewAllocateWithoutInitializing("uninit"),10,20); + ASSERT_TRUE( d_uninitialized.data() != nullptr ); + ASSERT_EQ( d_uninitialized.rank() , 2 ); + ASSERT_EQ( d_uninitialized.dimension_0() , 10 ); + ASSERT_EQ( d_uninitialized.dimension_1() , 20 ); + ASSERT_EQ( d_uninitialized.dimension_2() , 1 ); + + dView0 dx , dy , dz ; + hView0 hx , hy , hz ; + + ASSERT_TRUE( Kokkos::Experimental::is_dyn_rank_view<dView0>::value ); + ASSERT_FALSE( Kokkos::Experimental::is_dyn_rank_view< Kokkos::View<double> >::value ); + + ASSERT_TRUE( dx.ptr_on_device() == 0 ); //Okay with UVM + ASSERT_TRUE( dy.ptr_on_device() == 0 ); //Okay with UVM + ASSERT_TRUE( dz.ptr_on_device() == 0 ); //Okay with UVM + ASSERT_TRUE( hx.ptr_on_device() == 0 ); + ASSERT_TRUE( hy.ptr_on_device() == 0 ); + ASSERT_TRUE( hz.ptr_on_device() == 0 ); + ASSERT_EQ( dx.dimension_0() , 0u ); //Okay with UVM + ASSERT_EQ( dy.dimension_0() , 0u ); //Okay with UVM + ASSERT_EQ( dz.dimension_0() , 0u ); //Okay with UVM + ASSERT_EQ( hx.dimension_0() , 0u ); + ASSERT_EQ( hy.dimension_0() , 0u ); + ASSERT_EQ( hz.dimension_0() , 0u ); + ASSERT_EQ( dx.rank() , 0u ); //Okay with UVM + ASSERT_EQ( hx.rank() , 0u ); + + dx = dView0( "dx" , N1 , N2 , N3 ); + dy = dView0( "dy" , N1 , N2 , N3 ); + + hx = hView0( "hx" , N1 , N2 , N3 ); + hy = hView0( "hy" , N1 , N2 , N3 ); + + ASSERT_EQ( dx.dimension_0() , unsigned(N1) ); //Okay with UVM + ASSERT_EQ( dy.dimension_0() , unsigned(N1) ); //Okay with UVM + ASSERT_EQ( hx.dimension_0() , unsigned(N1) ); + ASSERT_EQ( hy.dimension_0() , unsigned(N1) ); + ASSERT_EQ( dx.rank() , 3 ); //Okay with UVM + ASSERT_EQ( hx.rank() , 3 ); + + dx = dView0( "dx" , N0 , N1 , N2 , N3 ); + dy = dView0( "dy" , N0 , N1 , N2 , N3 ); + hx = hView0( "hx" , N0 , N1 , N2 , N3 ); + hy = hView0( "hy" , N0 , N1 , N2 , N3 ); + + ASSERT_EQ( dx.dimension_0() , unsigned(N0) ); + ASSERT_EQ( 
dy.dimension_0() , unsigned(N0) ); + ASSERT_EQ( hx.dimension_0() , unsigned(N0) ); + ASSERT_EQ( hy.dimension_0() , unsigned(N0) ); + ASSERT_EQ( dx.rank() , 4 ); + ASSERT_EQ( dy.rank() , 4 ); + ASSERT_EQ( hx.rank() , 4 ); + ASSERT_EQ( hy.rank() , 4 ); + + ASSERT_EQ( dx.use_count() , size_t(1) ); + + dView0_unmanaged unmanaged_dx = dx; + ASSERT_EQ( dx.use_count() , size_t(1) ); + + + dView0_unmanaged unmanaged_from_ptr_dx = dView0_unmanaged(dx.ptr_on_device(), + dx.dimension_0(), + dx.dimension_1(), + dx.dimension_2(), + dx.dimension_3()); + + + { + // Destruction of this view should be harmless + const_dView0 unmanaged_from_ptr_const_dx( dx.ptr_on_device() , + dx.dimension_0() , + dx.dimension_1() , + dx.dimension_2() , + dx.dimension_3() ); + } + + const_dView0 const_dx = dx ; + ASSERT_EQ( dx.use_count() , size_t(2) ); + + { + const_dView0 const_dx2; + const_dx2 = const_dx; + ASSERT_EQ( dx.use_count() , size_t(3) ); + + const_dx2 = dy; + ASSERT_EQ( dx.use_count() , size_t(2) ); + + const_dView0 const_dx3(dx); + ASSERT_EQ( dx.use_count() , size_t(3) ); + + dView0_unmanaged dx4_unmanaged(dx); + ASSERT_EQ( dx.use_count() , size_t(3) ); + } + + ASSERT_EQ( dx.use_count() , size_t(2) ); + + + ASSERT_FALSE( dx.ptr_on_device() == 0 ); + ASSERT_FALSE( const_dx.ptr_on_device() == 0 ); + ASSERT_FALSE( unmanaged_dx.ptr_on_device() == 0 ); + ASSERT_FALSE( unmanaged_from_ptr_dx.ptr_on_device() == 0 ); + ASSERT_FALSE( dy.ptr_on_device() == 0 ); + ASSERT_NE( dx , dy ); + + ASSERT_EQ( dx.dimension_0() , unsigned(N0) ); + ASSERT_EQ( dx.dimension_1() , unsigned(N1) ); + ASSERT_EQ( dx.dimension_2() , unsigned(N2) ); + ASSERT_EQ( dx.dimension_3() , unsigned(N3) ); + + ASSERT_EQ( dy.dimension_0() , unsigned(N0) ); + ASSERT_EQ( dy.dimension_1() , unsigned(N1) ); + ASSERT_EQ( dy.dimension_2() , unsigned(N2) ); + ASSERT_EQ( dy.dimension_3() , unsigned(N3) ); + + ASSERT_EQ( unmanaged_from_ptr_dx.capacity(),unsigned(N0)*unsigned(N1)*unsigned(N2)*unsigned(N3) ); + + hx = 
Kokkos::Experimental::create_mirror( dx ); + hy = Kokkos::Experimental::create_mirror( dy ); + + ASSERT_EQ( hx.rank() , dx.rank() ); + ASSERT_EQ( hy.rank() , dy.rank() ); + + ASSERT_EQ( hx.dimension_0() , unsigned(N0) ); + ASSERT_EQ( hx.dimension_1() , unsigned(N1) ); + ASSERT_EQ( hx.dimension_2() , unsigned(N2) ); + ASSERT_EQ( hx.dimension_3() , unsigned(N3) ); + + ASSERT_EQ( hy.dimension_0() , unsigned(N0) ); + ASSERT_EQ( hy.dimension_1() , unsigned(N1) ); + ASSERT_EQ( hy.dimension_2() , unsigned(N2) ); + ASSERT_EQ( hy.dimension_3() , unsigned(N3) ); + + // T v1 = hx() ; // Generates compile error as intended + // T v2 = hx(0,0) ; // Generates compile error as intended + // hx(0,0) = v2 ; // Generates compile error as intended + +/* +#if ! KOKKOS_USING_EXP_VIEW + // Testing with asynchronous deep copy with respect to device + { + size_t count = 0 ; + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < hx.dimension_1() ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < hx.dimension_2() ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < hx.dimension_3() ; ++i3 ) { + hx(ip,i1,i2,i3) = ++count ; + }}}} + + + Kokkos::deep_copy(typename hView0::execution_space(), dx , hx ); + Kokkos::deep_copy(typename hView0::execution_space(), dy , dx ); + Kokkos::deep_copy(typename hView0::execution_space(), hy , dy ); + + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + { ASSERT_EQ( hx(ip,i1,i2,i3) , hy(ip,i1,i2,i3) ); } + }}}} + + Kokkos::deep_copy(typename hView0::execution_space(), dx , T(0) ); + Kokkos::deep_copy(typename hView0::execution_space(), hx , dx ); + + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + { ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); } + }}}} + } + + // Testing with asynchronous deep copy with respect to host + { + size_t 
count = 0 ; + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < hx.dimension_1() ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < hx.dimension_2() ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < hx.dimension_3() ; ++i3 ) { + hx(ip,i1,i2,i3) = ++count ; + }}}} + + Kokkos::deep_copy(typename dView0::execution_space(), dx , hx ); + Kokkos::deep_copy(typename dView0::execution_space(), dy , dx ); + Kokkos::deep_copy(typename dView0::execution_space(), hy , dy ); + + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + { ASSERT_EQ( hx(ip,i1,i2,i3) , hy(ip,i1,i2,i3) ); } + }}}} + + Kokkos::deep_copy(typename dView0::execution_space(), dx , T(0) ); + Kokkos::deep_copy(typename dView0::execution_space(), hx , dx ); + + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + { ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); } + }}}} + } +#endif */ // #if ! 
KOKKOS_USING_EXP_VIEW + + // Testing with synchronous deep copy + { + size_t count = 0 ; + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < hx.dimension_1() ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < hx.dimension_2() ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < hx.dimension_3() ; ++i3 ) { + hx(ip,i1,i2,i3) = ++count ; + }}}} + + Kokkos::Experimental::deep_copy( dx , hx ); + Kokkos::Experimental::deep_copy( dy , dx ); + Kokkos::Experimental::deep_copy( hy , dy ); + + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + { ASSERT_EQ( hx(ip,i1,i2,i3) , hy(ip,i1,i2,i3) ); } + }}}} + + Kokkos::Experimental::deep_copy( dx , T(0) ); + Kokkos::Experimental::deep_copy( hx , dx ); + + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + { ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); } + }}}} +// ASSERT_EQ( hx(0,0,0,0,0,0,0,0) , T(0) ); //Test rank8 op behaves properly - if implemented + } + + dz = dx ; ASSERT_EQ( dx, dz); ASSERT_NE( dy, dz); + dz = dy ; ASSERT_EQ( dy, dz); ASSERT_NE( dx, dz); + + dx = dView0(); + ASSERT_TRUE( dx.ptr_on_device() == 0 ); + ASSERT_FALSE( dy.ptr_on_device() == 0 ); + ASSERT_FALSE( dz.ptr_on_device() == 0 ); + dy = dView0(); + ASSERT_TRUE( dx.ptr_on_device() == 0 ); + ASSERT_TRUE( dy.ptr_on_device() == 0 ); + ASSERT_FALSE( dz.ptr_on_device() == 0 ); + dz = dView0(); + ASSERT_TRUE( dx.ptr_on_device() == 0 ); + ASSERT_TRUE( dy.ptr_on_device() == 0 ); + ASSERT_TRUE( dz.ptr_on_device() == 0 ); + + //View - DynRankView Interoperability tests + // deep_copy from view to dynrankview + const int testdim = 4; + dView0 dxx("dxx",testdim); + View1 vxx("vxx",testdim); + auto hvxx = Kokkos::create_mirror_view(vxx); + for (int i = 0; i < testdim; ++i) + { hvxx(i) = i; } + Kokkos::deep_copy(vxx,hvxx); + 
Kokkos::deep_copy(dxx,vxx); + auto hdxx = Kokkos::create_mirror_view(dxx); + Kokkos::deep_copy(hdxx,dxx); + for (int i = 0; i < testdim; ++i) + { ASSERT_EQ( hvxx(i) , hdxx(i) ); } + + ASSERT_EQ( rank(hdxx) , rank(hvxx) ); + ASSERT_EQ( hdxx.dimension_0() , testdim ); + ASSERT_EQ( hdxx.dimension_0() , hvxx.dimension_0() ); + + // deep_copy from dynrankview to view + View1 vdxx("vdxx",testdim); + auto hvdxx = Kokkos::create_mirror_view(vdxx); + Kokkos::deep_copy(hvdxx , hdxx); + ASSERT_EQ( rank(hdxx) , rank(hvdxx) ); + ASSERT_EQ( hvdxx.dimension_0() , testdim ); + ASSERT_EQ( hdxx.dimension_0() , hvdxx.dimension_0() ); + for (int i = 0; i < testdim; ++i) + { ASSERT_EQ( hvxx(i) , hvdxx(i) ); } + } + + typedef T DataType ; +/* Takes a const-data view and a non-const view by const reference and asserts that they compare equal; run_test_const passes the same non-const view for both parameters, so the first argument has to convert to the const-data view type. */ + static void + check_auto_conversion_to_const( + const Kokkos::Experimental::DynRankView< const DataType , device > & arg_const , + const Kokkos::Experimental::DynRankView< DataType , device > & arg ) + { + ASSERT_TRUE( arg_const == arg ); + } +/* Builds a const-data view and a const MemoryRandomAccess view from a non-const view; asserts equality with the original for the const view and, unless the execution space is Cuda, pointer identity for the random-access view. */ + static void run_test_const() + { + typedef Kokkos::Experimental::DynRankView< DataType , device > typeX ; + typedef Kokkos::Experimental::DynRankView< const DataType , device > const_typeX ; + typedef Kokkos::Experimental::DynRankView< const DataType , device , Kokkos::MemoryRandomAccess > const_typeR ; + typeX x( "X", 2 ); + const_typeX xc = x ; + const_typeR xr = x ; + + ASSERT_TRUE( xc == x ); + ASSERT_TRUE( x == xc ); + + // For CUDA the constant random access View does not return + // an lvalue reference due to retrieving through texture cache + // therefore not allowed to query the underlying pointer. +#if defined(KOKKOS_HAVE_CUDA) + if ( !
std::is_same< typename device::execution_space , Kokkos::Cuda >::value ) +#endif + { + ASSERT_TRUE( x.ptr_on_device() == xr.ptr_on_device() ); + } + + // typeX xf = xc ; // setting non-const from const must not compile + + check_auto_conversion_to_const( x , x ); + } + +/* Checks the dynamic rank of subdynrankview results: an integer argument removes that dimension, ALL() and pair ranges keep it, and extra trailing ALL() arguments beyond the rank of the source are ignored. */ + static void run_test_subview() + { + typedef Kokkos::Experimental::DynRankView< const T , device > cdView ; + typedef Kokkos::Experimental::DynRankView< T , device > dView ; + // LayoutStride required for all returned DynRankView subdynrankview's + typedef Kokkos::Experimental::DynRankView< T , Kokkos::LayoutStride , device > sdView ; + + dView0 d0( "d0" ); + cdView s0 = d0 ; + + // N0 = 1000,N1 = 3,N2 = 5,N3 = 7 + unsigned order[] = { 6,5,4,3,2,1,0 }, dimen[] = { N0, N1, N2, 2, 2, 2, 2 }; //LayoutRight equivalent + sdView d7( "d7" , Kokkos::LayoutStride::order_dimensions(7, order, dimen) ); + ASSERT_EQ( d7.rank() , 7 ); + + sdView ds0 = Kokkos::subdynrankview( d7 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ); + ASSERT_EQ( ds0.rank() , 0 ); + +//Basic test - ALL + sdView dsALL = Kokkos::Experimental::subdynrankview( d7 , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() ); + ASSERT_EQ( dsALL.rank() , 7 ); + +// Send a value to final rank returning rank 6 subview + sdView dsm1 = Kokkos::Experimental::subdynrankview( d7 , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , 1 ); + ASSERT_EQ( dsm1.rank() , 6 ); + +// Send a std::pair as argument to a rank + sdView dssp = Kokkos::Experimental::subdynrankview( d7 , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , std::pair<unsigned,unsigned>(1,2) ); + ASSERT_EQ( dssp.rank() , 7 ); + +// Send a kokkos::pair as argument to a rank; take default layout as input + dView0 dd0("dd0" , N0 , N1 , N2 , 2 , 2 , 2 , 2 ); //default layout + ASSERT_EQ( dd0.rank() , 7 ); + sdView dtkp = Kokkos::Experimental::subdynrankview( dd0 ,
Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::pair<unsigned,unsigned>(0,1) ); + ASSERT_EQ( dtkp.rank() , 7 ); + +// Return rank 7 subview, taking a pair as one argument, layout stride input + sdView ds7 = Kokkos::Experimental::subdynrankview( d7 , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::pair<unsigned,unsigned>(0,1) ); + ASSERT_EQ( ds7.rank() , 7 ); + +// Default Layout DynRankView + dView dv6("dv6" , N0 , N1 , N2 , N3 , 2 , 2 ); + ASSERT_EQ( dv6.rank() , 6 ); + +// DynRankView with LayoutRight + typedef Kokkos::Experimental::DynRankView< T , Kokkos::LayoutRight , device > drView ; + drView dr5( "dr5" , N0 , N1 , N2 , 2 , 2 ); + ASSERT_EQ( dr5.rank() , 5 ); + +// LayoutStride but arranged as LayoutRight + // NOTE: unused arg_layout dimensions must be set to ~size_t(0) so that + // rank deduction can properly take place + unsigned order5[] = { 4,3,2,1,0 }, dimen5[] = { N0, N1, N2, 2, 2 }; + Kokkos::LayoutStride ls = Kokkos::LayoutStride::order_dimensions(5, order5, dimen5); + ls.dimension[5] = ~size_t(0); + ls.dimension[6] = ~size_t(0); + ls.dimension[7] = ~size_t(0); + sdView d5("d5", ls); + ASSERT_EQ( d5.rank() , 5 ); + +// LayoutStride arranged as LayoutRight - commented out as example that fails unit test +// unsigned order5[] = { 4,3,2,1,0 }, dimen5[] = { N0, N1, N2, 2, 2 }; +// sdView d5( "d5" , Kokkos::LayoutStride::order_dimensions(5, order5, dimen5) ); +// +// Fails the following unit test: +// ASSERT_EQ( d5.rank() , dr5.rank() ); +// +// Explanation: In construction of the Kokkos::LayoutStride below, since the +// remaining dimensions are not specified, they will default to values of 0 +// rather than ~size_t(0). +// When passed to the DynRankView constructor the default dimensions (of 0) +// will be counted toward the dynamic rank and returning an incorrect value +// (i.e. rank 7 rather than 5). 
+ +// Check LayoutRight dr5 and LayoutStride d5 dimensions agree (as they should) + ASSERT_EQ( d5.dimension_0() , dr5.dimension_0() ); + ASSERT_EQ( d5.dimension_1() , dr5.dimension_1() ); + ASSERT_EQ( d5.dimension_2() , dr5.dimension_2() ); + ASSERT_EQ( d5.dimension_3() , dr5.dimension_3() ); + ASSERT_EQ( d5.dimension_4() , dr5.dimension_4() ); + ASSERT_EQ( d5.dimension_5() , dr5.dimension_5() ); + ASSERT_EQ( d5.rank() , dr5.rank() ); + +// Rank 5 subview of rank 5 dynamic rank view, layout stride input + sdView ds5 = Kokkos::Experimental::subdynrankview( d5 , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::pair<unsigned,unsigned>(0,1) ); + ASSERT_EQ( ds5.rank() , 5 ); + +// Pass in extra ALL arguments beyond the rank of the DynRank View. +// This behavior is allowed - ignore the extra ALL arguments when +// the src.rank() < number of arguments, but be careful! + sdView ds5plus = Kokkos::Experimental::subdynrankview( d5 , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::pair<unsigned,unsigned>(0,1) , Kokkos::ALL() ); + + ASSERT_EQ( ds5.rank() , ds5plus.rank() ); + ASSERT_EQ( ds5.dimension_0() , ds5plus.dimension_0() ); + ASSERT_EQ( ds5.dimension_4() , ds5plus.dimension_4() ); + ASSERT_EQ( ds5.dimension_5() , ds5plus.dimension_5() ); + +#if ! defined( KOKKOS_HAVE_CUDA ) || defined ( KOKKOS_USE_CUDA_UVM ) + ASSERT_EQ( & ds5(1,1,1,1,0) - & ds5plus(1,1,1,1,0) , 0 ); + ASSERT_EQ( & ds5(1,1,1,1,0,0) - & ds5plus(1,1,1,1,0,0) , 0 ); // passing argument to rank beyond the view's rank is allowed iff it is a 0. 
+#endif + +// Similar test to rank 5 above, but create rank 4 subview +// Check that the rank contracts (ds4 and ds4plus) and that subdynrankview can accept extra args (ds4plus) + sdView ds4 = Kokkos::Experimental::subdynrankview( d5 , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , 0 ); + sdView ds4plus = Kokkos::Experimental::subdynrankview( d5 , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , Kokkos::ALL() , 0 , Kokkos::ALL() ); + + ASSERT_EQ( ds4.rank() , ds4plus.rank() ); + ASSERT_EQ( ds4.rank() , 4 ); + ASSERT_EQ( ds4.dimension_0() , ds4plus.dimension_0() ); + ASSERT_EQ( ds4.dimension_4() , ds4plus.dimension_4() ); + ASSERT_EQ( ds4.dimension_5() , ds4plus.dimension_5() ); + } +/* Takes LayoutStride subviews of LayoutLeft and LayoutRight host views (rank 2 through subdynrankview, rank 4 through subview) and checks their extents, their rank, and that element addresses match the corresponding elements of the parent view. */ + static void run_test_subview_strided() + { + typedef Kokkos::Experimental::DynRankView < int , Kokkos::LayoutLeft , host_drv_space > drview_left ; + typedef Kokkos::Experimental::DynRankView < int , Kokkos::LayoutRight , host_drv_space > drview_right ; + typedef Kokkos::Experimental::DynRankView < int , Kokkos::LayoutStride , host_drv_space > drview_stride ; + + drview_left xl2( "xl2", 100 , 200 ); + drview_right xr2( "xr2", 100 , 200 ); + drview_stride yl1 = Kokkos::Experimental::subdynrankview( xl2 , 0 , Kokkos::ALL() ); + drview_stride yl2 = Kokkos::Experimental::subdynrankview( xl2 , 1 , Kokkos::ALL() ); + drview_stride ys1 = Kokkos::Experimental::subdynrankview( xr2 , 0 , Kokkos::ALL() ); + drview_stride ys2 = Kokkos::Experimental::subdynrankview( xr2 , 1 , Kokkos::ALL() ); + drview_stride yr1 = Kokkos::Experimental::subdynrankview( xr2 , 0 , Kokkos::ALL() ); + drview_stride yr2 = Kokkos::Experimental::subdynrankview( xr2 , 1 , Kokkos::ALL() ); + + ASSERT_EQ( yl1.dimension_0() , xl2.dimension_1() ); + ASSERT_EQ( yl2.dimension_0() , xl2.dimension_1() ); + + ASSERT_EQ( yr1.dimension_0() , xr2.dimension_1() ); + ASSERT_EQ( yr2.dimension_0() , xr2.dimension_1() ); + + ASSERT_EQ( & yl1(0) - & xl2(0,0) , 0 ); + ASSERT_EQ( & yl2(0) - & xl2(1,0) , 0 ); + ASSERT_EQ( &
yr1(0) - & xr2(0,0) , 0 ); + ASSERT_EQ( & yr2(0) - & xr2(1,0) , 0 ); + + + drview_left xl4( "xl4", 10 , 20 , 30 , 40 ); + drview_right xr4( "xr4", 10 , 20 , 30 , 40 ); + + //Replace subdynrankview with subview - test + drview_stride yl4 = Kokkos::Experimental::subview( xl4 , 1 , Kokkos::ALL() , 2 , Kokkos::ALL() ); + drview_stride yr4 = Kokkos::Experimental::subview( xr4 , 1 , Kokkos::ALL() , 2 , Kokkos::ALL() ); + + ASSERT_EQ( yl4.dimension_0() , xl4.dimension_1() ); + ASSERT_EQ( yl4.dimension_1() , xl4.dimension_3() ); + ASSERT_EQ( yr4.dimension_0() , xr4.dimension_1() ); + ASSERT_EQ( yr4.dimension_1() , xr4.dimension_3() ); + ASSERT_EQ( yl4.rank() , 2); + ASSERT_EQ( yr4.rank() , 2); + + ASSERT_EQ( & yl4(4,4) - & xl4(1,4,2,4) , 0 ); + ASSERT_EQ( & yr4(4,4) - & xr4(1,4,2,4) , 0 ); + } +/* Extracts single-column, single-row and index-range LayoutStride subviews from Length x Count host views, asserts that their elements have the same addresses as the matching parent elements, and constructs const-data views from them. */ + static void run_test_vector() + { + static const unsigned Length = 1000 , Count = 8 ; + + typedef typename Kokkos::Experimental::DynRankView< T , Kokkos::LayoutLeft , host_drv_space > multivector_type ; + + typedef typename Kokkos::Experimental::DynRankView< T , Kokkos::LayoutRight , host_drv_space > multivector_right_type ; + + multivector_type mv = multivector_type( "mv" , Length , Count ); + multivector_right_type mv_right = multivector_right_type( "mv" , Length , Count ); + + typedef typename Kokkos::Experimental::DynRankView< T , Kokkos::LayoutStride , host_drv_space > svector_type ; + typedef typename Kokkos::Experimental::DynRankView< T , Kokkos::LayoutStride , host_drv_space > smultivector_type ; + typedef typename Kokkos::Experimental::DynRankView< const T , Kokkos::LayoutStride , host_drv_space > const_svector_right_type ; + typedef typename Kokkos::Experimental::DynRankView< const T , Kokkos::LayoutStride , host_drv_space > const_svector_type ; + typedef typename Kokkos::Experimental::DynRankView< const T , Kokkos::LayoutStride , host_drv_space > const_smultivector_type ; + + svector_type v1 = Kokkos::Experimental::subdynrankview( mv , Kokkos::ALL() , 0 ); +
svector_type v2 = Kokkos::Experimental::subdynrankview( mv , Kokkos::ALL() , 1 ); + svector_type v3 = Kokkos::Experimental::subdynrankview( mv , Kokkos::ALL() , 2 ); + + svector_type rv1 = Kokkos::Experimental::subdynrankview( mv_right , 0 , Kokkos::ALL() ); + svector_type rv2 = Kokkos::Experimental::subdynrankview( mv_right , 1 , Kokkos::ALL() ); + svector_type rv3 = Kokkos::Experimental::subdynrankview( mv_right , 2 , Kokkos::ALL() ); + + smultivector_type mv1 = Kokkos::Experimental::subdynrankview( mv , std::make_pair( 1 , 998 ) , + std::make_pair( 2 , 5 ) ); + + smultivector_type mvr1 = + Kokkos::Experimental::subdynrankview( mv_right , + std::make_pair( 1 , 998 ) , + std::make_pair( 2 , 5 ) ); + + const_svector_type cv1 = Kokkos::Experimental::subdynrankview( mv , Kokkos::ALL(), 0 ); + const_svector_type cv2 = Kokkos::Experimental::subdynrankview( mv , Kokkos::ALL(), 1 ); + const_svector_type cv3 = Kokkos::Experimental::subdynrankview( mv , Kokkos::ALL(), 2 ); + + svector_type vr1 = Kokkos::Experimental::subdynrankview( mv , Kokkos::ALL() , 0 ); + svector_type vr2 = Kokkos::Experimental::subdynrankview( mv , Kokkos::ALL() , 1 ); + svector_type vr3 = Kokkos::Experimental::subdynrankview( mv , Kokkos::ALL() , 2 ); + + const_svector_right_type cvr1 = Kokkos::Experimental::subdynrankview( mv , Kokkos::ALL() , 0 ); + const_svector_right_type cvr2 = Kokkos::Experimental::subdynrankview( mv , Kokkos::ALL() , 1 ); + const_svector_right_type cvr3 = Kokkos::Experimental::subdynrankview( mv , Kokkos::ALL() , 2 ); + + + ASSERT_TRUE( & v1[0] == & v1(0) ); + ASSERT_TRUE( & v1[0] == & mv(0,0) ); + ASSERT_TRUE( & v2[0] == & mv(0,1) ); + ASSERT_TRUE( & v3[0] == & mv(0,2) ); + + ASSERT_TRUE( & cv1[0] == & mv(0,0) ); + ASSERT_TRUE( & cv2[0] == & mv(0,1) ); + ASSERT_TRUE( & cv3[0] == & mv(0,2) ); + + ASSERT_TRUE( & vr1[0] == & mv(0,0) ); + ASSERT_TRUE( & vr2[0] == & mv(0,1) ); + ASSERT_TRUE( & vr3[0] == & mv(0,2) ); + + ASSERT_TRUE( & cvr1[0] == & mv(0,0) ); + ASSERT_TRUE( & 
cvr2[0] == & mv(0,1) ); + ASSERT_TRUE( & cvr3[0] == & mv(0,2) ); + + + ASSERT_TRUE( & mv1(0,0) == & mv( 1 , 2 ) ); + ASSERT_TRUE( & mv1(1,1) == & mv( 2 , 3 ) ); + ASSERT_TRUE( & mv1(3,2) == & mv( 4 , 4 ) ); + ASSERT_TRUE( & mvr1(0,0) == & mv_right( 1 , 2 ) ); + ASSERT_TRUE( & mvr1(1,1) == & mv_right( 2 , 3 ) ); + ASSERT_TRUE( & mvr1(3,2) == & mv_right( 4 , 4 ) ); + + const_svector_type c_cv1( v1 ); + typename svector_type::const_type c_cv2( v2 ); + typename const_svector_type::const_type c_ccv2( v2 ); + + + const_smultivector_type cmv( mv ); + typename smultivector_type::const_type cmvX( cmv ); + typename const_smultivector_type::const_type ccmvX( cmv ); + } +}; + +} // namespace Test + +/*--------------------------------------------------------------------------*/ + diff --git a/lib/kokkos/containers/unit_tests/TestDynamicView.hpp b/lib/kokkos/containers/unit_tests/TestDynamicView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7e3ca005f4b6401a088208fca120c097143afc49 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/TestDynamicView.hpp @@ -0,0 +1,168 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_DYNAMICVIEW_HPP +#define KOKKOS_TEST_DYNAMICVIEW_HPP + +#include <gtest/gtest.h> +#include <iostream> +#include <cstdlib> +#include <cstdio> +#include <Kokkos_Core.hpp> + +#include <Kokkos_DynamicView.hpp> +#include <impl/Kokkos_Timer.hpp> + +namespace Test { + +template< typename Scalar , class Space > +struct TestDynamicView +{ + typedef typename Space::execution_space execution_space ; + typedef typename Space::memory_space memory_space ; + + typedef Kokkos::Experimental::MemoryPool<typename Space::device_type> memory_pool_type; + + typedef Kokkos::Experimental::DynamicView<Scalar*,Space> view_type; + + typedef typename Kokkos::TeamPolicy<execution_space>::member_type member_type ; + typedef double value_type; + + struct TEST {}; + struct VERIFY {}; + + view_type a; + const unsigned total_size ; + + TestDynamicView( const view_type & arg_a , const unsigned arg_total ) + : a(arg_a), total_size( arg_total ) {} + + KOKKOS_INLINE_FUNCTION + void operator() ( const TEST , member_type team_member, double& value) const + { + const unsigned int team_idx = team_member.league_rank() * team_member.team_size(); + + if ( team_member.team_rank() == 0 ) { + unsigned n = team_idx + team_member.team_size(); + + if ( total_size < n ) n = total_size ; + + a.resize_parallel( n ); + + if ( a.extent(0) < n ) { + Kokkos::abort("GrowTest TEST failed resize_parallel"); + } + } + + // Make sure resize is done for all team members: + team_member.team_barrier(); + + const unsigned int val = team_idx + team_member.team_rank(); + + if ( val < total_size ) { + value += val ; + + a( val ) = val ; + } + } + + KOKKOS_INLINE_FUNCTION + void operator() ( const VERIFY , member_type team_member, double& value) const + { + const unsigned int val = + team_member.team_rank() + + team_member.league_rank() * team_member.team_size(); + + if ( val < 
total_size ) { + + if ( val != a(val) ) { + Kokkos::abort("GrowTest VERIFY failed resize_parallel"); + } + + value += a(val); + } + } + + static void run( unsigned arg_total_size ) + { + typedef Kokkos::TeamPolicy<execution_space,TEST> TestPolicy ; + typedef Kokkos::TeamPolicy<execution_space,VERIFY> VerifyPolicy ; + +// printf("TestDynamicView::run(%d) construct memory pool\n",arg_total_size); + + memory_pool_type pool( memory_space() , arg_total_size * sizeof(Scalar) * 1.2 ); + +// printf("TestDynamicView::run(%d) construct dynamic view\n",arg_total_size); + + view_type da("A",pool,arg_total_size); + +// printf("TestDynamicView::run(%d) construct test functor\n",arg_total_size); + + TestDynamicView functor(da,arg_total_size); + + const unsigned team_size = TestPolicy::team_size_recommended(functor); + const unsigned league_size = ( arg_total_size + team_size - 1 ) / team_size ; + + double reference = 0; + double result = 0; + +// printf("TestDynamicView::run(%d) run functor test\n",arg_total_size); + + Kokkos::parallel_reduce( TestPolicy(league_size,team_size) , functor , reference); + execution_space::fence(); + + +// printf("TestDynamicView::run(%d) run functor verify\n",arg_total_size); + + Kokkos::parallel_reduce( VerifyPolicy(league_size,team_size) , functor , result ); + execution_space::fence(); + ASSERT_EQ( reference , result ); /* the TEST pass sums every val < total_size and the VERIFY pass sums a(val) for the same val, so the totals must match */ +// printf("TestDynamicView::run(%d) done\n",arg_total_size); + + } +}; + +} // namespace Test + +#endif /* #ifndef KOKKOS_TEST_DYNAMICVIEW_HPP */ + diff --git a/lib/kokkos/containers/unit_tests/TestOpenMP.cpp b/lib/kokkos/containers/unit_tests/TestOpenMP.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a4319f39ff7ce626f45a3b7cd3fe9b2a823d1132 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/TestOpenMP.cpp @@ -0,0 +1,182 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +#include <Kokkos_Bitset.hpp> +#include <Kokkos_UnorderedMap.hpp> +#include <Kokkos_Vector.hpp> + +//---------------------------------------------------------------------------- +#include <TestBitset.hpp> +#include <TestUnorderedMap.hpp> +#include <TestStaticCrsGraph.hpp> +#include <TestVector.hpp> +#include <TestDualView.hpp> +#include <TestDynamicView.hpp> +#include <TestSegmentedView.hpp> +#include <TestComplex.hpp> + +#include <Kokkos_DynRankView.hpp> +#include <TestDynViewAPI.hpp> + +#include <iomanip> + +namespace Test { + +#ifdef KOKKOS_HAVE_OPENMP +class openmp : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + + unsigned threads_count = 4 ; + + if ( Kokkos::hwloc::available() ) { + threads_count = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa(); + } + + Kokkos::OpenMP::initialize( threads_count ); + } + + static void TearDownTestCase() + { + Kokkos::OpenMP::finalize(); + } +}; + +TEST_F( openmp, complex ) +{ + testComplex<Kokkos::OpenMP> (); +} + +TEST_F( openmp, dyn_view_api) { + TestDynViewAPI< double , Kokkos::OpenMP >(); +} + +TEST_F( openmp, bitset ) +{ + test_bitset<Kokkos::OpenMP>(); +} + +TEST_F( openmp , staticcrsgraph ) +{ + TestStaticCrsGraph::run_test_graph< Kokkos::OpenMP >(); + TestStaticCrsGraph::run_test_graph2< Kokkos::OpenMP >(); +} + +#define OPENMP_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \ + TEST_F( openmp, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \ + for (int i=0; i<repeat; ++i) \ + test_insert<Kokkos::OpenMP>(num_nodes,num_inserts,num_duplicates, near); \ + } + +#define OPENMP_FAILED_INSERT_TEST( num_nodes, repeat ) \ + TEST_F( openmp, 
UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \ + for (int i=0; i<repeat; ++i) \ + test_failed_insert<Kokkos::OpenMP>(num_nodes); \ + } + +#define OPENMP_ASSIGNEMENT_TEST( num_nodes, repeat ) \ + TEST_F( openmp, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \ + for (int i=0; i<repeat; ++i) \ + test_assignement_operators<Kokkos::OpenMP>(num_nodes); \ + } + +#define OPENMP_DEEP_COPY( num_nodes, repeat ) \ + TEST_F( openmp, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \ + for (int i=0; i<repeat; ++i) \ + test_deep_copy<Kokkos::OpenMP>(num_nodes); \ + } + +#define OPENMP_VECTOR_COMBINE_TEST( size ) \ + TEST_F( openmp, vector_combination##size##x) { \ + test_vector_combinations<int,Kokkos::OpenMP>(size); \ + } + +#define OPENMP_DUALVIEW_COMBINE_TEST( size ) \ + TEST_F( openmp, dualview_combination##size##x) { \ + test_dualview_combinations<int,Kokkos::OpenMP>(size); \ + } + +#define OPENMP_SEGMENTEDVIEW_TEST( size ) \ + TEST_F( openmp, segmentedview_##size##x) { \ + test_segmented_view<double,Kokkos::OpenMP>(size); \ + } + +OPENMP_INSERT_TEST(close, 100000, 90000, 100, 500, true) +OPENMP_INSERT_TEST(far, 100000, 90000, 100, 500, false) +OPENMP_FAILED_INSERT_TEST( 10000, 1000 ) +OPENMP_DEEP_COPY( 10000, 1 ) + +OPENMP_VECTOR_COMBINE_TEST( 10 ) +OPENMP_VECTOR_COMBINE_TEST( 3057 ) +OPENMP_DUALVIEW_COMBINE_TEST( 10 ) +OPENMP_SEGMENTEDVIEW_TEST( 10000 ) + +#undef OPENMP_INSERT_TEST +#undef OPENMP_FAILED_INSERT_TEST +#undef OPENMP_ASSIGNEMENT_TEST +#undef OPENMP_DEEP_COPY +#undef OPENMP_VECTOR_COMBINE_TEST +#undef OPENMP_DUALVIEW_COMBINE_TEST +#undef OPENMP_SEGMENTEDVIEW_TEST + +/* dynamic_view uses the openmp fixture, which only exists when KOKKOS_HAVE_OPENMP is defined, so the test stays inside that guard. */ +TEST_F( openmp , dynamic_view ) +{ + typedef TestDynamicView< double , Kokkos::OpenMP > + TestDynView ; + + for ( int i = 0 ; i < 10 ; ++i ) { + TestDynView::run( 100000 + 100 * i ); + } +} +#endif /* #ifdef KOKKOS_HAVE_OPENMP */ + +} // namespace Test + diff --git a/lib/kokkos/containers/unit_tests/TestSegmentedView.hpp b/lib/kokkos/containers/unit_tests/TestSegmentedView.hpp new file mode 100644 
index 0000000000000000000000000000000000000000..bfd66d12a7dc658fe729ce7016b95d5d05c60202 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/TestSegmentedView.hpp @@ -0,0 +1,708 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP +#define KOKKOS_TEST_SEGMENTEDVIEW_HPP + +#include <gtest/gtest.h> +#include <iostream> +#include <cstdlib> +#include <cstdio> +#include <Kokkos_Core.hpp> + +#if ! KOKKOS_USING_EXP_VIEW + +#include <Kokkos_SegmentedView.hpp> +#include <impl/Kokkos_Timer.hpp> + +namespace Test { + +namespace Impl { + + template<class ViewType , class ExecutionSpace, int Rank = ViewType::Rank> + struct GrowTest; + + template<class ViewType , class ExecutionSpace> + struct GrowTest<ViewType , ExecutionSpace , 1> { + typedef ExecutionSpace execution_space; + typedef Kokkos::TeamPolicy<execution_space> Policy; + typedef typename Policy::member_type team_type; + typedef double value_type; + + ViewType a; + + GrowTest(ViewType in):a(in) {} + + KOKKOS_INLINE_FUNCTION + void operator() (team_type team_member, double& value) const { + unsigned int team_idx = team_member.league_rank() * team_member.team_size(); + + a.grow(team_member , team_idx+team_member.team_size()); + value += team_idx + team_member.team_rank(); + + if((a.dimension_0()>team_idx+team_member.team_rank()) && + (a.dimension(0)>team_idx+team_member.team_rank())) + a(team_idx+team_member.team_rank()) = team_idx+team_member.team_rank(); + + } + }; + + template<class ViewType , class ExecutionSpace> + struct GrowTest<ViewType , ExecutionSpace , 2> { + typedef ExecutionSpace execution_space; + typedef Kokkos::TeamPolicy<execution_space> Policy; + typedef typename Policy::member_type team_type; + typedef double value_type; + + ViewType a; + + GrowTest(ViewType in):a(in) {} + + KOKKOS_INLINE_FUNCTION + void operator() (team_type team_member, double& value) const { + unsigned int team_idx = team_member.league_rank() * team_member.team_size(); + + a.grow(team_member , team_idx+ team_member.team_size()); + + for( typename 
ExecutionSpace::size_type k=0;k<7;k++) + value += team_idx + team_member.team_rank() + 13*k; + + if((a.dimension_0()>team_idx+ team_member.team_rank()) && + (a.dimension(0)>team_idx+ team_member.team_rank())) { + for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) { + a(team_idx+ team_member.team_rank(),k) = + team_idx+ team_member.team_rank() + 13*k; + } + } + } + }; + + template<class ViewType , class ExecutionSpace> + struct GrowTest<ViewType , ExecutionSpace , 3> { + typedef ExecutionSpace execution_space; + typedef Kokkos::TeamPolicy<execution_space> Policy; + typedef typename Policy::member_type team_type; + typedef double value_type; + + ViewType a; + + GrowTest(ViewType in):a(in) {} + + KOKKOS_INLINE_FUNCTION + void operator() (team_type team_member, double& value) const { + unsigned int team_idx = team_member.league_rank() * team_member.team_size(); + + a.grow(team_member , team_idx+ team_member.team_size()); + + for( typename ExecutionSpace::size_type k=0;k<7;k++) + for( typename ExecutionSpace::size_type l=0;l<3;l++) + value += team_idx + team_member.team_rank() + 13*k + 3*l; + + if((a.dimension_0()>team_idx+ team_member.team_rank()) && + (a.dimension(0)>team_idx+ team_member.team_rank())) { + for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) + for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) + a(team_idx+ team_member.team_rank(),k,l) = + team_idx+ team_member.team_rank() + 13*k + 3*l; + } + } + }; + + template<class ViewType , class ExecutionSpace> + struct GrowTest<ViewType , ExecutionSpace , 4> { + typedef ExecutionSpace execution_space; + typedef Kokkos::TeamPolicy<execution_space> Policy; + typedef typename Policy::member_type team_type; + typedef double value_type; + + ViewType a; + + GrowTest(ViewType in):a(in) {} + + KOKKOS_INLINE_FUNCTION + void operator() (team_type team_member, double& value) const { + unsigned int team_idx = team_member.league_rank() * team_member.team_size(); + + 
a.grow(team_member , team_idx+ team_member.team_size()); + + for( typename ExecutionSpace::size_type k=0;k<7;k++) + for( typename ExecutionSpace::size_type l=0;l<3;l++) + for( typename ExecutionSpace::size_type m=0;m<2;m++) + value += team_idx + team_member.team_rank() + 13*k + 3*l + 7*m; + + if((a.dimension_0()>team_idx+ team_member.team_rank()) && + (a.dimension(0)>team_idx+ team_member.team_rank())) { + for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) + for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) + for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) + a(team_idx+ team_member.team_rank(),k,l,m) = + team_idx+ team_member.team_rank() + 13*k + 3*l + 7*m; + } + } + }; + + template<class ViewType , class ExecutionSpace> + struct GrowTest<ViewType , ExecutionSpace , 5> { + typedef ExecutionSpace execution_space; + typedef Kokkos::TeamPolicy<execution_space> Policy; + typedef typename Policy::member_type team_type; + typedef double value_type; + + ViewType a; + + GrowTest(ViewType in):a(in) {} + + KOKKOS_INLINE_FUNCTION + void operator() (team_type team_member, double& value) const { + unsigned int team_idx = team_member.league_rank() * team_member.team_size(); + + a.grow(team_member , team_idx+ team_member.team_size()); + + for( typename ExecutionSpace::size_type k=0;k<7;k++) + for( typename ExecutionSpace::size_type l=0;l<3;l++) + for( typename ExecutionSpace::size_type m=0;m<2;m++) + for( typename ExecutionSpace::size_type n=0;n<3;n++) + value += + team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n; + + if((a.dimension_0()>team_idx+ team_member.team_rank()) && + (a.dimension(0)>team_idx+ team_member.team_rank())) { + for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) + for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) + for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) + for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++) + a(team_idx+ 
team_member.team_rank(),k,l,m,n) = + team_idx+ team_member.team_rank() + 13*k + 3*l + 7*m + 5*n; + } + } + }; + + template<class ViewType , class ExecutionSpace> + struct GrowTest<ViewType , ExecutionSpace , 6> { + typedef ExecutionSpace execution_space; + typedef Kokkos::TeamPolicy<execution_space> Policy; + typedef typename Policy::member_type team_type; + typedef double value_type; + + ViewType a; + + GrowTest(ViewType in):a(in) {} + + KOKKOS_INLINE_FUNCTION + void operator() (team_type team_member, double& value) const { + unsigned int team_idx = team_member.league_rank() * team_member.team_size(); + + a.grow(team_member , team_idx+ team_member.team_size()); + + for( typename ExecutionSpace::size_type k=0;k<7;k++) + for( typename ExecutionSpace::size_type l=0;l<3;l++) + for( typename ExecutionSpace::size_type m=0;m<2;m++) + for( typename ExecutionSpace::size_type n=0;n<3;n++) + for( typename ExecutionSpace::size_type o=0;o<2;o++) + value += + team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o ; + + if((a.dimension_0()>team_idx+ team_member.team_rank()) && + (a.dimension(0)>team_idx+ team_member.team_rank())) { + for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) + for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) + for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) + for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++) + for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++) + a(team_idx+ team_member.team_rank(),k,l,m,n,o) = + team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o ; + } + } + }; + + template<class ViewType , class ExecutionSpace> + struct GrowTest<ViewType , ExecutionSpace , 7> { + typedef ExecutionSpace execution_space; + typedef Kokkos::TeamPolicy<execution_space> Policy; + typedef typename Policy::member_type team_type; + typedef double value_type; + + ViewType a; + + GrowTest(ViewType in):a(in) {} + + KOKKOS_INLINE_FUNCTION + void operator() 
(team_type team_member, double& value) const { + unsigned int team_idx = team_member.league_rank() * team_member.team_size(); + + a.grow(team_member , team_idx+ team_member.team_size()); + + for( typename ExecutionSpace::size_type k=0;k<7;k++) + for( typename ExecutionSpace::size_type l=0;l<3;l++) + for( typename ExecutionSpace::size_type m=0;m<2;m++) + for( typename ExecutionSpace::size_type n=0;n<3;n++) + for( typename ExecutionSpace::size_type o=0;o<2;o++) + for( typename ExecutionSpace::size_type p=0;p<4;p++) + value += + team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p ; + + if((a.dimension_0()>team_idx+ team_member.team_rank()) && + (a.dimension(0)>team_idx+ team_member.team_rank())) { + for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) + for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) + for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) + for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++) + for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++) + for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++) + a(team_idx+ team_member.team_rank(),k,l,m,n,o,p) = + team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p ; + } + } + }; + + template<class ViewType , class ExecutionSpace> + struct GrowTest<ViewType , ExecutionSpace , 8> { + typedef ExecutionSpace execution_space; + typedef Kokkos::TeamPolicy<execution_space> Policy; + typedef typename Policy::member_type team_type; + typedef double value_type; + + ViewType a; + + GrowTest(ViewType in):a(in) {} + + KOKKOS_INLINE_FUNCTION + void operator() (team_type team_member, double& value) const { + unsigned int team_idx = team_member.league_rank() * team_member.team_size(); + a.grow(team_member , team_idx + team_member.team_size()); + + for( typename ExecutionSpace::size_type k=0;k<7;k++) + for( typename ExecutionSpace::size_type l=0;l<3;l++) + for( typename ExecutionSpace::size_type 
m=0;m<2;m++) + for( typename ExecutionSpace::size_type n=0;n<3;n++) + for( typename ExecutionSpace::size_type o=0;o<2;o++) + for( typename ExecutionSpace::size_type p=0;p<4;p++) + for( typename ExecutionSpace::size_type q=0;q<3;q++) + value += + team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p + 17*q; + + if((a.dimension_0()>team_idx+ team_member.team_rank()) && + (a.dimension(0)>team_idx+ team_member.team_rank())) { + for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) + for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) + for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) + for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++) + for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++) + for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++) + for( typename ExecutionSpace::size_type q=0;q<a.dimension_7();q++) + a(team_idx+ team_member.team_rank(),k,l,m,n,o,p,q) = + team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p + 17*q; + } + } + }; + + template<class ViewType , class ExecutionSpace, int Rank = ViewType::Rank> + struct VerifyTest; + + template<class ViewType , class ExecutionSpace> + struct VerifyTest<ViewType , ExecutionSpace , 1> { + typedef ExecutionSpace execution_space; + typedef Kokkos::TeamPolicy<execution_space> Policy; + typedef typename Policy::member_type team_type; + typedef double value_type; + + ViewType a; + + VerifyTest(ViewType in):a(in) {} + + KOKKOS_INLINE_FUNCTION + void operator() (team_type team_member, double& value) const { + unsigned int team_idx = team_member.league_rank() * team_member.team_size(); + + if((a.dimension_0()>team_idx+ team_member.team_rank()) && + (a.dimension(0)>team_idx+ team_member.team_rank())) { + value += a(team_idx+ team_member.team_rank()); + } + } + }; + + template<class ViewType , class ExecutionSpace> + struct VerifyTest<ViewType , ExecutionSpace , 2> { + typedef ExecutionSpace 
execution_space; + typedef Kokkos::TeamPolicy<execution_space> Policy; + typedef typename Policy::member_type team_type; + typedef double value_type; + + ViewType a; + + VerifyTest(ViewType in):a(in) {} + + KOKKOS_INLINE_FUNCTION + void operator() (team_type team_member, double& value) const { + unsigned int team_idx = team_member.league_rank() * team_member.team_size(); + + if((a.dimension_0()>team_idx+ team_member.team_rank()) && + (a.dimension(0)>team_idx+ team_member.team_rank())) { + for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) + value += a(team_idx+ team_member.team_rank(),k); + } + } + }; + + template<class ViewType , class ExecutionSpace> + struct VerifyTest<ViewType , ExecutionSpace , 3> { + typedef ExecutionSpace execution_space; + typedef Kokkos::TeamPolicy<execution_space> Policy; + typedef typename Policy::member_type team_type; + typedef double value_type; + + ViewType a; + + VerifyTest(ViewType in):a(in) {} + + KOKKOS_INLINE_FUNCTION + void operator() (team_type team_member, double& value) const { + unsigned int team_idx = team_member.league_rank() * team_member.team_size(); + + if((a.dimension_0()>team_idx+ team_member.team_rank()) && + (a.dimension(0)>team_idx+ team_member.team_rank())) { + for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) + for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) + value += a(team_idx+ team_member.team_rank(),k,l); + } + } + }; + + template<class ViewType , class ExecutionSpace> + struct VerifyTest<ViewType , ExecutionSpace , 4> { + typedef ExecutionSpace execution_space; + typedef Kokkos::TeamPolicy<execution_space> Policy; + typedef typename Policy::member_type team_type; + typedef double value_type; + + ViewType a; + + VerifyTest(ViewType in):a(in) {} + + KOKKOS_INLINE_FUNCTION + void operator() (team_type team_member, double& value) const { + unsigned int team_idx = team_member.league_rank() * team_member.team_size(); + + if((a.dimension_0()>team_idx+ 
team_member.team_rank()) && + (a.dimension(0)>team_idx+ team_member.team_rank())) { + for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) + for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) + for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) + value += a(team_idx+ team_member.team_rank(),k,l,m); + } + } + }; + + template<class ViewType , class ExecutionSpace> + struct VerifyTest<ViewType , ExecutionSpace , 5> { + typedef ExecutionSpace execution_space; + typedef Kokkos::TeamPolicy<execution_space> Policy; + typedef typename Policy::member_type team_type; + typedef double value_type; + + ViewType a; + + VerifyTest(ViewType in):a(in) {} + + KOKKOS_INLINE_FUNCTION + void operator() (team_type team_member, double& value) const { + unsigned int team_idx = team_member.league_rank() * team_member.team_size(); + + if((a.dimension_0()>team_idx+ team_member.team_rank()) && + (a.dimension(0)>team_idx+ team_member.team_rank())) { + for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) + for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) + for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) + for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++) + value += a(team_idx+ team_member.team_rank(),k,l,m,n); + } + } + }; + + template<class ViewType , class ExecutionSpace> + struct VerifyTest<ViewType , ExecutionSpace , 6> { + typedef ExecutionSpace execution_space; + typedef Kokkos::TeamPolicy<execution_space> Policy; + typedef typename Policy::member_type team_type; + typedef double value_type; + + ViewType a; + + VerifyTest(ViewType in):a(in) {} + + KOKKOS_INLINE_FUNCTION + void operator() (team_type team_member, double& value) const { + unsigned int team_idx = team_member.league_rank() * team_member.team_size(); + + if((a.dimension_0()>team_idx+ team_member.team_rank()) && + (a.dimension(0)>team_idx+ team_member.team_rank())) { + for( typename ExecutionSpace::size_type 
k=0;k<a.dimension_1();k++) + for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) + for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) + for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++) + for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++) + value += a(team_idx+ team_member.team_rank(),k,l,m,n,o); + } + } + }; + + template<class ViewType , class ExecutionSpace> + struct VerifyTest<ViewType , ExecutionSpace , 7> { + typedef ExecutionSpace execution_space; + typedef Kokkos::TeamPolicy<execution_space> Policy; + typedef typename Policy::member_type team_type; + typedef double value_type; + + ViewType a; + + VerifyTest(ViewType in):a(in) {} + + KOKKOS_INLINE_FUNCTION + void operator() (team_type team_member, double& value) const { + unsigned int team_idx = team_member.league_rank() * team_member.team_size(); + + if((a.dimension_0()>team_idx+ team_member.team_rank()) && + (a.dimension(0)>team_idx+ team_member.team_rank())) { + for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) + for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) + for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) + for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++) + for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++) + for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++) + value += a(team_idx+ team_member.team_rank(),k,l,m,n,o,p); + } + } + }; + + template<class ViewType , class ExecutionSpace> + struct VerifyTest<ViewType , ExecutionSpace , 8> { + typedef ExecutionSpace execution_space; + typedef Kokkos::TeamPolicy<execution_space> Policy; + typedef typename Policy::member_type team_type; + typedef double value_type; + + ViewType a; + + VerifyTest(ViewType in):a(in) {} + + KOKKOS_INLINE_FUNCTION + void operator() (team_type team_member, double& value) const { + unsigned int team_idx = team_member.league_rank() * team_member.team_size(); + + 
if((a.dimension_0()>team_idx+ team_member.team_rank()) && + (a.dimension(0)>team_idx+ team_member.team_rank())) { + for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) + for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) + for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) + for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++) + for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++) + for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++) + for( typename ExecutionSpace::size_type q=0;q<a.dimension_7();q++) + value += a(team_idx+ team_member.team_rank(),k,l,m,n,o,p,q); + } + } + }; + + template <typename Scalar, class ExecutionSpace> + struct test_segmented_view + { + typedef test_segmented_view<Scalar,ExecutionSpace> self_type; + + typedef Scalar scalar_type; + typedef ExecutionSpace execution_space; + typedef Kokkos::TeamPolicy<execution_space> Policy; + + double result; + double reference; + + template <class ViewType> + void run_me(ViewType a, int max_length){ + const int team_size = Policy::team_size_max( GrowTest<ViewType,execution_space>(a) ); + const int nteams = max_length/team_size; + + reference = 0; + result = 0; + + Kokkos::parallel_reduce(Policy(nteams,team_size),GrowTest<ViewType,execution_space>(a),reference); + Kokkos::fence(); + Kokkos::parallel_reduce(Policy(nteams,team_size),VerifyTest<ViewType,execution_space>(a),result); + Kokkos::fence(); + } + + + test_segmented_view(unsigned int size,int rank) + { + reference = 0; + result = 0; + + const int dim_1 = 7; + const int dim_2 = 3; + const int dim_3 = 2; + const int dim_4 = 3; + const int dim_5 = 2; + const int dim_6 = 4; + //const int dim_7 = 3; + + if(rank==1) { + typedef Kokkos::Experimental::SegmentedView<Scalar*,Kokkos::LayoutLeft,ExecutionSpace> rank1_view; + run_me< rank1_view >(rank1_view("Rank1",128,size), size); + } + if(rank==2) { + typedef 
Kokkos::Experimental::SegmentedView<Scalar**,Kokkos::LayoutLeft,ExecutionSpace> rank2_view; + run_me< rank2_view >(rank2_view("Rank2",128,size,dim_1), size); + } + if(rank==3) { + typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3][2],Kokkos::LayoutRight,ExecutionSpace> rank3_view; + run_me< rank3_view >(rank3_view("Rank3",128,size), size); + } + if(rank==4) { + typedef Kokkos::Experimental::SegmentedView<Scalar****,Kokkos::LayoutRight,ExecutionSpace> rank4_view; + run_me< rank4_view >(rank4_view("Rank4",128,size,dim_1,dim_2,dim_3), size); + } + if(rank==5) { + typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3][2][3],Kokkos::LayoutLeft,ExecutionSpace> rank5_view; + run_me< rank5_view >(rank5_view("Rank5",128,size), size); + } + if(rank==6) { + typedef Kokkos::Experimental::SegmentedView<Scalar*****[2],Kokkos::LayoutRight,ExecutionSpace> rank6_view; + run_me< rank6_view >(rank6_view("Rank6",128,size,dim_1,dim_2,dim_3,dim_4), size); + } + if(rank==7) { + typedef Kokkos::Experimental::SegmentedView<Scalar*******,Kokkos::LayoutLeft,ExecutionSpace> rank7_view; + run_me< rank7_view >(rank7_view("Rank7",128,size,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6), size); + } + if(rank==8) { + typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> rank8_view; + run_me< rank8_view >(rank8_view("Rank8",128,size,dim_1,dim_2,dim_3,dim_4), size); + } + } + + }; + +} // namespace Impl + + + + +template <typename Scalar, class ExecutionSpace> +void test_segmented_view(unsigned int size) +{ + { + typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> view_type; + view_type a("A",128,size,7,3,2,3); + double reference; + + Impl::GrowTest<view_type,ExecutionSpace> f(a); + + const int team_size = Kokkos::TeamPolicy<ExecutionSpace>::team_size_max( f ); + const int nteams = (size+team_size-1)/team_size; + + Kokkos::parallel_reduce(Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size),f,reference); + + 
size_t real_size = ((size+127)/128)*128; + + ASSERT_EQ(real_size,a.dimension_0()); + ASSERT_EQ(7,a.dimension_1()); + ASSERT_EQ(3,a.dimension_2()); + ASSERT_EQ(2,a.dimension_3()); + ASSERT_EQ(3,a.dimension_4()); + ASSERT_EQ(2,a.dimension_5()); + ASSERT_EQ(4,a.dimension_6()); + ASSERT_EQ(3,a.dimension_7()); + ASSERT_EQ(real_size,a.dimension(0)); + ASSERT_EQ(7,a.dimension(1)); + ASSERT_EQ(3,a.dimension(2)); + ASSERT_EQ(2,a.dimension(3)); + ASSERT_EQ(3,a.dimension(4)); + ASSERT_EQ(2,a.dimension(5)); + ASSERT_EQ(4,a.dimension(6)); + ASSERT_EQ(3,a.dimension(7)); + ASSERT_EQ(8,a.Rank); + } + { + Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,1); + ASSERT_EQ(test.reference,test.result); + } + { + Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,2); + ASSERT_EQ(test.reference,test.result); + } + { + Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,3); + ASSERT_EQ(test.reference,test.result); + } + { + Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,4); + ASSERT_EQ(test.reference,test.result); + } + { + Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,5); + ASSERT_EQ(test.reference,test.result); + } + { + Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,6); + ASSERT_EQ(test.reference,test.result); + } + { + Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,7); + ASSERT_EQ(test.reference,test.result); + } + { + Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,8); + ASSERT_EQ(test.reference,test.result); + } + +} + + +} // namespace Test + +#else + +template <typename Scalar, class ExecutionSpace> +void test_segmented_view(unsigned int ) {} + +#endif + +#endif /* #ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP */ + diff --git a/lib/kokkos/containers/unit_tests/TestSerial.cpp b/lib/kokkos/containers/unit_tests/TestSerial.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a7c42d27987d2938fb6b10254d72045732e0f74c --- /dev/null +++ b/lib/kokkos/containers/unit_tests/TestSerial.cpp @@ -0,0 
+1,175 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +#if ! defined(KOKKOS_HAVE_SERIAL) +# error "It doesn't make sense to build this file unless the Kokkos::Serial device is enabled. If you see this message, it probably means that there is an error in Kokkos' CMake build infrastructure." +#else + +#include <Kokkos_Bitset.hpp> +#include <Kokkos_UnorderedMap.hpp> +#include <Kokkos_Vector.hpp> + +#include <TestBitset.hpp> +#include <TestUnorderedMap.hpp> +#include <TestStaticCrsGraph.hpp> +#include <TestVector.hpp> +#include <TestDualView.hpp> +#include <TestSegmentedView.hpp> +#include <TestDynamicView.hpp> +#include <TestComplex.hpp> + +#include <iomanip> + +#include <Kokkos_DynRankView.hpp> +#include <TestDynViewAPI.hpp> + +namespace Test { + +class serial : public ::testing::Test { +protected: + static void SetUpTestCase () { + std::cout << std::setprecision(5) << std::scientific; + Kokkos::Serial::initialize (); + } + + static void TearDownTestCase () { + Kokkos::Serial::finalize (); + } +}; + +TEST_F( serial, dyn_view_api) { + TestDynViewAPI< double , Kokkos::Serial >(); +} + +TEST_F( serial , staticcrsgraph ) +{ + TestStaticCrsGraph::run_test_graph< Kokkos::Serial >(); + TestStaticCrsGraph::run_test_graph2< Kokkos::Serial >(); +} + +TEST_F( serial, complex ) +{ + testComplex<Kokkos::Serial> (); +} + +TEST_F( serial, bitset ) +{ + test_bitset<Kokkos::Serial> (); +} + +#define SERIAL_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \ + TEST_F( serial, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \ + for (int i=0; i<repeat; ++i) \ + test_insert<Kokkos::Serial> (num_nodes, num_inserts, num_duplicates, near); \ + } + +#define SERIAL_FAILED_INSERT_TEST( num_nodes, repeat ) \ + TEST_F( serial, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \ + for 
(int i=0; i<repeat; ++i) \ + test_failed_insert<Kokkos::Serial> (num_nodes); \ + } + +#define SERIAL_ASSIGNEMENT_TEST( num_nodes, repeat ) \ + TEST_F( serial, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \ + for (int i=0; i<repeat; ++i) \ + test_assignement_operators<Kokkos::Serial> (num_nodes); \ + } + +#define SERIAL_DEEP_COPY( num_nodes, repeat ) \ + TEST_F( serial, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \ + for (int i=0; i<repeat; ++i) \ + test_deep_copy<Kokkos::Serial> (num_nodes); \ + } + +#define SERIAL_VECTOR_COMBINE_TEST( size ) \ + TEST_F( serial, vector_combination##size##x) { \ + test_vector_combinations<int,Kokkos::Serial>(size); \ + } + +#define SERIAL_DUALVIEW_COMBINE_TEST( size ) \ + TEST_F( serial, dualview_combination##size##x) { \ + test_dualview_combinations<int,Kokkos::Serial>(size); \ + } + +#define SERIAL_SEGMENTEDVIEW_TEST( size ) \ + TEST_F( serial, segmentedview_##size##x) { \ + test_segmented_view<double,Kokkos::Serial>(size); \ + } + +SERIAL_INSERT_TEST(close, 100000, 90000, 100, 500, true) +SERIAL_INSERT_TEST(far, 100000, 90000, 100, 500, false) +SERIAL_FAILED_INSERT_TEST( 10000, 1000 ) +SERIAL_DEEP_COPY( 10000, 1 ) + +SERIAL_VECTOR_COMBINE_TEST( 10 ) +SERIAL_VECTOR_COMBINE_TEST( 3057 ) +SERIAL_DUALVIEW_COMBINE_TEST( 10 ) +SERIAL_SEGMENTEDVIEW_TEST( 10000 ) + +#undef SERIAL_INSERT_TEST +#undef SERIAL_FAILED_INSERT_TEST +#undef SERIAL_ASSIGNEMENT_TEST +#undef SERIAL_DEEP_COPY +#undef SERIAL_VECTOR_COMBINE_TEST +#undef SERIAL_DUALVIEW_COMBINE_TEST +#undef SERIAL_SEGMENTEDVIEW_TEST + +TEST_F( serial , dynamic_view ) +{ + typedef TestDynamicView< double , Kokkos::Serial > + TestDynView ; + + for ( int i = 0 ; i < 10 ; ++i ) { + TestDynView::run( 100000 + 100 * i ); + } +} + +} // namespace Test + +#endif // KOKKOS_HAVE_SERIAL + + diff --git a/lib/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp b/lib/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp new file mode 100644 index 
0000000000000000000000000000000000000000..52b45b786562efcfbaf10a4db3ac280eb644b09b --- /dev/null +++ b/lib/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp @@ -0,0 +1,149 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <vector> + +#include <Kokkos_StaticCrsGraph.hpp> + +/*--------------------------------------------------------------------------*/ + +namespace TestStaticCrsGraph { + +template< class Space > +void run_test_graph() +{ + typedef Kokkos::StaticCrsGraph< unsigned , Space > dView ; + typedef typename dView::HostMirror hView ; + + const unsigned LENGTH = 1000 ; + dView dx ; + hView hx ; + + std::vector< std::vector< int > > graph( LENGTH ); + + for ( size_t i = 0 ; i < LENGTH ; ++i ) { + graph[i].reserve(8); + for ( size_t j = 0 ; j < 8 ; ++j ) { + graph[i].push_back( i + j * 3 ); + } + } + + dx = Kokkos::create_staticcrsgraph<dView>( "dx" , graph ); + hx = Kokkos::create_mirror( dx ); + + ASSERT_EQ( hx.row_map.dimension_0() - 1 , LENGTH ); + + for ( size_t i = 0 ; i < LENGTH ; ++i ) { + const size_t begin = hx.row_map[i]; + const size_t n = hx.row_map[i+1] - begin ; + ASSERT_EQ( n , graph[i].size() ); + for ( size_t j = 0 ; j < n ; ++j ) { + ASSERT_EQ( (int) hx.entries( j + begin ) , graph[i][j] ); + } + } +} + +template< class Space > +void run_test_graph2() +{ + typedef Kokkos::StaticCrsGraph< unsigned[3] , Space > dView ; + typedef typename dView::HostMirror hView ; + + const unsigned LENGTH = 10 ; + + std::vector< size_t > sizes( LENGTH ); + + size_t total_length = 0 ; + + for ( size_t i = 0 ; i < LENGTH ; ++i ) { + total_length += ( sizes[i] = 6 + i % 4 ); + } + + dView dx = Kokkos::create_staticcrsgraph<dView>( "test" , sizes ); + hView hx = Kokkos::create_mirror( dx ); + hView mx = Kokkos::create_mirror( dx ); + + ASSERT_EQ( (size_t) dx.row_map.dimension_0() , (size_t) LENGTH + 1 ); + ASSERT_EQ( (size_t) hx.row_map.dimension_0() , (size_t) LENGTH + 1 ); + ASSERT_EQ( (size_t) mx.row_map.dimension_0() , (size_t) LENGTH + 1 ); + + ASSERT_EQ( (size_t) 
dx.entries.dimension_0() , (size_t) total_length ); + ASSERT_EQ( (size_t) hx.entries.dimension_0() , (size_t) total_length ); + ASSERT_EQ( (size_t) mx.entries.dimension_0() , (size_t) total_length ); + + ASSERT_EQ( (size_t) dx.entries.dimension_1() , (size_t) 3 ); + ASSERT_EQ( (size_t) hx.entries.dimension_1() , (size_t) 3 ); + ASSERT_EQ( (size_t) mx.entries.dimension_1() , (size_t) 3 ); + + for ( size_t i = 0 ; i < LENGTH ; ++i ) { + const size_t entry_begin = hx.row_map[i]; + const size_t entry_end = hx.row_map[i+1]; + for ( size_t j = entry_begin ; j < entry_end ; ++j ) { + hx.entries(j,0) = j + 1 ; + hx.entries(j,1) = j + 2 ; + hx.entries(j,2) = j + 3 ; + } + } + + Kokkos::deep_copy( dx.entries , hx.entries ); + Kokkos::deep_copy( mx.entries , dx.entries ); + + ASSERT_EQ( mx.row_map.dimension_0() , (size_t) LENGTH + 1 ); + + for ( size_t i = 0 ; i < LENGTH ; ++i ) { + const size_t entry_begin = mx.row_map[i]; + const size_t entry_end = mx.row_map[i+1]; + ASSERT_EQ( ( entry_end - entry_begin ) , sizes[i] ); + for ( size_t j = entry_begin ; j < entry_end ; ++j ) { + ASSERT_EQ( (size_t) mx.entries( j , 0 ) , ( j + 1 ) ); + ASSERT_EQ( (size_t) mx.entries( j , 1 ) , ( j + 2 ) ); + ASSERT_EQ( (size_t) mx.entries( j , 2 ) , ( j + 3 ) ); + } + } +} + +} /* namespace TestStaticCrsGraph */ + + diff --git a/lib/kokkos/containers/unit_tests/TestThreads.cpp b/lib/kokkos/containers/unit_tests/TestThreads.cpp new file mode 100644 index 0000000000000000000000000000000000000000..58277528d31d6ea6adae2996f5e8329b2c63b791 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/TestThreads.cpp @@ -0,0 +1,188 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +#if defined( KOKKOS_HAVE_PTHREAD ) + +#include <Kokkos_Bitset.hpp> +#include <Kokkos_UnorderedMap.hpp> + +#include <Kokkos_Vector.hpp> +#include <iomanip> + + +//---------------------------------------------------------------------------- +#include <TestBitset.hpp> +#include <TestUnorderedMap.hpp> +#include <TestStaticCrsGraph.hpp> + +#include <TestVector.hpp> +#include <TestDualView.hpp> +#include <TestDynamicView.hpp> +#include <TestSegmentedView.hpp> + +#include <Kokkos_DynRankView.hpp> +#include <TestDynViewAPI.hpp> + +namespace Test { + +class threads : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + + unsigned num_threads = 4; + + if (Kokkos::hwloc::available()) { + num_threads = Kokkos::hwloc::get_available_numa_count() + * Kokkos::hwloc::get_available_cores_per_numa() + // * Kokkos::hwloc::get_available_threads_per_core() + ; + + } + + std::cout << "Threads: " << num_threads << std::endl; + + Kokkos::Threads::initialize( num_threads ); + } + + static void TearDownTestCase() + { + Kokkos::Threads::finalize(); + } +}; + +TEST_F( threads , dyn_view_api) { + TestDynViewAPI< double , Kokkos::Threads >(); +} + +TEST_F( threads , staticcrsgraph ) +{ + TestStaticCrsGraph::run_test_graph< Kokkos::Threads >(); + TestStaticCrsGraph::run_test_graph2< Kokkos::Threads >(); +} + +/*TEST_F( threads, bitset ) +{ + test_bitset<Kokkos::Threads>(); +}*/ + +#define THREADS_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \ + TEST_F( threads, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \ + for (int i=0; i<repeat; ++i) \ + test_insert<Kokkos::Threads>(num_nodes,num_inserts,num_duplicates, near); \ + } + +#define 
THREADS_FAILED_INSERT_TEST( num_nodes, repeat ) \ + TEST_F( threads, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \ + for (int i=0; i<repeat; ++i) \ + test_failed_insert<Kokkos::Threads>(num_nodes); \ + } + +#define THREADS_ASSIGNEMENT_TEST( num_nodes, repeat ) \ + TEST_F( threads, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \ + for (int i=0; i<repeat; ++i) \ + test_assignement_operators<Kokkos::Threads>(num_nodes); \ + } + +#define THREADS_DEEP_COPY( num_nodes, repeat ) \ + TEST_F( threads, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \ + for (int i=0; i<repeat; ++i) \ + test_deep_copy<Kokkos::Threads>(num_nodes); \ + } + +#define THREADS_VECTOR_COMBINE_TEST( size ) \ + TEST_F( threads, vector_combination##size##x) { \ + test_vector_combinations<int,Kokkos::Threads>(size); \ + } + +#define THREADS_DUALVIEW_COMBINE_TEST( size ) \ + TEST_F( threads, dualview_combination##size##x) { \ + test_dualview_combinations<int,Kokkos::Threads>(size); \ + } + +#define THREADS_SEGMENTEDVIEW_TEST( size ) \ + TEST_F( threads, segmentedview_##size##x) { \ + test_segmented_view<double,Kokkos::Threads>(size); \ + } + + +THREADS_INSERT_TEST(far, 100000, 90000, 100, 500, false) +THREADS_FAILED_INSERT_TEST( 10000, 1000 ) +THREADS_DEEP_COPY( 10000, 1 ) + +THREADS_VECTOR_COMBINE_TEST( 10 ) +THREADS_VECTOR_COMBINE_TEST( 3057 ) +THREADS_DUALVIEW_COMBINE_TEST( 10 ) +THREADS_SEGMENTEDVIEW_TEST( 10000 ) + + +#undef THREADS_INSERT_TEST +#undef THREADS_FAILED_INSERT_TEST +#undef THREADS_ASSIGNEMENT_TEST +#undef THREADS_DEEP_COPY +#undef THREADS_VECTOR_COMBINE_TEST +#undef THREADS_DUALVIEW_COMBINE_TEST +#undef THREADS_SEGMENTEDVIEW_TEST + + + +TEST_F( threads , dynamic_view ) +{ + typedef TestDynamicView< double , Kokkos::Threads > + TestDynView ; + + for ( int i = 0 ; i < 10 ; ++i ) { + TestDynView::run( 100000 + 100 * i ); + } +} + +} // namespace Test + + +#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */ + diff --git 
a/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp b/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ff0328548dee0a3458faa82ab44a16e5a081d29b --- /dev/null +++ b/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp @@ -0,0 +1,313 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_TEST_UNORDERED_MAP_HPP +#define KOKKOS_TEST_UNORDERED_MAP_HPP + +#include <gtest/gtest.h> +#include <iostream> + + +namespace Test { + +namespace Impl { + +template <typename MapType, bool Near = false> +struct TestInsert +{ + typedef MapType map_type; + typedef typename map_type::execution_space execution_space; + typedef uint32_t value_type; + + map_type map; + uint32_t inserts; + uint32_t collisions; + + TestInsert( map_type arg_map, uint32_t arg_inserts, uint32_t arg_collisions) + : map(arg_map) + , inserts(arg_inserts) + , collisions(arg_collisions) + {} + + void testit( bool rehash_on_fail = true ) + { + execution_space::fence(); + + uint32_t failed_count = 0; + do { + failed_count = 0; + Kokkos::parallel_reduce(inserts, *this, failed_count); + + if (rehash_on_fail && failed_count > 0u) { + const uint32_t new_capacity = map.capacity() + ((map.capacity()*3ull)/20u) + failed_count/collisions ; + map.rehash( new_capacity ); + } + } while (rehash_on_fail && failed_count > 0u); + + execution_space::fence(); + } + + + KOKKOS_INLINE_FUNCTION + void init( value_type & failed_count ) const { failed_count = 0; } + + KOKKOS_INLINE_FUNCTION + void join( volatile value_type & failed_count, const volatile value_type & count ) const + { failed_count += count; } + + 
KOKKOS_INLINE_FUNCTION + void operator()(uint32_t i, value_type & failed_count) const + { + const uint32_t key = Near ? i/collisions : i%(inserts/collisions); + if (map.insert(key,i).failed()) ++failed_count; + } + +}; + + template <typename MapType, bool Near> + struct TestErase + { + typedef TestErase<MapType, Near> self_type; + + typedef MapType map_type; + typedef typename MapType::execution_space execution_space; + + map_type m_map; + uint32_t m_num_erase; + uint32_t m_num_duplicates; + + TestErase(map_type map, uint32_t num_erases, uint32_t num_duplicates) + : m_map(map) + , m_num_erase(num_erases) + , m_num_duplicates(num_duplicates) + {} + + void testit() + { + execution_space::fence(); + Kokkos::parallel_for(m_num_erase, *this); + execution_space::fence(); + } + + KOKKOS_INLINE_FUNCTION + void operator()(typename execution_space::size_type i) const + { + if (Near) { + m_map.erase(i/m_num_duplicates); + } + else { + m_map.erase(i%(m_num_erase/m_num_duplicates)); + } + + } + }; + + template <typename MapType> + struct TestFind + { + typedef MapType map_type; + typedef typename MapType::execution_space::execution_space execution_space; + typedef uint32_t value_type; + + map_type m_map; + uint32_t m_num_insert; + uint32_t m_num_duplicates; + uint32_t m_max_key; + + TestFind(map_type map, uint32_t num_inserts, uint32_t num_duplicates) + : m_map(map) + , m_num_insert(num_inserts) + , m_num_duplicates(num_duplicates) + , m_max_key( ((num_inserts + num_duplicates) - 1)/num_duplicates ) + {} + + void testit(value_type &errors) + { + execution_space::execution_space::fence(); + Kokkos::parallel_reduce(m_map.capacity(), *this, errors); + execution_space::execution_space::fence(); + } + + KOKKOS_INLINE_FUNCTION + static void init( value_type & dst) + { + dst = 0; + } + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & dst, const volatile value_type & src) + { dst += src; } + + KOKKOS_INLINE_FUNCTION + void operator()(typename 
execution_space::size_type i, value_type & errors) const + { + const bool expect_to_find_i = (i < m_max_key); + + const bool exists = m_map.exists(i); + + if (expect_to_find_i && !exists) ++errors; + if (!expect_to_find_i && exists) ++errors; + } + }; + +} // namespace Impl + + + +template <typename Device> +void test_insert( uint32_t num_nodes , uint32_t num_inserts , uint32_t num_duplicates , bool near ) +{ + typedef Kokkos::UnorderedMap<uint32_t,uint32_t, Device> map_type; + typedef Kokkos::UnorderedMap<const uint32_t,const uint32_t, Device> const_map_type; + + const uint32_t expected_inserts = (num_inserts + num_duplicates -1u) / num_duplicates; + + map_type map; + map.rehash(num_nodes,false); + + if (near) { + Impl::TestInsert<map_type,true> test_insert(map, num_inserts, num_duplicates); + test_insert.testit(); + } else + { + Impl::TestInsert<map_type,false> test_insert(map, num_inserts, num_duplicates); + test_insert.testit(); + } + + const bool print_list = false; + if (print_list) { + Kokkos::Impl::UnorderedMapPrint<map_type> f(map); + f.apply(); + } + + const uint32_t map_size = map.size(); + + ASSERT_FALSE( map.failed_insert()); + { + EXPECT_EQ(expected_inserts, map_size); + + { + uint32_t find_errors = 0; + Impl::TestFind<const_map_type> test_find(map, num_inserts, num_duplicates); + test_find.testit(find_errors); + EXPECT_EQ( 0u, find_errors); + } + + map.begin_erase(); + Impl::TestErase<map_type,false> test_erase(map, num_inserts, num_duplicates); + test_erase.testit(); + map.end_erase(); + EXPECT_EQ(0u, map.size()); + } +} + +template <typename Device> +void test_failed_insert( uint32_t num_nodes) +{ + typedef Kokkos::UnorderedMap<uint32_t,uint32_t, Device> map_type; + + map_type map(num_nodes); + Impl::TestInsert<map_type> test_insert(map, 2u*num_nodes, 1u); + test_insert.testit(false /*don't rehash on fail*/); + Device::execution_space::fence(); + + EXPECT_TRUE( map.failed_insert() ); +} + + + +template <typename Device> +void test_deep_copy( 
uint32_t num_nodes ) +{ + typedef Kokkos::UnorderedMap<uint32_t,uint32_t, Device> map_type; + typedef Kokkos::UnorderedMap<const uint32_t, const uint32_t, Device> const_map_type; + + typedef typename map_type::HostMirror host_map_type ; + // typedef Kokkos::UnorderedMap<uint32_t, uint32_t, typename Device::host_mirror_execution_space > host_map_type; + + map_type map; + map.rehash(num_nodes,false); + + { + Impl::TestInsert<map_type> test_insert(map, num_nodes, 1); + test_insert.testit(); + ASSERT_EQ( map.size(), num_nodes); + ASSERT_FALSE( map.failed_insert() ); + { + uint32_t find_errors = 0; + Impl::TestFind<map_type> test_find(map, num_nodes, 1); + test_find.testit(find_errors); + EXPECT_EQ( find_errors, 0u); + } + + } + + host_map_type hmap; + Kokkos::deep_copy(hmap, map); + + ASSERT_EQ( map.size(), hmap.size()); + ASSERT_EQ( map.capacity(), hmap.capacity()); + { + uint32_t find_errors = 0; + Impl::TestFind<host_map_type> test_find(hmap, num_nodes, 1); + test_find.testit(find_errors); + EXPECT_EQ( find_errors, 0u); + } + + map_type mmap; + Kokkos::deep_copy(mmap, hmap); + + const_map_type cmap = mmap; + + EXPECT_EQ( cmap.size(), num_nodes); + + { + uint32_t find_errors = 0; + Impl::TestFind<const_map_type> test_find(cmap, num_nodes, 1); + test_find.testit(find_errors); + EXPECT_EQ( find_errors, 0u); + } + +} + +} // namespace Test + +#endif //KOKKOS_TEST_UNORDERED_MAP_HPP diff --git a/lib/kokkos/containers/unit_tests/TestVector.hpp b/lib/kokkos/containers/unit_tests/TestVector.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f9f4564898edf32e0030d0ca135ff9f43909f397 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/TestVector.hpp @@ -0,0 +1,131 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_TEST_VECTOR_HPP +#define KOKKOS_TEST_VECTOR_HPP + +#include <gtest/gtest.h> +#include <iostream> +#include <cstdlib> +#include <cstdio> +#include <impl/Kokkos_Timer.hpp> + +namespace Test { + +namespace Impl { + + template <typename Scalar, class Device> + struct test_vector_combinations + { + typedef test_vector_combinations<Scalar,Device> self_type; + + typedef Scalar scalar_type; + typedef Device execution_space; + + Scalar reference; + Scalar result; + + template <typename Vector> + Scalar run_me(unsigned int n){ + Vector a(n,1); + + + a.push_back(2); + a.resize(n+4); + a[n+1] = 3; + a[n+2] = 4; + a[n+3] = 5; + + + Scalar temp1 = a[2]; + Scalar temp2 = a[n]; + Scalar temp3 = a[n+1]; + + a.assign(n+2,-1); + + a[2] = temp1; + a[n] = temp2; + a[n+1] = temp3; + + Scalar test1 = 0; + for(unsigned int i=0; i<a.size(); i++) + test1+=a[i]; + + a.assign(n+1,-2); + Scalar test2 = 0; + for(unsigned int i=0; i<a.size(); i++) + test2+=a[i]; + + a.reserve(n+10); + + Scalar test3 = 0; + for(unsigned int i=0; i<a.size(); i++) + test3+=a[i]; + + + return (test1*test2+test3)*test2+test1*test3; + } + + + test_vector_combinations(unsigned int size) + { + reference = run_me<std::vector<Scalar> >(size); + result = run_me<Kokkos::vector<Scalar,Device> >(size); + } + + }; + +} // namespace Impl + + + + +template <typename Scalar, typename Device> +void test_vector_combinations(unsigned int size) +{ + Impl::test_vector_combinations<Scalar,Device> test(size); + ASSERT_EQ( test.reference, test.result); +} + + +} // namespace Test + +#endif //KOKKOS_TEST_UNORDERED_MAP_HPP diff --git a/lib/kokkos/containers/unit_tests/UnitTestMain.cpp b/lib/kokkos/containers/unit_tests/UnitTestMain.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f952ab3db51028aff0a0ebfe313b2639e353ab87 --- /dev/null +++ 
b/lib/kokkos/containers/unit_tests/UnitTestMain.cpp @@ -0,0 +1,50 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +int main(int argc, char *argv[]) { + ::testing::InitGoogleTest(&argc,argv); + return RUN_ALL_TESTS(); +} + diff --git a/lib/kokkos/core/CMakeLists.txt b/lib/kokkos/core/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..42fce6b2f210a73befefbb6c2a768fca5f9440df --- /dev/null +++ b/lib/kokkos/core/CMakeLists.txt @@ -0,0 +1,11 @@ + + +TRIBITS_SUBPACKAGE(Core) + +ADD_SUBDIRECTORY(src) + +TRIBITS_ADD_TEST_DIRECTORIES(unit_test) +TRIBITS_ADD_TEST_DIRECTORIES(perf_test) + +TRIBITS_SUBPACKAGE_POSTPROCESS() + diff --git a/lib/kokkos/core/cmake/Dependencies.cmake b/lib/kokkos/core/cmake/Dependencies.cmake new file mode 100644 index 0000000000000000000000000000000000000000..34ff0be5d3c6d26761b4758fda5d7217d66660e6 --- /dev/null +++ b/lib/kokkos/core/cmake/Dependencies.cmake @@ -0,0 +1,4 @@ +TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC QTHREAD DLlib + TEST_OPTIONAL_TPLS CUSPARSE + ) diff --git a/lib/kokkos/core/cmake/KokkosCore_config.h.in b/lib/kokkos/core/cmake/KokkosCore_config.h.in new file mode 100644 index 0000000000000000000000000000000000000000..27e3ba1c31f56aa35c6487488d96fa71f7b25d99 --- /dev/null +++ b/lib/kokkos/core/cmake/KokkosCore_config.h.in @@ -0,0 +1,57 @@ +#ifndef KOKKOS_CORE_CONFIG_H +#define KOKKOS_CORE_CONFIG_H + +/* The trivial 'src/build_common.sh' creates a config + * that must stay in sync with this file. + */ +#cmakedefine KOKKOS_FOR_SIERRA + +#if !defined( KOKKOS_FOR_SIERRA ) + +#cmakedefine KOKKOS_HAVE_MPI +#cmakedefine KOKKOS_HAVE_CUDA + +// mfh 16 Sep 2014: If passed in on the command line, that overrides +// any value of KOKKOS_USE_CUDA_UVM here. 
Doing this should prevent build +// warnings like this one: +// +// packages/kokkos/core/src/KokkosCore_config.h:13:1: warning: "KOKKOS_USE_CUDA_UVM" redefined +// +// At some point, we should edit the test-build scripts in +// Trilinos/cmake/ctest/drivers/perseus/, and take +// -DKOKKOS_USE_CUDA_UVM from the command-line arguments there. I +// hesitate to do that now, because I'm not sure if all the files are +// including KokkosCore_config.h (or a header file that includes it) like +// they should. + +#if ! defined(KOKKOS_USE_CUDA_UVM) +#cmakedefine KOKKOS_USE_CUDA_UVM +#endif // ! defined(KOKKOS_USE_CUDA_UVM) + +#cmakedefine KOKKOS_HAVE_PTHREAD +#cmakedefine KOKKOS_HAVE_SERIAL +#cmakedefine KOKKOS_HAVE_QTHREAD +#cmakedefine KOKKOS_HAVE_Winthread +#cmakedefine KOKKOS_HAVE_OPENMP +#cmakedefine KOKKOS_HAVE_HWLOC +#cmakedefine KOKKOS_HAVE_DEBUG +#cmakedefine KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK +#cmakedefine KOKKOS_HAVE_CXX11 +#cmakedefine KOKKOS_HAVE_CUSPARSE +#cmakedefine KOKKOS_ENABLE_PROFILING_INTERNAL +#ifdef KOKKOS_ENABLE_PROFILING_INTERNAL +#define KOKKOS_ENABLE_PROFILING 1 +#else +#define KOKKOS_ENABLE_PROFILING 0 +#endif + +// Don't forbid users from defining this macro on the command line, +// but still make sure that CMake logic can control its definition. +#if ! 
# Header search path for the perf-test sources: this directory's build tree
# and its source tree.  The variable was previously misspelled
# CMAKE_CURRENT_BINRARY_DIR, which is undefined and expands to an empty
# string, so the build directory was never actually added.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

SET(SOURCES
  PerfTestMain.cpp
  PerfTestHost.cpp
  PerfTestCuda.cpp
  )

# Per #374, we always want to build this test, but we only want to run
# it as a PERFORMANCE test.  That's why we separate building the test
# from running the test.

TRIBITS_ADD_EXECUTABLE(
  PerfTestExec
  SOURCES ${SOURCES}
  COMM serial mpi
  TESTONLYLIBS kokkos_gtest
  )

TRIBITS_ADD_EXECUTABLE_AND_TEST(
  PerfTest
  NAME PerfTestExec
  COMM serial mpi
  NUM_MPI_PROCS 1
  CATEGORIES PERFORMANCE
  FAIL_REGULAR_EXPRESSION " FAILED "
  )
# Location of the bundled GoogleTest sources, relative to this directory.
GTEST_PATH = ../../tpls/gtest

# Let pattern rules find the perf-test .cpp files.
vpath %.cpp ${KOKKOS_PATH}/core/perf_test

# First target in this file, so it is the goal of a bare `make`
# (NOTE(review): assumes nothing earlier defines a target -- confirm).
default: build_all
	echo "End Build"


# Supplies the KOKKOS_* variables used below.
include $(KOKKOS_PATH)/Makefile.kokkos

# With CUDA enabled the compiler and linker are forced to the nvcc wrapper;
# otherwise every setting is only a default (?=) the caller may override.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  CXX = $(NVCC_WRAPPER)
  CXXFLAGS ?= -O3
  LINK = $(CXX)
  LDFLAGS ?= -lpthread
else
  CXX ?= g++
  CXXFLAGS ?= -O3
  LINK ?= $(CXX)
  LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/core/perf_test

# TARGETS collects the executables; TEST_TARGETS collects the phony targets
# that run them.
TEST_TARGETS =
TARGETS =

OBJ_PERF = PerfTestHost.o PerfTestCuda.o PerfTestMain.o gtest-all.o
TARGETS += KokkosCore_PerformanceTest
TEST_TARGETS += test-performance

OBJ_ATOMICS = test_atomic.o
TARGETS += KokkosCore_PerformanceTest_Atomics
TEST_TARGETS += test-atomic


# Link rules.
KokkosCore_PerformanceTest: $(OBJ_PERF) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_PERF) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest

KokkosCore_PerformanceTest_Atomics: $(OBJ_ATOMICS) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_ATOMICS) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest_Atomics

# Run rules: each one executes the binary it depends on.
test-performance: KokkosCore_PerformanceTest
	./KokkosCore_PerformanceTest

test-atomic: KokkosCore_PerformanceTest_Atomics
	./KokkosCore_PerformanceTest_Atomics


build_all: $(TARGETS)

test: $(TEST_TARGETS)

# kokkos-clean is not defined in this file (presumably by Makefile.kokkos).
clean: kokkos-clean
	rm -f *.o $(TARGETS)

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

# GoogleTest is compiled from its single amalgamated source file.
gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
+// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
#ifndef KOKKOS_BLAS_KERNELS_HPP
#define KOKKOS_BLAS_KERNELS_HPP

// Small BLAS-1 style kernels (dot, scale, axpby) written as Kokkos functors,
// plus free functions that launch them.
//
// NOTE(review): this header includes nothing itself; parallel_for,
// parallel_reduce, KOKKOS_INLINE_FUNCTION and Impl::StaticAssertSame must
// already be declared by whatever includes it.

namespace Kokkos {

// Forward declarations.  In each, Device defaults to the execution_space
// typedef of a vector argument.

template< class ConstVectorType ,
          class Device = typename ConstVectorType::execution_space >
struct Dot ;

template< class ConstVectorType ,
          class Device = typename ConstVectorType::execution_space >
struct DotSingle ;

template< class ConstScalarType ,
          class VectorType ,
          class Device = typename VectorType::execution_space >
struct Scale ;

template< class ConstScalarType ,
          class ConstVectorType ,
          class VectorType ,
          class Device = typename VectorType::execution_space >
struct AXPBY ;

/** \brief  Y = alpha * X + beta * Y
 *
 *  Launches an AXPBY functor over Y.dimension_0() work items.
 *  alpha and beta are read inside the functor through operator().
 */
template< class ConstScalarType ,
          class ConstVectorType ,
          class VectorType >
void axpby( const ConstScalarType & alpha ,
            const ConstVectorType & X ,
            const ConstScalarType & beta ,
            const VectorType & Y )
{
  typedef AXPBY< ConstScalarType , ConstVectorType , VectorType > functor ;

  parallel_for( Y.dimension_0() , functor( alpha , X , beta , Y ) );
}

/** \brief  Y *= alpha
 *
 *  Launches a Scale functor over Y.dimension_0() work items.
 */
template< class ConstScalarType ,
          class VectorType >
void scale( const ConstScalarType & alpha , const VectorType & Y )
{
  typedef Scale< ConstScalarType , VectorType > functor ;

  parallel_for( Y.dimension_0() , functor( alpha , Y ) );
}

/** \brief  Reduce sum( X[i] * Y[i] ) over X.dimension_0() entries.
 *
 *  \c finalize is passed unchanged as the third argument of parallel_reduce;
 *  presumably that is where the reduced value is delivered (confirm against
 *  the parallel_reduce overload in use).
 */
template< class ConstVectorType ,
          class Finalize >
void dot( const ConstVectorType & X ,
          const ConstVectorType & Y ,
          const Finalize & finalize )
{
  typedef Dot< ConstVectorType > functor ;

  parallel_reduce( X.dimension_0() , functor( X , Y ) , finalize );
}

/** \brief  Reduce sum( X[i] * X[i] ) over X.dimension_0() entries.
 *
 *  Single-vector form of dot(); \c finalize is forwarded the same way.
 */
template< class ConstVectorType ,
          class Finalize >
void dot( const ConstVectorType & X ,
          const Finalize & finalize )
{
  typedef DotSingle< ConstVectorType > functor ;

  parallel_reduce( X.dimension_0() , functor( X ) , finalize );
}

} /* namespace Kokkos */


//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {

/// Reduction functor that accumulates X[i] * Y[i] into a double.
template< class Type , class Device >
struct Dot
{
  typedef typename Device::execution_space execution_space ;

  // This typedef exists only to instantiate StaticAssertSame on
  // unsigned_<1> versus unsigned_<Type::Rank>; presumably compilation fails
  // unless Type::Rank == 1 (StaticAssertSame is defined elsewhere).
  typedef typename
    Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
                            Impl::unsigned_< Type::Rank > >::type ok_rank ;


/*  typedef typename
    Impl::StaticAssertSame< execution_space ,
                            typename Type::execution_space >::type ok_device ;*/

  typedef double value_type ;

  // Active branch stores Type::const_type; the disabled #else branch would
  // store the argument type itself.
#if 1
  typename Type::const_type X ;
  typename Type::const_type Y ;
#else
  Type X ;
  Type Y ;
#endif

  Dot( const Type & arg_x , const Type & arg_y )
    : X(arg_x) , Y(arg_y) { }

  // Contribution of work item i.
  KOKKOS_INLINE_FUNCTION
  void operator()( int i , value_type & update ) const
    { update += X[i] * Y[i]; }

  // Combines two partial sums.
  KOKKOS_INLINE_FUNCTION
  static void join( volatile value_type & update ,
                    const volatile value_type & source )
    { update += source; }

  // Starting value of a partial sum.
  KOKKOS_INLINE_FUNCTION
  static void init( value_type & update )
    { update = 0 ; }
};

/// Reduction functor that accumulates X[i] * X[i] into a double.
template< class Type , class Device >
struct DotSingle
{
  typedef typename Device::execution_space execution_space ;

  // Same rank check as in Dot (presumably requires Type::Rank == 1).
  typedef typename
    Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
                            Impl::unsigned_< Type::Rank > >::type ok_rank ;

/*  typedef typename
    Impl::StaticAssertSame< execution_space ,
                            typename Type::execution_space >::type ok_device ;*/

  typedef double value_type ;

#if 1
  typename Type::const_type X ;
#else
  Type X ;
#endif

  DotSingle( const Type & arg_x ) : X(arg_x) {}

  // Contribution of work item i; X[i] is bound once and squared.
  KOKKOS_INLINE_FUNCTION
  void operator()( int i , value_type & update ) const
    {
      const typename Type::value_type & x = X[i]; update += x * x ;
    }

  // Combines two partial sums.
  KOKKOS_INLINE_FUNCTION
  static void join( volatile value_type & update ,
                    const volatile value_type & source )
    { update += source; }

  // Starting value of a partial sum.
  KOKKOS_INLINE_FUNCTION
  static void init( value_type & update )
    { update = 0 ; }
};


/// parallel_for functor computing Y[i] *= alpha().
template< class ScalarType , class VectorType , class Device>
struct Scale
{
  typedef typename Device::execution_space execution_space ;

/*  typedef typename
    Impl::StaticAssertSame< execution_space ,
                            typename ScalarType::execution_space >::type
      ok_scalar_device ;

  typedef typename
    Impl::StaticAssertSame< execution_space ,
                            typename VectorType::execution_space >::type
      ok_vector_device ;*/

  // Rank checks: ScalarType::Rank against 0, VectorType::Rank against 1.
  typedef typename
    Impl::StaticAssertSame< Impl::unsigned_< 0 > ,
                            Impl::unsigned_< ScalarType::Rank > >::type
      ok_scalar_rank ;

  typedef typename
    Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
                            Impl::unsigned_< VectorType::Rank > >::type
      ok_vector_rank ;

#if 1
  typename ScalarType::const_type alpha ;
#else
  ScalarType alpha ;
#endif

  VectorType Y ;

  Scale( const ScalarType & arg_alpha , const VectorType & arg_Y )
    : alpha( arg_alpha ), Y( arg_Y ) {}

  // alpha is read through operator() with no arguments.
  KOKKOS_INLINE_FUNCTION
  void operator()( int i ) const
    {
      Y[i] *= alpha() ;
    }
};


/// parallel_for functor computing Y[i] = alpha() * X[i] + beta() * Y[i].
template< class ScalarType ,
          class ConstVectorType ,
          class VectorType,
          class Device>
struct AXPBY
{
  typedef typename Device::execution_space execution_space ;

/*  typedef typename
    Impl::StaticAssertSame< execution_space ,
                            typename ScalarType::execution_space >::type
      ok_scalar_device ;

  typedef typename
    Impl::StaticAssertSame< execution_space ,
                            typename ConstVectorType::execution_space >::type
      ok_const_vector_device ;

  typedef typename
    Impl::StaticAssertSame< execution_space ,
                            typename VectorType::execution_space >::type
      ok_vector_device ;*/

  // Rank checks: scalar against 0, both vector types against 1.
  typedef typename
    Impl::StaticAssertSame< Impl::unsigned_< 0 > ,
                            Impl::unsigned_< ScalarType::Rank > >::type
      ok_scalar_rank ;

  typedef typename
    Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
                            Impl::unsigned_< ConstVectorType::Rank > >::type
      ok_const_vector_rank ;

  typedef typename
    Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
                            Impl::unsigned_< VectorType::Rank > >::type
      ok_vector_rank ;

#if 1
  typename ScalarType::const_type alpha , beta ;
  typename ConstVectorType::const_type X ;
#else
  ScalarType alpha , beta ;
  ConstVectorType X ;
#endif

  VectorType Y ;

  AXPBY( const ScalarType & arg_alpha ,
         const ConstVectorType & arg_X ,
         const ScalarType & arg_beta ,
         const VectorType & arg_Y )
    : alpha( arg_alpha ), beta( arg_beta ), X( arg_X ), Y( arg_Y ) {}

  // Y[i] is read on the right-hand side and then overwritten.
  KOKKOS_INLINE_FUNCTION
  void operator()( int i ) const
    {
      Y[i] = alpha() * X[i] + beta() * Y[i] ;
    }
};

} /* namespace Kokkos */

#endif /* #ifndef KOKKOS_BLAS_KERNELS_HPP */
#include <iostream>
#include <iomanip>
#include <algorithm>
#include <gtest/gtest.h>

#include <Kokkos_Core.hpp>

// Everything below is compiled only when Kokkos was configured with CUDA.
#if defined( KOKKOS_HAVE_CUDA )

#include <impl/Kokkos_Timer.hpp>

#include <PerfTestHexGrad.hpp>
#include <PerfTestBlasKernels.hpp>
#include <PerfTestGramSchmidt.hpp>
#include <PerfTestDriver.hpp>


namespace Test {

// gtest fixture for the CUDA performance tests.  The host execution space is
// initialized before Kokkos::Cuda (device 0) and finalized after it.
class cuda : public ::testing::Test {
  protected:
    static void SetUpTestCase() {
      Kokkos::HostSpace::execution_space::initialize();
      Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
    }
    static void TearDownTestCase() {
      Kokkos::Cuda::finalize();
      Kokkos::HostSpace::execution_space::finalize();
    }
};

// 10 and 20 are the exp_beg / exp_end arguments of run_test_hexgrad
// (declared in PerfTestDriver.hpp); the test only checks nothing throws.
TEST_F( cuda, hexgrad )
{
  EXPECT_NO_THROW( run_test_hexgrad< Kokkos::Cuda >( 10 , 20, "Kokkos::Cuda" ) );
}

// Same exponent arguments, for the Gram-Schmidt driver.
TEST_F( cuda, gramschmidt )
{
  EXPECT_NO_THROW( run_test_gramschmidt< Kokkos::Cuda >( 10 , 20, "Kokkos::Cuda" ) );
}

namespace {

// Times a reduction that reads a CudaSpace array through a shuffled index
// array.
template <typename T>
struct TextureFetch
{
  typedef Kokkos::View< T *, Kokkos::CudaSpace> array_type;
  // Const view with the MemoryRandomAccess trait.  NOTE(review): presumably
  // this is what routes loads through the texture path the test output
  // names -- confirm against the Kokkos documentation.
  typedef Kokkos::View< const T *, Kokkos::CudaSpace, Kokkos::MemoryRandomAccess> const_array_type;
  typedef Kokkos::View< int *, Kokkos::CudaSpace> index_array_type;
  typedef Kokkos::View< const int *, Kokkos::CudaSpace> const_index_array_type;

  // Functor that sets m_array(i) = i for every i in [0, dimension_0).
  struct FillArray
  {
    array_type m_array;
    FillArray( const array_type & array )
      : m_array(array)
    {}

    // Launches this functor on Kokkos::Cuda over the whole array.
    void apply() const
    {
      Kokkos::parallel_for( Kokkos::RangePolicy<Kokkos::Cuda,int>(0,m_array.dimension_0()), *this);
    }

    KOKKOS_INLINE_FUNCTION
    void operator()(int i) const { m_array(i) = i; }
  };

  // Fills a host mirror with 0..N-1, shuffles it on the host and copies the
  // result into the device index array.
  struct RandomIndexes
  {
    index_array_type m_indexes;
    typename index_array_type::HostMirror m_host_indexes;
    RandomIndexes( const index_array_type & indexes)
      : m_indexes(indexes)
      , m_host_indexes(Kokkos::create_mirror(m_indexes))
    {}

    void apply() const
    {
      // Identity fill runs on the host execution space.
      Kokkos::parallel_for( Kokkos::RangePolicy<Kokkos::HostSpace::execution_space,int>(0,m_host_indexes.dimension_0()), *this);
      //random shuffle
      // Fence so the fill has finished before the raw pointer is used.
      Kokkos::HostSpace::execution_space::fence();
      std::random_shuffle(m_host_indexes.ptr_on_device(), m_host_indexes.ptr_on_device() + m_host_indexes.dimension_0());
      Kokkos::deep_copy(m_indexes,m_host_indexes);
    }

    KOKKOS_INLINE_FUNCTION
    void operator()(int i) const { m_host_indexes(i) = i; }
  };

  // Reduction functor: sums m_array(m_indexes(i)) over all i.
  struct RandomReduce
  {
    const_array_type       m_array;
    const_index_array_type m_indexes;
    RandomReduce( const const_array_type & array, const const_index_array_type & indexes)
      : m_array(array)
      , m_indexes(indexes)
    {}

    // Launches the reduction on Kokkos::Cuda with `reduce` as the result
    // argument of parallel_reduce.
    void apply(T & reduce) const
    {
      Kokkos::parallel_reduce( Kokkos::RangePolicy<Kokkos::Cuda,int>(0,m_array.dimension_0()), *this, reduce);
    }

    KOKKOS_INLINE_FUNCTION
    void operator()(int i, T & reduce) const
    { reduce += m_array(m_indexes(i)); }
  };

  // Allocates `size` entries, fills and shuffles them, then times ten
  // RandomReduce launches.
  //   reduce_time (out): timer.seconds() after the final fence
  //   reduce (in/out):   passed to every launch
  static void run(int size, double & reduce_time, T &reduce)
  {
    array_type array("array",size);
    index_array_type indexes("indexes",size);

    { FillArray f(array); f.apply(); }
    { RandomIndexes f(indexes); f.apply(); }

    // Setup must be complete before timing starts.
    Kokkos::Cuda::fence();

    Kokkos::Timer timer;
    for (int j=0; j<10; ++j) {
      RandomReduce f(array,indexes);
      f.apply(reduce);
    }
    // Wait for all ten launches before reading the timer.
    Kokkos::Cuda::fence();
    reduce_time = timer.seconds();
  }
};

} // unnamed namespace

// Runs TextureFetch<double> for sizes 2^1 .. 2^26 and prints the elapsed
// time per size; nothing is asserted.
TEST_F( cuda, texture_double )
{
  printf("Random reduce of double through texture fetch\n");
  for (int i=1; i<=26; ++i) {
    int size = 1<<i;
    double time = 0;
    double reduce = 0;
    TextureFetch<double>::run(size,time,reduce);
    printf(" time = %1.3e size = 2^%d\n", time, i);
  }
}

} // namespace Test

#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <iostream> +#include <string> + +// mfh 06 Jun 2013: This macro doesn't work like one might thing it +// should. It doesn't take the template parameter DeviceType and +// print its actual type name; it just literally prints out +// "DeviceType". I've worked around this below without using the +// macro, so I'm commenting out the macro to avoid compiler complaints +// about an unused macro. + +// #define KOKKOS_MACRO_IMPL_TO_STRING( X ) #X +// #define KOKKOS_MACRO_TO_STRING( X ) KOKKOS_MACRO_IMPL_TO_STRING( X ) + +//------------------------------------------------------------------------ + +namespace Test { + +enum { NUMBER_OF_TRIALS = 5 }; + + + +template< class DeviceType > +void run_test_hexgrad( int exp_beg , int exp_end, const char deviceTypeName[] ) +{ + std::string label_hexgrad ; + label_hexgrad.append( "\"HexGrad< double , " ); + // mfh 06 Jun 2013: This only appends "DeviceType" (literally) to + // the string, not the actual name of the device type. 
Thus, I've + // modified the function to take the name of the device type. + // + //label_hexgrad.append( KOKKOS_MACRO_TO_STRING( DeviceType ) ); + label_hexgrad.append( deviceTypeName ); + label_hexgrad.append( " >\"" ); + + for (int i = exp_beg ; i < exp_end ; ++i) { + double min_seconds = 0.0 ; + double max_seconds = 0.0 ; + double avg_seconds = 0.0 ; + + const int parallel_work_length = 1<<i; + + for ( int j = 0 ; j < NUMBER_OF_TRIALS ; ++j ) { + const double seconds = HexGrad< DeviceType >::test(parallel_work_length) ; + + if ( 0 == j ) { + min_seconds = seconds ; + max_seconds = seconds ; + } + else { + if ( seconds < min_seconds ) min_seconds = seconds ; + if ( seconds > max_seconds ) max_seconds = seconds ; + } + avg_seconds += seconds ; + } + avg_seconds /= NUMBER_OF_TRIALS ; + + std::cout << label_hexgrad + << " , " << parallel_work_length + << " , " << min_seconds + << " , " << ( min_seconds / parallel_work_length ) + << std::endl ; + } +} + +template< class DeviceType > +void run_test_gramschmidt( int exp_beg , int exp_end, const char deviceTypeName[] ) +{ + std::string label_gramschmidt ; + label_gramschmidt.append( "\"GramSchmidt< double , " ); + // mfh 06 Jun 2013: This only appends "DeviceType" (literally) to + // the string, not the actual name of the device type. Thus, I've + // modified the function to take the name of the device type. 
+ // + //label_gramschmidt.append( KOKKOS_MACRO_TO_STRING( DeviceType ) ); + label_gramschmidt.append( deviceTypeName ); + label_gramschmidt.append( " >\"" ); + + for (int i = exp_beg ; i < exp_end ; ++i) { + double min_seconds = 0.0 ; + double max_seconds = 0.0 ; + double avg_seconds = 0.0 ; + + const int parallel_work_length = 1<<i; + + for ( int j = 0 ; j < NUMBER_OF_TRIALS ; ++j ) { + const double seconds = ModifiedGramSchmidt< double , DeviceType >::test(parallel_work_length, 32 ) ; + + if ( 0 == j ) { + min_seconds = seconds ; + max_seconds = seconds ; + } + else { + if ( seconds < min_seconds ) min_seconds = seconds ; + if ( seconds > max_seconds ) max_seconds = seconds ; + } + avg_seconds += seconds ; + } + avg_seconds /= NUMBER_OF_TRIALS ; + + std::cout << label_gramschmidt + << " , " << parallel_work_length + << " , " << min_seconds + << " , " << ( min_seconds / parallel_work_length ) + << std::endl ; + } +} + +} + diff --git a/lib/kokkos/core/perf_test/PerfTestGramSchmidt.hpp b/lib/kokkos/core/perf_test/PerfTestGramSchmidt.hpp new file mode 100644 index 0000000000000000000000000000000000000000..516696b141d22ab5ac0662ef2c6d78fae8c9b8ad --- /dev/null +++ b/lib/kokkos/core/perf_test/PerfTestGramSchmidt.hpp @@ -0,0 +1,226 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cmath> +#include <PerfTestBlasKernels.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Test { + +// Reduction : result = dot( Q(:,j) , Q(:,j) ); +// PostProcess : R(j,j) = result ; inv = 1 / result ; +template< class VectorView , class ValueView > +struct InvNorm2 : public Kokkos::DotSingle< VectorView > { + + typedef typename Kokkos::DotSingle< VectorView >::value_type value_type ; + + ValueView Rjj ; + ValueView inv ; + + InvNorm2( const VectorView & argX , + const ValueView & argR , + const ValueView & argInv ) + : Kokkos::DotSingle< VectorView >( argX ) + , Rjj( argR ) + , inv( argInv ) + {} + + KOKKOS_INLINE_FUNCTION + void final( value_type & result ) const + { + result = sqrt( result ); + Rjj() = result ; + inv() = ( 0 < result ) ? 
1.0 / result : 0 ; + } +}; + +template< class VectorView , class ValueView > +inline +void invnorm2( const VectorView & x , + const ValueView & r , + const ValueView & r_inv ) +{ + Kokkos::parallel_reduce( x.dimension_0() , InvNorm2< VectorView , ValueView >( x , r , r_inv ) ); +} + +// PostProcess : tmp = - ( R(j,k) = result ); +template< class VectorView , class ValueView > +struct DotM : public Kokkos::Dot< VectorView > { + + typedef typename Kokkos::Dot< VectorView >::value_type value_type ; + + ValueView Rjk ; + ValueView tmp ; + + DotM( const VectorView & argX , + const VectorView & argY , + const ValueView & argR , + const ValueView & argTmp ) + : Kokkos::Dot< VectorView >( argX , argY ) + , Rjk( argR ) + , tmp( argTmp ) + {} + + KOKKOS_INLINE_FUNCTION + void final( value_type & result ) const + { + Rjk() = result ; + tmp() = - result ; + } +}; + +template< class VectorView , class ValueView > +inline +void dot_neg( const VectorView & x , + const VectorView & y , + const ValueView & r , + const ValueView & r_neg ) +{ + Kokkos::parallel_reduce( x.dimension_0() , DotM< VectorView , ValueView >( x , y , r , r_neg ) ); +} + + +template< typename Scalar , class DeviceType > +struct ModifiedGramSchmidt +{ + typedef DeviceType execution_space ; + typedef typename execution_space::size_type size_type ; + + typedef Kokkos::View< Scalar** , + Kokkos::LayoutLeft , + execution_space > multivector_type ; + + typedef Kokkos::View< Scalar* , + Kokkos::LayoutLeft , + execution_space > vector_type ; + + typedef Kokkos::View< Scalar , + Kokkos::LayoutLeft , + execution_space > value_view ; + + + multivector_type Q ; + multivector_type R ; + + static double factorization( const multivector_type Q_ , + const multivector_type R_ ) + { + const size_type count = Q_.dimension_1(); + value_view tmp("tmp"); + value_view one("one"); + + Kokkos::deep_copy( one , (Scalar) 1 ); + + Kokkos::Timer timer ; + + for ( size_type j = 0 ; j < count ; ++j ) { + // Reduction : tmp = dot( Q(:,j) , 
Q(:,j) ); + // PostProcess : tmp = sqrt( tmp ); R(j,j) = tmp ; tmp = 1 / tmp ; + const vector_type Qj = Kokkos::subview( Q_ , Kokkos::ALL() , j ); + const value_view Rjj = Kokkos::subview( R_ , j , j ); + + invnorm2( Qj , Rjj , tmp ); + + // Q(:,j) *= ( 1 / R(j,j) ); => Q(:,j) *= tmp ; + Kokkos::scale( tmp , Qj ); + + for ( size_t k = j + 1 ; k < count ; ++k ) { + const vector_type Qk = Kokkos::subview( Q_ , Kokkos::ALL() , k ); + const value_view Rjk = Kokkos::subview( R_ , j , k ); + + // Reduction : R(j,k) = dot( Q(:,j) , Q(:,k) ); + // PostProcess : tmp = - R(j,k); + dot_neg( Qj , Qk , Rjk , tmp ); + + // Q(:,k) -= R(j,k) * Q(:,j); => Q(:,k) += tmp * Q(:,j) + Kokkos::axpby( tmp , Qj , one , Qk ); + } + } + + execution_space::fence(); + + return timer.seconds(); + } + + //-------------------------------------------------------------------------- + + static double test( const size_t length , + const size_t count , + const size_t iter = 1 ) + { + multivector_type Q_( "Q" , length , count ); + multivector_type R_( "R" , count , count ); + + typename multivector_type::HostMirror A = + Kokkos::create_mirror( Q_ ); + + // Create and fill A on the host + + for ( size_type j = 0 ; j < count ; ++j ) { + for ( size_type i = 0 ; i < length ; ++i ) { + A(i,j) = ( i + 1 ) * ( j + 1 ); + } + } + + double dt_min = 0 ; + + for ( size_t i = 0 ; i < iter ; ++i ) { + + Kokkos::deep_copy( Q_ , A ); + + // A = Q * R + + const double dt = factorization( Q_ , R_ ); + + if ( 0 == i ) dt_min = dt ; + else dt_min = dt < dt_min ? dt : dt_min ; + } + + return dt_min ; + } +}; + +} + diff --git a/lib/kokkos/core/perf_test/PerfTestHexGrad.hpp b/lib/kokkos/core/perf_test/PerfTestHexGrad.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ed5371f29c4db0fc2af4613d301006b1e96a0f28 --- /dev/null +++ b/lib/kokkos/core/perf_test/PerfTestHexGrad.hpp @@ -0,0 +1,268 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +namespace Test { + +template< class DeviceType , + typename CoordScalarType = double , + typename GradScalarType = float > +struct HexGrad +{ + typedef DeviceType execution_space ; + typedef typename execution_space::size_type size_type ; + + typedef HexGrad<DeviceType,CoordScalarType,GradScalarType> self_type; + + // 3D array : ( ParallelWork , Space , Node ) + + enum { NSpace = 3 , NNode = 8 }; + + typedef Kokkos::View< CoordScalarType*[NSpace][NNode] , execution_space > + elem_coord_type ; + + typedef Kokkos::View< GradScalarType*[NSpace][NNode] , execution_space > + elem_grad_type ; + + elem_coord_type coords ; + elem_grad_type grad_op ; + + enum { FLOPS = 318 }; // = 3 * ( 18 + 8 * 11 ) }; + enum { READS = 18 }; + enum { WRITES = 18 }; + + HexGrad( const elem_coord_type & arg_coords , + const elem_grad_type & arg_grad_op ) + : coords( arg_coords ) + , grad_op( arg_grad_op ) + {} + + KOKKOS_INLINE_FUNCTION static + void grad( const CoordScalarType x[] , + const CoordScalarType z[] , + GradScalarType grad_y[] ) + { + const GradScalarType R42=(x[3] - x[1]); + const GradScalarType R52=(x[4] - x[1]); + const GradScalarType R54=(x[4] - x[3]); + + const GradScalarType R63=(x[5] - x[2]); + const GradScalarType R83=(x[7] - x[2]); + const GradScalarType R86=(x[7] - x[5]); + + const GradScalarType R31=(x[2] - x[0]); + const GradScalarType R61=(x[5] - x[0]); + const GradScalarType R74=(x[6] - x[3]); + + const GradScalarType R72=(x[6] - x[1]); + const GradScalarType R75=(x[6] - x[4]); + const GradScalarType R81=(x[7] - x[0]); + + const GradScalarType t1=(R63 + R54); + const GradScalarType t2=(R61 + R74); + const GradScalarType t3=(R72 + R81); + + const GradScalarType t4 =(R86 + R42); + const GradScalarType t5 =(R83 + R52); + const GradScalarType t6 =(R75 + R31); + + // Calculate Y gradient from X and Z data + + grad_y[0] = (z[1] * t1) - 
(z[2] * R42) - (z[3] * t5) + (z[4] * t4) + (z[5] * R52) - (z[7] * R54); + grad_y[1] = (z[2] * t2) + (z[3] * R31) - (z[0] * t1) - (z[5] * t6) + (z[6] * R63) - (z[4] * R61); + grad_y[2] = (z[3] * t3) + (z[0] * R42) - (z[1] * t2) - (z[6] * t4) + (z[7] * R74) - (z[5] * R72); + grad_y[3] = (z[0] * t5) - (z[1] * R31) - (z[2] * t3) + (z[7] * t6) + (z[4] * R81) - (z[6] * R83); + grad_y[4] = (z[5] * t3) + (z[6] * R86) - (z[7] * t2) - (z[0] * t4) - (z[3] * R81) + (z[1] * R61); + grad_y[5] = (z[6] * t5) - (z[4] * t3) - (z[7] * R75) + (z[1] * t6) - (z[0] * R52) + (z[2] * R72); + grad_y[6] = (z[7] * t1) - (z[5] * t5) - (z[4] * R86) + (z[2] * t4) - (z[1] * R63) + (z[3] * R83); + grad_y[7] = (z[4] * t2) - (z[6] * t1) + (z[5] * R75) - (z[3] * t6) - (z[2] * R74) + (z[0] * R54); + } + + KOKKOS_INLINE_FUNCTION + void operator()( size_type ielem ) const + { + GradScalarType g[NNode] ; + + const CoordScalarType x[NNode] = { + coords(ielem,0,0), + coords(ielem,0,1), + coords(ielem,0,2), + coords(ielem,0,3), + coords(ielem,0,4), + coords(ielem,0,5), + coords(ielem,0,6), + coords(ielem,0,7) + }; + + const CoordScalarType y[NNode] = { + coords(ielem,1,0), + coords(ielem,1,1), + coords(ielem,1,2), + coords(ielem,1,3), + coords(ielem,1,4), + coords(ielem,1,5), + coords(ielem,1,6), + coords(ielem,1,7) + }; + + const CoordScalarType z[NNode] = { + coords(ielem,2,0), + coords(ielem,2,1), + coords(ielem,2,2), + coords(ielem,2,3), + coords(ielem,2,4), + coords(ielem,2,5), + coords(ielem,2,6), + coords(ielem,2,7) + }; + + grad( z , y , g ); + + grad_op(ielem,0,0) = g[0]; + grad_op(ielem,0,1) = g[1]; + grad_op(ielem,0,2) = g[2]; + grad_op(ielem,0,3) = g[3]; + grad_op(ielem,0,4) = g[4]; + grad_op(ielem,0,5) = g[5]; + grad_op(ielem,0,6) = g[6]; + grad_op(ielem,0,7) = g[7]; + + grad( x , z , g ); + + grad_op(ielem,1,0) = g[0]; + grad_op(ielem,1,1) = g[1]; + grad_op(ielem,1,2) = g[2]; + grad_op(ielem,1,3) = g[3]; + grad_op(ielem,1,4) = g[4]; + grad_op(ielem,1,5) = g[5]; + grad_op(ielem,1,6) = g[6]; + 
grad_op(ielem,1,7) = g[7]; + + grad( y , x , g ); + + grad_op(ielem,2,0) = g[0]; + grad_op(ielem,2,1) = g[1]; + grad_op(ielem,2,2) = g[2]; + grad_op(ielem,2,3) = g[3]; + grad_op(ielem,2,4) = g[4]; + grad_op(ielem,2,5) = g[5]; + grad_op(ielem,2,6) = g[6]; + grad_op(ielem,2,7) = g[7]; + } + + //-------------------------------------------------------------------------- + + struct Init { + typedef typename self_type::execution_space execution_space ; + + elem_coord_type coords ; + + Init( const elem_coord_type & arg_coords ) + : coords( arg_coords ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( size_type ielem ) const + { + coords(ielem,0,0) = 0.; + coords(ielem,1,0) = 0.; + coords(ielem,2,0) = 0.; + + coords(ielem,0,1) = 1.; + coords(ielem,1,1) = 0.; + coords(ielem,2,1) = 0.; + + coords(ielem,0,2) = 1.; + coords(ielem,1,2) = 1.; + coords(ielem,2,2) = 0.; + + coords(ielem,0,3) = 0.; + coords(ielem,1,3) = 1.; + coords(ielem,2,3) = 0.; + + + coords(ielem,0,4) = 0.; + coords(ielem,1,4) = 0.; + coords(ielem,2,4) = 1.; + + coords(ielem,0,5) = 1.; + coords(ielem,1,5) = 0.; + coords(ielem,2,5) = 1.; + + coords(ielem,0,6) = 1.; + coords(ielem,1,6) = 1.; + coords(ielem,2,6) = 1.; + + coords(ielem,0,7) = 0.; + coords(ielem,1,7) = 1.; + coords(ielem,2,7) = 1.; + } + }; + + //-------------------------------------------------------------------------- + + static double test( const int count , const int iter = 1 ) + { + elem_coord_type coord( "coord" , count ); + elem_grad_type grad ( "grad" , count ); + + // Execute the parallel kernels on the arrays: + + double dt_min = 0 ; + + Kokkos::parallel_for( count , Init( coord ) ); + execution_space::fence(); + + for ( int i = 0 ; i < iter ; ++i ) { + Kokkos::Timer timer ; + Kokkos::parallel_for( count , HexGrad<execution_space>( coord , grad ) ); + execution_space::fence(); + const double dt = timer.seconds(); + if ( 0 == i ) dt_min = dt ; + else dt_min = dt < dt_min ? 
dt : dt_min ; + } + + return dt_min ; + } +}; + +} + diff --git a/lib/kokkos/core/perf_test/PerfTestHost.cpp b/lib/kokkos/core/perf_test/PerfTestHost.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6a0f2efadacd01e979d3beefd23b617b81acff48 --- /dev/null +++ b/lib/kokkos/core/perf_test/PerfTestHost.cpp @@ -0,0 +1,104 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +#if defined( KOKKOS_HAVE_OPENMP ) + +typedef Kokkos::OpenMP TestHostDevice ; +const char TestHostDeviceName[] = "Kokkos::OpenMP" ; + +#elif defined( KOKKOS_HAVE_PTHREAD ) + +typedef Kokkos::Threads TestHostDevice ; +const char TestHostDeviceName[] = "Kokkos::Threads" ; + +#elif defined( KOKKOS_HAVE_SERIAL ) + +typedef Kokkos::Serial TestHostDevice ; +const char TestHostDeviceName[] = "Kokkos::Serial" ; + +#else +# error "You must enable at least one of the following execution spaces in order to build this test: Kokkos::Threads, Kokkos::OpenMP, or Kokkos::Serial." 
+#endif + +#include <impl/Kokkos_Timer.hpp> + +#include <PerfTestHexGrad.hpp> +#include <PerfTestBlasKernels.hpp> +#include <PerfTestGramSchmidt.hpp> +#include <PerfTestDriver.hpp> + +//------------------------------------------------------------------------ + +namespace Test { + +class host : public ::testing::Test { +protected: + static void SetUpTestCase() + { + const unsigned team_count = Kokkos::hwloc::get_available_numa_count(); + const unsigned threads_per_team = 4 ; + + TestHostDevice::initialize( team_count * threads_per_team ); + } + + static void TearDownTestCase() + { + TestHostDevice::finalize(); + } +}; + +TEST_F( host, hexgrad ) { + EXPECT_NO_THROW(run_test_hexgrad< TestHostDevice>( 10, 20, TestHostDeviceName )); +} + +TEST_F( host, gramschmidt ) { + EXPECT_NO_THROW(run_test_gramschmidt< TestHostDevice>( 10, 20, TestHostDeviceName )); +} + +} // namespace Test + + diff --git a/lib/kokkos/core/perf_test/PerfTestMain.cpp b/lib/kokkos/core/perf_test/PerfTestMain.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ac916308292076fc27231968715518b3f5c02f80 --- /dev/null +++ b/lib/kokkos/core/perf_test/PerfTestMain.cpp @@ -0,0 +1,49 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +int main(int argc, char *argv[]) { + ::testing::InitGoogleTest(&argc,argv); + return RUN_ALL_TESTS(); +} diff --git a/lib/kokkos/core/perf_test/test_atomic.cpp b/lib/kokkos/core/perf_test/test_atomic.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ab73f2505e28df6bda1c8f4a43d66fc20093bf2a --- /dev/null +++ b/lib/kokkos/core/perf_test/test_atomic.cpp @@ -0,0 +1,507 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cstdio> +#include <cstring> +#include <cstdlib> + +#include <Kokkos_Core.hpp> +#include <impl/Kokkos_Timer.hpp> + +typedef Kokkos::DefaultExecutionSpace exec_space; + +#define RESET 0 +#define BRIGHT 1 +#define DIM 2 +#define UNDERLINE 3 +#define BLINK 4 +#define REVERSE 7 +#define HIDDEN 8 + +#define BLACK 0 +#define RED 1 +#define GREEN 2 +#define YELLOW 3 +#define BLUE 4 +#define MAGENTA 5 +#define CYAN 6 +#define GREY 7 +#define WHITE 8 + +void textcolor(int attr, int fg, int bg) +{ char command[13]; + + /* Command is the control command to the terminal */ + sprintf(command, "%c[%d;%d;%dm", 0x1B, attr, fg + 30, bg + 40); + printf("%s", command); +} +void textcolor_standard() {textcolor(RESET, BLACK, WHITE);} + + +template<class T,class DEVICE_TYPE> +struct ZeroFunctor{ + typedef DEVICE_TYPE execution_space; + typedef typename Kokkos::View<T,execution_space> type; + typedef typename Kokkos::View<T,execution_space>::HostMirror h_type; + type data; + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { + data() = 0; + } +}; + +//--------------------------------------------------- +//--------------atomic_fetch_add--------------------- +//--------------------------------------------------- + +template<class T,class DEVICE_TYPE> +struct AddFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data; + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { + Kokkos::atomic_fetch_add(&data(),(T)1); + } +}; + +template<class T> +T AddLoop(int loop) { + struct ZeroFunctor<T,exec_space> f_zero; + typename ZeroFunctor<T,exec_space>::type data("Data"); + typename ZeroFunctor<T,exec_space>::h_type h_data("HData"); + f_zero.data = data; + Kokkos::parallel_for(1,f_zero); + exec_space::fence(); + + struct AddFunctor<T,exec_space> f_add; + f_add.data = data; + 
Kokkos::parallel_for(loop,f_add); + exec_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + return val; +} + +template<class T,class DEVICE_TYPE> +struct AddNonAtomicFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data; + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { + data()+=(T)1; + } +}; + +template<class T> +T AddLoopNonAtomic(int loop) { + struct ZeroFunctor<T,exec_space> f_zero; + typename ZeroFunctor<T,exec_space>::type data("Data"); + typename ZeroFunctor<T,exec_space>::h_type h_data("HData"); + + f_zero.data = data; + Kokkos::parallel_for(1,f_zero); + exec_space::fence(); + + struct AddNonAtomicFunctor<T,exec_space> f_add; + f_add.data = data; + Kokkos::parallel_for(loop,f_add); + exec_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + + return val; +} + +template<class T> +T AddLoopSerial(int loop) { + T* data = new T[1]; + data[0] = 0; + + for(int i=0;i<loop;i++) + *data+=(T)1; + + T val = *data; + delete [] data; + return val; +} + +template<class T,class DEVICE_TYPE> +struct CASFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data; + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { + T old = data(); + T newval, assumed; + do { + assumed = old; + newval = assumed + (T)1; + old = Kokkos::atomic_compare_exchange(&data(), assumed, newval); + } + while( old != assumed ); + } +}; + +template<class T> +T CASLoop(int loop) { + struct ZeroFunctor<T,exec_space> f_zero; + typename ZeroFunctor<T,exec_space>::type data("Data"); + typename ZeroFunctor<T,exec_space>::h_type h_data("HData"); + f_zero.data = data; + Kokkos::parallel_for(1,f_zero); + exec_space::fence(); + + struct CASFunctor<T,exec_space> f_cas; + f_cas.data = data; + Kokkos::parallel_for(loop,f_cas); + exec_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + + return val; +} + +template<class T,class 
DEVICE_TYPE> +struct CASNonAtomicFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data; + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { + volatile T assumed; + volatile T newval; + bool fail=1; + do { + assumed = data(); + newval = assumed + (T)1; + if(data()==assumed) { + data() = newval; + fail = 0; + } + } + while(fail); + } +}; + +template<class T> +T CASLoopNonAtomic(int loop) { + struct ZeroFunctor<T,exec_space> f_zero; + typename ZeroFunctor<T,exec_space>::type data("Data"); + typename ZeroFunctor<T,exec_space>::h_type h_data("HData"); + f_zero.data = data; + Kokkos::parallel_for(1,f_zero); + exec_space::fence(); + + struct CASNonAtomicFunctor<T,exec_space> f_cas; + f_cas.data = data; + Kokkos::parallel_for(loop,f_cas); + exec_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + + return val; +} + +template<class T> +T CASLoopSerial(int loop) { + T* data = new T[1]; + data[0] = 0; + + for(int i=0;i<loop;i++) { + T assumed; + T newval; + T old; + do { + assumed = *data; + newval = assumed + (T)1; + old = *data; + *data = newval; + } + while(!(assumed==old)); + } + + T val = *data; + delete [] data; + return val; +} + +template<class T,class DEVICE_TYPE> +struct ExchFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data, data2; + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { + T old = Kokkos::atomic_exchange(&data(),(T)i); + Kokkos::atomic_fetch_add(&data2(),old); + } +}; + +template<class T> +T ExchLoop(int loop) { + struct ZeroFunctor<T,exec_space> f_zero; + typename ZeroFunctor<T,exec_space>::type data("Data"); + typename ZeroFunctor<T,exec_space>::h_type h_data("HData"); + f_zero.data = data; + Kokkos::parallel_for(1,f_zero); + exec_space::fence(); + + typename ZeroFunctor<T,exec_space>::type data2("Data"); + typename ZeroFunctor<T,exec_space>::h_type h_data2("HData"); + f_zero.data = data2; + 
Kokkos::parallel_for(1,f_zero); + exec_space::fence(); + + struct ExchFunctor<T,exec_space> f_exch; + f_exch.data = data; + f_exch.data2 = data2; + Kokkos::parallel_for(loop,f_exch); + exec_space::fence(); + + Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy(h_data2,data2); + T val = h_data() + h_data2(); + + return val; +} + +template<class T,class DEVICE_TYPE> +struct ExchNonAtomicFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data, data2; + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { + T old = data(); + data()=(T) i; + data2()+=old; + } +}; + + +template<class T> +T ExchLoopNonAtomic(int loop) { + struct ZeroFunctor<T,exec_space> f_zero; + typename ZeroFunctor<T,exec_space>::type data("Data"); + typename ZeroFunctor<T,exec_space>::h_type h_data("HData"); + f_zero.data = data; + Kokkos::parallel_for(1,f_zero); + exec_space::fence(); + + typename ZeroFunctor<T,exec_space>::type data2("Data"); + typename ZeroFunctor<T,exec_space>::h_type h_data2("HData"); + f_zero.data = data2; + Kokkos::parallel_for(1,f_zero); + exec_space::fence(); + + struct ExchNonAtomicFunctor<T,exec_space> f_exch; + f_exch.data = data; + f_exch.data2 = data2; + Kokkos::parallel_for(loop,f_exch); + exec_space::fence(); + + Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy(h_data2,data2); + T val = h_data() + h_data2(); + + return val; +} + +template<class T> +T ExchLoopSerial(int loop) { + T* data = new T[1]; + T* data2 = new T[1]; + data[0] = 0; + data2[0] = 0; + for(int i=0;i<loop;i++) { + T old = *data; + *data=(T) i; + *data2+=old; + } + + T val = *data2 + *data; + delete [] data; + delete [] data2; + return val; +} + +template<class T> +T LoopVariant(int loop, int test) { + switch (test) { + case 1: return AddLoop<T>(loop); + case 2: return CASLoop<T>(loop); + case 3: return ExchLoop<T>(loop); + } + return 0; +} + +template<class T> +T LoopVariantSerial(int loop, int test) { + switch (test) { + case 1: return 
AddLoopSerial<T>(loop); + case 2: return CASLoopSerial<T>(loop); + case 3: return ExchLoopSerial<T>(loop); + } + return 0; +} + +template<class T> +T LoopVariantNonAtomic(int loop, int test) { + switch (test) { + case 1: return AddLoopNonAtomic<T>(loop); + case 2: return CASLoopNonAtomic<T>(loop); + case 3: return ExchLoopNonAtomic<T>(loop); + } + return 0; +} + +template<class T> +void Loop(int loop, int test, const char* type_name) { + LoopVariant<T>(loop,test); + + Kokkos::Impl::Timer timer; + T res = LoopVariant<T>(loop,test); + double time = timer.seconds(); + + timer.reset(); + T resNonAtomic = LoopVariantNonAtomic<T>(loop,test); + double timeNonAtomic = timer.seconds(); + + timer.reset(); + T resSerial = LoopVariantSerial<T>(loop,test); + double timeSerial = timer.seconds(); + + time *=1e6/loop; + timeNonAtomic*=1e6/loop; + timeSerial *=1e6/loop; + //textcolor_standard(); + bool passed = true; + if(resSerial!=res) passed = false; + //if(!passed) textcolor(RESET,BLACK,YELLOW); + printf("%s Test %i %s --- Loop: %i Value (S,A,NA): %e %e %e Time: %7.4e %7.4e %7.4e Size of Type %i)", + type_name,test,passed?"PASSED":"FAILED",loop, + 1.0*resSerial,1.0*res,1.0*resNonAtomic, + timeSerial,time,timeNonAtomic,(int)sizeof(T)); + //if(!passed) textcolor_standard(); + printf("\n"); +} + + +template<class T> +void Test(int loop, int test, const char* type_name) { + if(test==-1) { + Loop<T>(loop,1,type_name); + Loop<T>(loop,2,type_name); + Loop<T>(loop,3,type_name); + + } + else + Loop<T>(loop,test,type_name); +} + +int main(int argc, char* argv[]) +{ + int type = -1; + int loop = 100000; + int test = -1; + + for(int i=0;i<argc;i++) + { + if((strcmp(argv[i],"--test")==0)) {test=atoi(argv[++i]); continue;} + if((strcmp(argv[i],"--type")==0)) {type=atoi(argv[++i]); continue;} + if((strcmp(argv[i],"-l")==0)||(strcmp(argv[i],"--loop")==0)) {loop=atoi(argv[++i]); continue;} + } + + + Kokkos::initialize(argc,argv); + + + printf("Using %s\n",Kokkos::atomic_query_version()); + bool 
all_tests = false; + if(type==-1) all_tests = true; + while(type<100) { + if(type==1) { + Test<int>(loop,test,"int "); + } + if(type==2) { + Test<long int>(loop,test,"long int "); + } + if(type==3) { + Test<long long int>(loop,test,"long long int "); + } + if(type==4) { + Test<unsigned int>(loop,test,"unsigned int "); + } + if(type==5) { + Test<unsigned long int>(loop,test,"unsigned long int "); + } + if(type==6) { + Test<unsigned long long int>(loop,test,"unsigned long long int "); + } + if(type==10) { + //Test<float>(loop,test,"float "); + } + if(type==11) { + Test<double>(loop,test,"double "); + } + if(!all_tests) type=100; + else type++; + } + + Kokkos::finalize(); + +} + diff --git a/lib/kokkos/core/src/CMakeLists.txt b/lib/kokkos/core/src/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..807a01ed01b128c531b87df0c27e1d406525b603 --- /dev/null +++ b/lib/kokkos/core/src/CMakeLists.txt @@ -0,0 +1,113 @@ + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Serial + KOKKOS_HAVE_SERIAL + "Whether to enable the Kokkos::Serial device. This device executes \"parallel\" kernels sequentially on a single CPU thread. It is enabled by default. If you disable this device, please enable at least one other CPU device, such as Kokkos::OpenMP or Kokkos::Threads." + ON + ) + +ASSERT_DEFINED(${PROJECT_NAME}_ENABLE_CXX11) +ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_CUDA) + +# Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA governs whether Kokkos allows +# use of lambdas at the outer level of parallel dispatch (that is, as +# the argument to an outer parallel_for, parallel_reduce, or +# parallel_scan). This works with non-CUDA execution spaces if C++11 +# is enabled. It does not currently work with public releases of +# CUDA. If that changes, please change the default here to ON if CUDA +# and C++11 are ON. 
+IF (${PROJECT_NAME}_ENABLE_CXX11) + IF (${PACKAGE_NAME}_ENABLE_CUDA) + SET(Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA_DEFAULT OFF) + ELSE () + SET(Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA_DEFAULT ON) + ENDIF () +ELSE () + SET(Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA_DEFAULT OFF) +ENDIF () + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA + KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA + "Whether Kokkos allows use of lambdas at the outer level of parallel dispatch (that is, as the argument to an outer parallel_for, parallel_reduce, or parallel_scan). This requires C++11. It also does not currently work with public releases of CUDA. As a result, even if C++11 is enabled, this will be OFF by default if CUDA is enabled. If this option is ON, the macro KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA will be defined. For compatibility with Kokkos' Makefile build system, it is also possible to define that macro on the command line." + ${Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA_DEFAULT} + ) + +TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +#----------------------------------------------------------------------------- + +SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) + +#----------------------------------------------------------------------------- + +SET(HEADERS_PUBLIC "") +SET(HEADERS_PRIVATE "") +SET(SOURCES "") + +FILE(GLOB HEADERS_PUBLIC Kokkos*.hpp) +LIST( APPEND HEADERS_PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h ) + +#----------------------------------------------------------------------------- + +FILE(GLOB HEADERS_IMPL impl/*.hpp) +FILE(GLOB SOURCES_IMPL impl/*.cpp) + +LIST(APPEND HEADERS_PRIVATE ${HEADERS_IMPL} ) +LIST(APPEND SOURCES ${SOURCES_IMPL} ) + +INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/) + +#----------------------------------------------------------------------------- + +FILE(GLOB HEADERS_THREADS 
Threads/*.hpp) +FILE(GLOB SOURCES_THREADS Threads/*.cpp) + +LIST(APPEND HEADERS_PRIVATE ${HEADERS_THREADS} ) +LIST(APPEND SOURCES ${SOURCES_THREADS} ) + +INSTALL(FILES ${HEADERS_THREADS} DESTINATION ${TRILINOS_INCDIR}/Threads/) + +#----------------------------------------------------------------------------- + +FILE(GLOB HEADERS_OPENMP OpenMP/*.hpp) +FILE(GLOB SOURCES_OPENMP OpenMP/*.cpp) + +LIST(APPEND HEADERS_PRIVATE ${HEADERS_OPENMP} ) +LIST(APPEND SOURCES ${SOURCES_OPENMP} ) + +INSTALL(FILES ${HEADERS_OPENMP} DESTINATION ${TRILINOS_INCDIR}/OpenMP/) + +#----------------------------------------------------------------------------- + +FILE(GLOB HEADERS_CUDA Cuda/*.hpp) +FILE(GLOB SOURCES_CUDA Cuda/*.cpp) + +LIST(APPEND HEADERS_PRIVATE ${HEADERS_CUDA} ) +LIST(APPEND SOURCES ${SOURCES_CUDA} ) + +INSTALL(FILES ${HEADERS_CUDA} DESTINATION ${TRILINOS_INCDIR}/Cuda/) + +#----------------------------------------------------------------------------- +FILE(GLOB HEADERS_QTHREAD Qthread/*.hpp) +FILE(GLOB SOURCES_QTHREAD Qthread/*.cpp) + +LIST(APPEND HEADERS_PRIVATE ${HEADERS_QTHREAD} ) +LIST(APPEND SOURCES ${SOURCES_QTHREAD} ) + +INSTALL(FILES ${HEADERS_QTHREAD} DESTINATION ${TRILINOS_INCDIR}/Qthread/) + +#----------------------------------------------------------------------------- + +TRIBITS_ADD_LIBRARY( + kokkoscore + HEADERS ${HEADERS_PUBLIC} + NOINSTALLHEADERS ${HEADERS_PRIVATE} + SOURCES ${SOURCES} + DEPLIBS + ) + + diff --git a/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_View.hpp b/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_View.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4ed7d8e2a8a40ef6434637f3e0ae72266e4c76bb --- /dev/null +++ b/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_View.hpp @@ -0,0 +1,334 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP +#define KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP + +/* only compile this file if CUDA is enabled for Kokkos */ +#if defined( KOKKOS_HAVE_CUDA ) + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template<> +struct ViewOperatorBoundsErrorAbort< Kokkos::CudaSpace > { + KOKKOS_INLINE_FUNCTION + static void apply( const size_t rank + , const size_t n0 , const size_t n1 + , const size_t n2 , const size_t n3 + , const size_t n4 , const size_t n5 + , const size_t n6 , const size_t n7 + , const size_t i0 , const size_t i1 + , const size_t i2 , const size_t i3 + , const size_t i4 , const size_t i5 + , const size_t i6 , const size_t i7 ) + { + const int r = + ( n0 <= i0 ? 0 : + ( n1 <= i1 ? 1 : + ( n2 <= i2 ? 2 : + ( n3 <= i3 ? 3 : + ( n4 <= i4 ? 4 : + ( n5 <= i5 ? 5 : + ( n6 <= i6 ? 6 : 7 ))))))); + const size_t n = + ( n0 <= i0 ? n0 : + ( n1 <= i1 ? n1 : + ( n2 <= i2 ? n2 : + ( n3 <= i3 ? n3 : + ( n4 <= i4 ? n4 : + ( n5 <= i5 ? n5 : + ( n6 <= i6 ? n6 : n7 ))))))); + const size_t i = + ( n0 <= i0 ? i0 : + ( n1 <= i1 ? i1 : + ( n2 <= i2 ? i2 : + ( n3 <= i3 ? i3 : + ( n4 <= i4 ? i4 : + ( n5 <= i5 ? i5 : + ( n6 <= i6 ? 
i6 : i7 ))))))); + printf("Cuda view array bounds error index %d : FAILED %lu < %lu\n" , r , i , n ); + Kokkos::Impl::cuda_abort("Cuda view array bounds error"); + } +}; + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +// Cuda Texture fetches can be performed for 4, 8 and 16 byte objects (int,int2,int4) +// Via reinterpret_case this can be used to support all scalar types of those sizes. +// Any other scalar type falls back to either normal reads out of global memory, +// or using the __ldg intrinsic on Kepler GPUs or newer (Compute Capability >= 3.0) + +template< typename ValueType , typename AliasType > +struct CudaTextureFetch { + + ::cudaTextureObject_t m_obj ; + const ValueType * m_ptr ; + int m_offset ; + + // Deference operator pulls through texture object and returns by value + template< typename iType > + KOKKOS_INLINE_FUNCTION + ValueType operator[]( const iType & i ) const + { +#if defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ ) + AliasType v = tex1Dfetch<AliasType>( m_obj , i + m_offset ); + return *(reinterpret_cast<ValueType*> (&v)); +#else + return m_ptr[ i ]; +#endif + } + + // Pointer to referenced memory + KOKKOS_INLINE_FUNCTION + operator const ValueType * () const { return m_ptr ; } + + + KOKKOS_INLINE_FUNCTION + CudaTextureFetch() : m_obj() , m_ptr() , m_offset() {} + + KOKKOS_INLINE_FUNCTION + ~CudaTextureFetch() {} + + KOKKOS_INLINE_FUNCTION + CudaTextureFetch( const CudaTextureFetch & rhs ) + : m_obj( rhs.m_obj ) + , m_ptr( rhs.m_ptr ) + , m_offset( rhs.m_offset ) + {} + + KOKKOS_INLINE_FUNCTION + CudaTextureFetch( CudaTextureFetch && rhs ) + : m_obj( rhs.m_obj ) + , m_ptr( rhs.m_ptr ) + , m_offset( rhs.m_offset ) + {} + + KOKKOS_INLINE_FUNCTION + CudaTextureFetch & operator = ( const 
CudaTextureFetch & rhs ) + { + m_obj = rhs.m_obj ; + m_ptr = rhs.m_ptr ; + m_offset = rhs.m_offset ; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + CudaTextureFetch & operator = ( CudaTextureFetch && rhs ) + { + m_obj = rhs.m_obj ; + m_ptr = rhs.m_ptr ; + m_offset = rhs.m_offset ; + return *this ; + } + + // Texture object spans the entire allocation. + // This handle may view a subset of the allocation, so an offset is required. + template< class CudaMemorySpace > + inline explicit + CudaTextureFetch( const ValueType * const arg_ptr + , Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > & record + ) + : m_obj( record.template attach_texture_object< AliasType >() ) + , m_ptr( arg_ptr ) + , m_offset( record.attach_texture_object_offset( reinterpret_cast<const AliasType*>( arg_ptr ) ) ) + {} +}; + +#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC ) + +template< typename ValueType , typename AliasType > +struct CudaLDGFetch { + + const ValueType * m_ptr ; + + template< typename iType > + KOKKOS_INLINE_FUNCTION + ValueType operator[]( const iType & i ) const + { + AliasType v = __ldg(reinterpret_cast<AliasType*>(&m_ptr[i])); + return *(reinterpret_cast<ValueType*> (&v)); + } + + KOKKOS_INLINE_FUNCTION + operator const ValueType * () const { return m_ptr ; } + + KOKKOS_INLINE_FUNCTION + CudaLDGFetch() : m_ptr() {} + + KOKKOS_INLINE_FUNCTION + ~CudaLDGFetch() {} + + KOKKOS_INLINE_FUNCTION + CudaLDGFetch( const CudaLDGFetch & rhs ) + : m_ptr( rhs.m_ptr ) + {} + + KOKKOS_INLINE_FUNCTION + CudaLDGFetch( CudaLDGFetch && rhs ) + : m_ptr( rhs.m_ptr ) + {} + + KOKKOS_INLINE_FUNCTION + CudaLDGFetch & operator = ( const CudaLDGFetch & rhs ) + { + m_ptr = rhs.m_ptr ; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + CudaLDGFetch & operator = ( CudaLDGFetch && rhs ) + { + m_ptr = rhs.m_ptr ; + return *this ; + } + + template< class CudaMemorySpace > + inline explicit + CudaTextureFetch( const ValueType * const arg_ptr + , 
Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > const & + ) + : m_ptr( arg_data_ptr ) + {} +}; + +#endif + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +/** \brief Replace Default ViewDataHandle with Cuda texture fetch specialization + * if 'const' value type, CudaSpace and random access. + */ +template< class Traits > +class ViewDataHandle< Traits , + typename std::enable_if<( + // Is Cuda memory space + ( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value || + std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ) + && + // Is a trivial const value of 4, 8, or 16 bytes + std::is_trivial<typename Traits::const_value_type>::value + && + std::is_same<typename Traits::const_value_type,typename Traits::value_type>::value + && + ( sizeof(typename Traits::const_value_type) == 4 || + sizeof(typename Traits::const_value_type) == 8 || + sizeof(typename Traits::const_value_type) == 16 ) + && + // Random access trait + ( Traits::memory_traits::RandomAccess != 0 ) + )>::type > +{ +public: + + using track_type = Kokkos::Experimental::Impl::SharedAllocationTracker ; + + using value_type = typename Traits::const_value_type ; + using return_type = typename Traits::const_value_type ; // NOT a reference + + using alias_type = typename std::conditional< ( sizeof(value_type) == 4 ) , int , + typename std::conditional< ( sizeof(value_type) == 8 ) , ::int2 , + typename std::conditional< ( sizeof(value_type) == 16 ) , ::int4 , void + >::type + >::type + >::type ; + +#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC ) + using handle_type = Kokkos::Experimental::Impl::CudaLDGFetch< value_type , alias_type > ; +#else + using handle_type = 
Kokkos::Experimental::Impl::CudaTextureFetch< value_type , alias_type > ; +#endif + + KOKKOS_INLINE_FUNCTION + static handle_type const & assign( handle_type const & arg_handle , track_type const & /* arg_tracker */ ) + { + return arg_handle ; + } + + KOKKOS_INLINE_FUNCTION + static handle_type assign( value_type * arg_data_ptr, track_type const & arg_tracker ) + { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + // Assignment of texture = non-texture requires creation of a texture object + // which can only occur on the host. In addition, 'get_record' is only valid + // if called in a host execution space + return handle_type( arg_data_ptr , arg_tracker.template get_record< typename Traits::memory_space >() ); +#else + Kokkos::Impl::cuda_abort("Cannot create Cuda texture object from within a Cuda kernel"); + return handle_type(); +#endif + } +}; + +} +} +} + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_HAVE_CUDA ) */ +#endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */ + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d1a560ee04648dc8d34b9ec82cb44abddc9ae6e8 --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp @@ -0,0 +1,318 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CUDAEXEC_HPP +#define KOKKOS_CUDAEXEC_HPP + +#include <Kokkos_Macros.hpp> + +/* only compile this file if CUDA is enabled for Kokkos */ +#ifdef KOKKOS_HAVE_CUDA + +#include <string> +#include <Kokkos_Parallel.hpp> +#include <impl/Kokkos_Error.hpp> +#include <Cuda/Kokkos_Cuda_abort.hpp> +#include <Cuda/Kokkos_Cuda_Error.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +struct CudaTraits { + enum { WarpSize = 32 /* 0x0020 */ }; + enum { WarpIndexMask = 0x001f /* Mask for warpindex */ }; + enum { WarpIndexShift = 5 /* WarpSize == 1 << WarpShift */ }; + + enum { SharedMemoryBanks = 32 /* Compute device 2.0 */ }; + enum { SharedMemoryCapacity = 0x0C000 /* 48k shared / 16k L1 Cache */ }; + enum { SharedMemoryUsage = 0x04000 /* 16k shared / 48k L1 Cache */ }; + + enum { UpperBoundGridCount = 65535 /* Hard upper bound */ }; + enum { ConstantMemoryCapacity = 0x010000 /* 64k bytes */ }; + enum { ConstantMemoryUsage = 0x008000 /* 32k bytes */ }; + enum { ConstantMemoryCache = 0x002000 /* 8k bytes */ }; + + typedef unsigned long + ConstantGlobalBufferType[ ConstantMemoryUsage / sizeof(unsigned long) ]; + + enum { ConstantMemoryUseThreshold = 0x000200 /* 512 bytes */ }; + + KOKKOS_INLINE_FUNCTION static + CudaSpace::size_type warp_count( CudaSpace::size_type i ) + { return ( i + WarpIndexMask ) >> WarpIndexShift ; } + + KOKKOS_INLINE_FUNCTION static + CudaSpace::size_type warp_align( CudaSpace::size_type i ) + { + enum { Mask = ~CudaSpace::size_type( WarpIndexMask ) }; + return ( i + WarpIndexMask ) & Mask ; + } +}; + +//---------------------------------------------------------------------------- + +CudaSpace::size_type cuda_internal_multiprocessor_count(); 
+CudaSpace::size_type cuda_internal_maximum_warp_count(); +CudaSpace::size_type cuda_internal_maximum_grid_count(); +CudaSpace::size_type cuda_internal_maximum_shared_words(); + +CudaSpace::size_type * cuda_internal_scratch_flags( const CudaSpace::size_type size ); +CudaSpace::size_type * cuda_internal_scratch_space( const CudaSpace::size_type size ); +CudaSpace::size_type * cuda_internal_scratch_unified( const CudaSpace::size_type size ); + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#if defined( __CUDACC__ ) + +/** \brief Access to constant memory on the device */ +#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE + +__device__ __constant__ +extern unsigned long kokkos_impl_cuda_constant_memory_buffer[] ; + +#else + +__device__ __constant__ +unsigned long kokkos_impl_cuda_constant_memory_buffer[ Kokkos::Impl::CudaTraits::ConstantMemoryUsage / sizeof(unsigned long) ] ; + +#endif + + +namespace Kokkos { +namespace Impl { + struct CudaLockArraysStruct { + int* atomic; + int* scratch; + int* threadid; + }; +} +} +__device__ __constant__ +#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE +extern +#endif +Kokkos::Impl::CudaLockArraysStruct kokkos_impl_cuda_lock_arrays ; + +#define CUDA_SPACE_ATOMIC_MASK 0x1FFFF +#define CUDA_SPACE_ATOMIC_XOR_MASK 0x15A39 + +namespace Kokkos { +namespace Impl { + void* cuda_resize_scratch_space(size_t bytes, bool force_shrink = false); +} +} + +namespace Kokkos { +namespace Impl { +__device__ inline +bool lock_address_cuda_space(void* ptr) { + size_t offset = size_t(ptr); + offset = offset >> 2; + offset = offset & CUDA_SPACE_ATOMIC_MASK; + return (0 == atomicCAS(&kokkos_impl_cuda_lock_arrays.atomic[offset],0,1)); +} + +__device__ inline +void unlock_address_cuda_space(void* ptr) { + size_t offset = size_t(ptr); + offset = offset >> 2; + offset = offset & CUDA_SPACE_ATOMIC_MASK; + 
atomicExch( &kokkos_impl_cuda_lock_arrays.atomic[ offset ], 0); +} + +} +} + +template< typename T > +inline +__device__ +T * kokkos_impl_cuda_shared_memory() +{ extern __shared__ Kokkos::CudaSpace::size_type sh[]; return (T*) sh ; } + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- +// See section B.17 of Cuda C Programming Guide Version 3.2 +// for discussion of +// __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor) +// function qualifier which could be used to improve performance. +//---------------------------------------------------------------------------- +// Maximize L1 cache and minimize shared memory: +// cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferL1 ); +// For 2.0 capability: 48 KB L1 and 16 KB shared +//---------------------------------------------------------------------------- + +template< class DriverType > +__global__ +static void cuda_parallel_launch_constant_memory() +{ + const DriverType & driver = + *((const DriverType *) kokkos_impl_cuda_constant_memory_buffer ); + + driver(); +} + +template< class DriverType > +__global__ +static void cuda_parallel_launch_local_memory( const DriverType driver ) +{ + driver(); +} + +template < class DriverType , + bool Large = ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) ) > +struct CudaParallelLaunch ; + +template < class DriverType > +struct CudaParallelLaunch< DriverType , true > { + + inline + CudaParallelLaunch( const DriverType & driver + , const dim3 & grid + , const dim3 & block + , const int shmem + , const cudaStream_t stream = 0 ) + { + if ( grid.x && ( block.x * block.y * block.z ) ) { + + if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) < + sizeof( DriverType ) ) { + Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: Functor is too large") ); + } + + // Fence before changing settings and copying closure + Kokkos::Cuda::fence(); + + if ( 
CudaTraits::SharedMemoryCapacity < shmem ) { + Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); + } + #ifndef KOKKOS_ARCH_KEPLER //On Kepler the L1 has no benefit since it doesn't cache reads + else if ( shmem ) { + CUDA_SAFE_CALL( cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferShared ) ); + } else { + CUDA_SAFE_CALL( cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferL1 ) ); + } + #endif + + // Copy functor to constant memory on the device + cudaMemcpyToSymbol( kokkos_impl_cuda_constant_memory_buffer , & driver , sizeof(DriverType) ); + + #ifndef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE + Kokkos::Impl::CudaLockArraysStruct locks; + locks.atomic = atomic_lock_array_cuda_space_ptr(false); + locks.scratch = scratch_lock_array_cuda_space_ptr(false); + locks.threadid = threadid_lock_array_cuda_space_ptr(false); + cudaMemcpyToSymbol( kokkos_impl_cuda_lock_arrays , & locks , sizeof(CudaLockArraysStruct) ); + #endif + + // Invoke the driver function on the device + cuda_parallel_launch_constant_memory< DriverType ><<< grid , block , shmem , stream >>>(); + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + CUDA_SAFE_CALL( cudaGetLastError() ); + Kokkos::Cuda::fence(); +#endif + } + } +}; + +template < class DriverType > +struct CudaParallelLaunch< DriverType , false > { + + inline + CudaParallelLaunch( const DriverType & driver + , const dim3 & grid + , const dim3 & block + , const int shmem + , const cudaStream_t stream = 0 ) + { + if ( grid.x && ( block.x * block.y * block.z ) ) { + + if ( CudaTraits::SharedMemoryCapacity < shmem ) { + Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); + } + #ifndef KOKKOS_ARCH_KEPLER //On Kepler the L1 has no benefit since it doesn't cache reads + else if ( shmem ) { + CUDA_SAFE_CALL( cudaFuncSetCacheConfig( 
cuda_parallel_launch_local_memory< DriverType > , cudaFuncCachePreferShared ) ); + } else { + CUDA_SAFE_CALL( cudaFuncSetCacheConfig( cuda_parallel_launch_local_memory< DriverType > , cudaFuncCachePreferL1 ) ); + } + #endif + + #ifndef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE + Kokkos::Impl::CudaLockArraysStruct locks; + locks.atomic = atomic_lock_array_cuda_space_ptr(false); + locks.scratch = scratch_lock_array_cuda_space_ptr(false); + locks.threadid = threadid_lock_array_cuda_space_ptr(false); + cudaMemcpyToSymbol( kokkos_impl_cuda_lock_arrays , & locks , sizeof(CudaLockArraysStruct) ); + #endif + + cuda_parallel_launch_local_memory< DriverType ><<< grid , block , shmem , stream >>>( driver ); + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + CUDA_SAFE_CALL( cudaGetLastError() ); + Kokkos::Cuda::fence(); +#endif + } + } +}; + +//---------------------------------------------------------------------------- + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* defined( __CUDACC__ ) */ +#endif /* defined( KOKKOS_HAVE_CUDA ) */ +#endif /* #ifndef KOKKOS_CUDAEXEC_HPP */ diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a4f372d65d1ee6456d9ff6d21cd4775d6fb6c448 --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp @@ -0,0 +1,829 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <stdlib.h> +#include <iostream> +#include <sstream> +#include <stdexcept> +#include <algorithm> +#include <Kokkos_Macros.hpp> + +/* only compile this file if CUDA is enabled for Kokkos */ +#ifdef KOKKOS_HAVE_CUDA + +#include <Kokkos_Core.hpp> +#include <Kokkos_Cuda.hpp> +#include <Kokkos_CudaSpace.hpp> + +#include <Cuda/Kokkos_Cuda_Internal.hpp> +#include <impl/Kokkos_Error.hpp> + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { + +namespace { + cudaStream_t get_deep_copy_stream() { + static cudaStream_t s = 0; + if( s == 0) { + cudaStreamCreate ( &s ); + } + return s; + } +} + +DeepCopy<CudaSpace,CudaSpace,Cuda>::DeepCopy( void * dst , const void * src , size_t n ) +{ CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); } + +DeepCopy<HostSpace,CudaSpace,Cuda>::DeepCopy( void * dst , const void * src , size_t n ) +{ CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); } + +DeepCopy<CudaSpace,HostSpace,Cuda>::DeepCopy( void * dst , const void * src , size_t n ) +{ CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); } + +DeepCopy<CudaSpace,CudaSpace,Cuda>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n ) +{ CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , instance.cuda_stream() ) ); } + +DeepCopy<HostSpace,CudaSpace,Cuda>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n ) +{ CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , instance.cuda_stream() ) ); } + +DeepCopy<CudaSpace,HostSpace,Cuda>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n ) +{ CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , 
instance.cuda_stream() ) ); } + +void DeepCopyAsyncCuda( void * dst , const void * src , size_t n) { + cudaStream_t s = get_deep_copy_stream(); + CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , s ) ); + cudaStreamSynchronize(s); +} + +} // namespace Impl +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + + +namespace Kokkos { + +void CudaSpace::access_error() +{ + const std::string msg("Kokkos::CudaSpace::access_error attempt to execute Cuda function from non-Cuda space" ); + Kokkos::Impl::throw_runtime_exception( msg ); +} + +void CudaSpace::access_error( const void * const ) +{ + const std::string msg("Kokkos::CudaSpace::access_error attempt to execute Cuda function from non-Cuda space" ); + Kokkos::Impl::throw_runtime_exception( msg ); +} + +/*--------------------------------------------------------------------------*/ + +bool CudaUVMSpace::available() +{ +#if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION ) && !defined(__APPLE__) + enum { UVM_available = true }; +#else + enum { UVM_available = false }; +#endif + return UVM_available; +} + +/*--------------------------------------------------------------------------*/ + +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { + +CudaSpace::CudaSpace() + : m_device( Kokkos::Cuda().cuda_device() ) +{ +} + +CudaUVMSpace::CudaUVMSpace() + : m_device( Kokkos::Cuda().cuda_device() ) +{ +} + +CudaHostPinnedSpace::CudaHostPinnedSpace() +{ +} + +void * CudaSpace::allocate( const size_t arg_alloc_size ) const +{ + void * ptr = NULL; + + CUDA_SAFE_CALL( cudaMalloc( &ptr, arg_alloc_size ) ); + + return ptr ; +} + +void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const +{ + void * ptr = NULL; + + 
CUDA_SAFE_CALL( cudaMallocManaged( &ptr, arg_alloc_size , cudaMemAttachGlobal ) ); + + return ptr ; +} + +void * CudaHostPinnedSpace::allocate( const size_t arg_alloc_size ) const +{ + void * ptr = NULL; + + CUDA_SAFE_CALL( cudaHostAlloc( &ptr, arg_alloc_size , cudaHostAllocDefault ) ); + + return ptr ; +} + +void CudaSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const +{ + try { + CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) ); + } catch(...) {} +} + +void CudaUVMSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const +{ + try { + CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) ); + } catch(...) {} +} + +void CudaHostPinnedSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const +{ + try { + CUDA_SAFE_CALL( cudaFreeHost( arg_alloc_ptr ) ); + } catch(...) {} +} + +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +SharedAllocationRecord< void , void > +SharedAllocationRecord< Kokkos::CudaSpace , void >::s_root_record ; + +SharedAllocationRecord< void , void > +SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::s_root_record ; + +SharedAllocationRecord< void , void > +SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::s_root_record ; + +::cudaTextureObject_t +SharedAllocationRecord< Kokkos::CudaSpace , void >:: +attach_texture_object( const unsigned sizeof_alias + , void * const alloc_ptr + , size_t const alloc_size ) +{ + enum { TEXTURE_BOUND_1D = 1u << 27 }; + + if ( ( alloc_ptr == 0 ) || ( sizeof_alias * TEXTURE_BOUND_1D <= alloc_size ) ) { + std::ostringstream msg ; + msg << "Kokkos::CudaSpace ERROR: Cannot attach texture object to" + << " alloc_ptr(" << alloc_ptr << ")" + << " alloc_size(" << alloc_size << ")" + << " max_size(" << ( sizeof_alias * 
TEXTURE_BOUND_1D ) << ")" ; + std::cerr << msg.str() << std::endl ; + std::cerr.flush(); + Kokkos::Impl::throw_runtime_exception( msg.str() ); + } + + ::cudaTextureObject_t tex_obj ; + + struct cudaResourceDesc resDesc ; + struct cudaTextureDesc texDesc ; + + memset( & resDesc , 0 , sizeof(resDesc) ); + memset( & texDesc , 0 , sizeof(texDesc) ); + + resDesc.resType = cudaResourceTypeLinear ; + resDesc.res.linear.desc = ( sizeof_alias == 4 ? cudaCreateChannelDesc< int >() : + ( sizeof_alias == 8 ? cudaCreateChannelDesc< ::int2 >() : + /* sizeof_alias == 16 */ cudaCreateChannelDesc< ::int4 >() ) ); + resDesc.res.linear.sizeInBytes = alloc_size ; + resDesc.res.linear.devPtr = alloc_ptr ; + + CUDA_SAFE_CALL( cudaCreateTextureObject( & tex_obj , & resDesc, & texDesc, NULL ) ); + + return tex_obj ; +} + +std::string +SharedAllocationRecord< Kokkos::CudaSpace , void >::get_label() const +{ + SharedAllocationHeader header ; + + Kokkos::Impl::DeepCopy< Kokkos::HostSpace , Kokkos::CudaSpace >( & header , RecordBase::head() , sizeof(SharedAllocationHeader) ); + + return std::string( header.m_label ); +} + +std::string +SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_label() const +{ + return std::string( RecordBase::head()->m_label ); +} + +std::string +SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_label() const +{ + return std::string( RecordBase::head()->m_label ); +} + +SharedAllocationRecord< Kokkos::CudaSpace , void > * +SharedAllocationRecord< Kokkos::CudaSpace , void >:: +allocate( const Kokkos::CudaSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size + ) +{ + return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size ); +} + +SharedAllocationRecord< Kokkos::CudaUVMSpace , void > * +SharedAllocationRecord< Kokkos::CudaUVMSpace , void >:: +allocate( const Kokkos::CudaUVMSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size + ) +{ + return new 
SharedAllocationRecord( arg_space , arg_label , arg_alloc_size ); +} + +SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > * +SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >:: +allocate( const Kokkos::CudaHostPinnedSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size + ) +{ + return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size ); +} + +void +SharedAllocationRecord< Kokkos::CudaSpace , void >:: +deallocate( SharedAllocationRecord< void , void > * arg_rec ) +{ + delete static_cast<SharedAllocationRecord*>(arg_rec); +} + +void +SharedAllocationRecord< Kokkos::CudaUVMSpace , void >:: +deallocate( SharedAllocationRecord< void , void > * arg_rec ) +{ + delete static_cast<SharedAllocationRecord*>(arg_rec); +} + +void +SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >:: +deallocate( SharedAllocationRecord< void , void > * arg_rec ) +{ + delete static_cast<SharedAllocationRecord*>(arg_rec); +} + +SharedAllocationRecord< Kokkos::CudaSpace , void >:: +~SharedAllocationRecord() +{ + m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr + , SharedAllocationRecord< void , void >::m_alloc_size + ); +} + +SharedAllocationRecord< Kokkos::CudaUVMSpace , void >:: +~SharedAllocationRecord() +{ + m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr + , SharedAllocationRecord< void , void >::m_alloc_size + ); +} + +SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >:: +~SharedAllocationRecord() +{ + m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr + , SharedAllocationRecord< void , void >::m_alloc_size + ); +} + +SharedAllocationRecord< Kokkos::CudaSpace , void >:: +SharedAllocationRecord( const Kokkos::CudaSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size + , const SharedAllocationRecord< void , void >::function_type arg_dealloc + ) + // Pass through allocated [ SharedAllocationHeader , user_memory ] + 
// Pass through deallocation function
+  : SharedAllocationRecord< void , void >
+      ( & SharedAllocationRecord< Kokkos::CudaSpace , void >::s_root_record
+      , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
+      , sizeof(SharedAllocationHeader) + arg_alloc_size
+      , arg_dealloc
+      )
+  , m_tex_obj( 0 )
+  , m_space( arg_space )
+{
+  // The header is assembled in a host-side temporary and then copied to the
+  // front of the allocation with a DeepCopy.
+  SharedAllocationHeader header ;
+
+  // Fill in the Header information
+  header.m_record = static_cast< SharedAllocationRecord< void , void > * >( this );
+
+  strncpy( header.m_label
+          , arg_label.c_str()
+          , SharedAllocationHeader::maximum_label_length
+          );
+
+  // strncpy does not NUL-terminate when the label has maximum_label_length or
+  // more characters; force termination so get_label() reads a valid C string.
+  // (Assumes m_label holds maximum_label_length chars, as the strncpy bound
+  // implies - confirm against SharedAllocationHeader.)
+  header.m_label[ SharedAllocationHeader::maximum_label_length - 1 ] = (char) 0 ;
+
+  // Copy to device memory
+  Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>::DeepCopy( RecordBase::m_alloc_ptr , & header , sizeof(SharedAllocationHeader) );
+}
+
+// Record for a CudaUVMSpace allocation of sizeof(SharedAllocationHeader) +
+// arg_alloc_size bytes; the header at the front is written directly.
+SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
+SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space
+                      , const std::string       & arg_label
+                      , const size_t              arg_alloc_size
+                      , const SharedAllocationRecord< void , void >::function_type arg_dealloc
+                      )
+  // Pass through allocated [ SharedAllocationHeader , user_memory ]
+  // Pass through deallocation function
+  : SharedAllocationRecord< void , void >
+      ( & SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::s_root_record
+      , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
+      , sizeof(SharedAllocationHeader) + arg_alloc_size
+      , arg_dealloc
+      )
+  , m_tex_obj( 0 )
+  , m_space( arg_space )
+{
+  // Fill in the Header information, directly accessible via UVM
+
+  RecordBase::m_alloc_ptr->m_record = this ;
+
+  strncpy( RecordBase::m_alloc_ptr->m_label
+          , arg_label.c_str()
+          , SharedAllocationHeader::maximum_label_length
+          );
+
+  // Guarantee NUL termination for labels that fill the whole buffer;
+  // get_label() constructs a std::string straight from m_label.
+  RecordBase::m_alloc_ptr->m_label[ SharedAllocationHeader::maximum_label_length - 1 ] = (char) 0 ;
+}
+
+// Record for a CudaHostPinnedSpace allocation of sizeof(SharedAllocationHeader)
+// + arg_alloc_size bytes; the header at the front is written directly.
+SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
+SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space
+                      , const std::string              & arg_label
+                      , const size_t                     arg_alloc_size
+                      , const SharedAllocationRecord< void , void >::function_type arg_dealloc
+                      )
+  // Pass through allocated [ SharedAllocationHeader , user_memory ]
+  // Pass through deallocation function
+  : SharedAllocationRecord< void , void >
+      ( & SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::s_root_record
+      , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
+      , sizeof(SharedAllocationHeader) + arg_alloc_size
+      , arg_dealloc
+      )
+  , m_space( arg_space )
+{
+  // Fill in the Header information, directly accessible from the host
+  // because the allocation is pinned host memory
+
+  RecordBase::m_alloc_ptr->m_record = this ;
+
+  strncpy( RecordBase::m_alloc_ptr->m_label
+          , arg_label.c_str()
+          , SharedAllocationHeader::maximum_label_length
+          );
+
+  // Guarantee NUL termination for labels that fill the whole buffer;
+  // get_label() constructs a std::string straight from m_label.
+  RecordBase::m_alloc_ptr->m_label[ SharedAllocationHeader::maximum_label_length - 1 ] = (char) 0 ;
+}
+
+//----------------------------------------------------------------------------
+
+void * SharedAllocationRecord< Kokkos::CudaSpace , void >::
+allocate_tracked( const Kokkos::CudaSpace & arg_space
+                , const std::string & arg_alloc_label
+                , const size_t arg_alloc_size )
+{
+  if ( ! 
arg_alloc_size ) return (void *) 0 ; + + SharedAllocationRecord * const r = + allocate( arg_space , arg_alloc_label , arg_alloc_size ); + + RecordBase::increment( r ); + + return r->data(); +} + +void SharedAllocationRecord< Kokkos::CudaSpace , void >:: +deallocate_tracked( void * const arg_alloc_ptr ) +{ + if ( arg_alloc_ptr != 0 ) { + SharedAllocationRecord * const r = get_record( arg_alloc_ptr ); + + RecordBase::decrement( r ); + } +} + +void * SharedAllocationRecord< Kokkos::CudaSpace , void >:: +reallocate_tracked( void * const arg_alloc_ptr + , const size_t arg_alloc_size ) +{ + SharedAllocationRecord * const r_old = get_record( arg_alloc_ptr ); + SharedAllocationRecord * const r_new = allocate( r_old->m_space , r_old->get_label() , arg_alloc_size ); + + Kokkos::Impl::DeepCopy<CudaSpace,CudaSpace>( r_new->data() , r_old->data() + , std::min( r_old->size() , r_new->size() ) ); + + RecordBase::increment( r_new ); + RecordBase::decrement( r_old ); + + return r_new->data(); +} + +void * SharedAllocationRecord< Kokkos::CudaUVMSpace , void >:: +allocate_tracked( const Kokkos::CudaUVMSpace & arg_space + , const std::string & arg_alloc_label + , const size_t arg_alloc_size ) +{ + if ( ! 
arg_alloc_size ) return (void *) 0 ; + + SharedAllocationRecord * const r = + allocate( arg_space , arg_alloc_label , arg_alloc_size ); + + RecordBase::increment( r ); + + return r->data(); +} + +void SharedAllocationRecord< Kokkos::CudaUVMSpace , void >:: +deallocate_tracked( void * const arg_alloc_ptr ) +{ + if ( arg_alloc_ptr != 0 ) { + SharedAllocationRecord * const r = get_record( arg_alloc_ptr ); + + RecordBase::decrement( r ); + } +} + +void * SharedAllocationRecord< Kokkos::CudaUVMSpace , void >:: +reallocate_tracked( void * const arg_alloc_ptr + , const size_t arg_alloc_size ) +{ + SharedAllocationRecord * const r_old = get_record( arg_alloc_ptr ); + SharedAllocationRecord * const r_new = allocate( r_old->m_space , r_old->get_label() , arg_alloc_size ); + + Kokkos::Impl::DeepCopy<CudaUVMSpace,CudaUVMSpace>( r_new->data() , r_old->data() + , std::min( r_old->size() , r_new->size() ) ); + + RecordBase::increment( r_new ); + RecordBase::decrement( r_old ); + + return r_new->data(); +} + +void * SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >:: +allocate_tracked( const Kokkos::CudaHostPinnedSpace & arg_space + , const std::string & arg_alloc_label + , const size_t arg_alloc_size ) +{ + if ( ! 
arg_alloc_size ) return (void *) 0 ; + + SharedAllocationRecord * const r = + allocate( arg_space , arg_alloc_label , arg_alloc_size ); + + RecordBase::increment( r ); + + return r->data(); +} + +void SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >:: +deallocate_tracked( void * const arg_alloc_ptr ) +{ + if ( arg_alloc_ptr != 0 ) { + SharedAllocationRecord * const r = get_record( arg_alloc_ptr ); + + RecordBase::decrement( r ); + } +} + +void * SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >:: +reallocate_tracked( void * const arg_alloc_ptr + , const size_t arg_alloc_size ) +{ + SharedAllocationRecord * const r_old = get_record( arg_alloc_ptr ); + SharedAllocationRecord * const r_new = allocate( r_old->m_space , r_old->get_label() , arg_alloc_size ); + + Kokkos::Impl::DeepCopy<CudaHostPinnedSpace,CudaHostPinnedSpace>( r_new->data() , r_old->data() + , std::min( r_old->size() , r_new->size() ) ); + + RecordBase::increment( r_new ); + RecordBase::decrement( r_old ); + + return r_new->data(); +} + +//---------------------------------------------------------------------------- + +SharedAllocationRecord< Kokkos::CudaSpace , void > * +SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record( void * alloc_ptr ) +{ + using Header = SharedAllocationHeader ; + using RecordBase = SharedAllocationRecord< void , void > ; + using RecordCuda = SharedAllocationRecord< Kokkos::CudaSpace , void > ; + +#if 0 + // Copy the header from the allocation + Header head ; + + Header const * const head_cuda = alloc_ptr ? Header::get_header( alloc_ptr ) : (Header*) 0 ; + + if ( alloc_ptr ) { + Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , head_cuda , sizeof(SharedAllocationHeader) ); + } + + RecordCuda * const record = alloc_ptr ? static_cast< RecordCuda * >( head.m_record ) : (RecordCuda *) 0 ; + + if ( ! 
alloc_ptr || record->m_alloc_ptr != head_cuda ) { + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) ); + } + +#else + + // Iterate the list to search for the record among all allocations + // requires obtaining the root of the list and then locking the list. + + RecordCuda * const record = static_cast< RecordCuda * >( RecordBase::find( & s_root_record , alloc_ptr ) ); + + if ( record == 0 ) { + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) ); + } + +#endif + + return record ; +} + +SharedAllocationRecord< Kokkos::CudaUVMSpace , void > * +SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record( void * alloc_ptr ) +{ + using Header = SharedAllocationHeader ; + using RecordCuda = SharedAllocationRecord< Kokkos::CudaUVMSpace , void > ; + + Header * const h = alloc_ptr ? reinterpret_cast< Header * >( alloc_ptr ) - 1 : (Header *) 0 ; + + if ( ! alloc_ptr || h->m_record->m_alloc_ptr != h ) { + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record ERROR" ) ); + } + + return static_cast< RecordCuda * >( h->m_record ); +} + +SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > * +SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record( void * alloc_ptr ) +{ + using Header = SharedAllocationHeader ; + using RecordCuda = SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > ; + + Header * const h = alloc_ptr ? reinterpret_cast< Header * >( alloc_ptr ) - 1 : (Header *) 0 ; + + if ( ! 
alloc_ptr || h->m_record->m_alloc_ptr != h ) {
+    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record ERROR" ) );
+  }
+
+  return static_cast< RecordCuda * >( h->m_record );
+}
+
+// Iterate records to print orphaned memory ...
+//
+// Walks the circular record list starting at s_root_record and writes one
+// formatted line per record to the stream 's'. Each header is copied to the
+// host with a DeepCopy before its label is read. With 'detail' set, the list
+// links, extent, use count and deallocation function are printed as well.
+// Lines are formatted into a 256-byte buffer, which snprintf truncates to fit.
+// The 'space' argument is not used.
+void
+SharedAllocationRecord< Kokkos::CudaSpace , void >::
+print_records( std::ostream & s , const Kokkos::CudaSpace & space , bool detail )
+{
+  SharedAllocationRecord< void , void > * r = & s_root_record ;
+
+  char buffer[256] ;
+
+  SharedAllocationHeader head ;
+
+  if ( detail ) {
+    do {
+      if ( r->m_alloc_ptr ) {
+        Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
+      }
+      else {
+        // No allocation behind this record (e.g. the root): print an empty label.
+        head.m_label[0] = 0 ;
+      }
+
+      // Formatting dependent on sizeof(uintptr_t). The 'long long' form is the
+      // fallback so that format_string is never left uninitialized.
+      const char * const format_string =
+        ( sizeof(uintptr_t) == sizeof(unsigned long) )
+        ? "Cuda addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + %.8ld ] count(%d) dealloc(0x%.12lx) %s\n"
+        : "Cuda addr( 0x%.12llx ) list( 0x%.12llx 0x%.12llx ) extent[ 0x%.12llx + %.8ld ] count(%d) dealloc(0x%.12llx) %s\n" ;
+
+      snprintf( buffer , 256
+              , format_string
+              , reinterpret_cast<uintptr_t>( r )
+              , reinterpret_cast<uintptr_t>( r->m_prev )
+              , reinterpret_cast<uintptr_t>( r->m_next )
+              , reinterpret_cast<uintptr_t>( r->m_alloc_ptr )
+              , r->m_alloc_size
+              , r->m_count
+              , reinterpret_cast<uintptr_t>( r->m_dealloc )
+              , head.m_label
+              );
+      // Write to the caller's stream; the original always wrote to std::cout.
+      s << buffer ;
+      r = r->m_next ;
+    } while ( r != & s_root_record );
+  }
+  else {
+    do {
+      if ( r->m_alloc_ptr ) {
+
+        Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
+
+        // Formatting dependent on sizeof(uintptr_t); same fallback as above.
+        const char * const format_string =
+          ( sizeof(uintptr_t) == sizeof(unsigned long) )
+          ? "Cuda [ 0x%.12lx + %ld ] %s\n"
+          : "Cuda [ 0x%.12llx + %ld ] %s\n" ;
+
+        snprintf( buffer , 256
+                , format_string
+                , reinterpret_cast< uintptr_t >( r->data() )
+                , r->size()
+                , head.m_label
+                );
+      }
+      else {
+        snprintf( buffer , 256 , "Cuda [ 0 + 0 ]\n" );
+      }
+      // Write to the caller's stream; the original always wrote to std::cout.
+      s << buffer ;
+      r = r->m_next ;
+    } while ( r != & s_root_record );
+  }
+}
+
+// UVM and host-pinned headers are readable from the host, so both delegate to
+// the shared host-accessible printer. The 'space' argument is not used.
+void
+SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
+print_records( std::ostream & s , const Kokkos::CudaUVMSpace & space , bool detail )
+{
+  SharedAllocationRecord< void , void >::print_host_accessible_records( s , "CudaUVM" , & s_root_record , detail );
+}
+
+void
+SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
+print_records( std::ostream & s , const Kokkos::CudaHostPinnedSpace & space , bool detail )
+{
+  SharedAllocationRecord< void , void >::print_host_accessible_records( s , "CudaHostPinned" , & s_root_record , detail );
+}
+
+} // namespace Impl
+} // namespace Experimental
+} // namespace Kokkos
+
+/*--------------------------------------------------------------------------*/
+/*--------------------------------------------------------------------------*/
+
+namespace Kokkos {
+namespace {
+  // Zeroes entries [0, CUDA_SPACE_ATOMIC_MASK] of the atomic lock array,
+  // one entry per thread.
+  __global__ void init_lock_array_kernel_atomic() {
+    unsigned i = blockIdx.x*blockDim.x + threadIdx.x;
+
+    if(i<CUDA_SPACE_ATOMIC_MASK+1)
+      kokkos_impl_cuda_lock_arrays.atomic[i] = 0;
+  }
+
+  // Zeroes the first N entries of the scratch and threadid lock arrays,
+  // one entry per thread.
+  __global__ void init_lock_array_kernel_scratch_threadid(int N) {
+    unsigned i = blockIdx.x*blockDim.x + threadIdx.x;
+
+    if(i<N) {
+      kokkos_impl_cuda_lock_arrays.scratch[i] = 0;
+      kokkos_impl_cuda_lock_arrays.threadid[i] = 0;
+    }
+  }
+}
+
+
+namespace Impl {
+// Returns the device array of CUDA_SPACE_ATOMIC_MASK+1 ints, allocating it on
+// the first call. With deallocate == true the array is freed and NULL is
+// returned. The cudaMalloc / cudaFree results are not checked here.
+int* atomic_lock_array_cuda_space_ptr(bool deallocate) {
+  static int* ptr = NULL;
+  if(deallocate) {
+    cudaFree(ptr);
+    ptr = NULL;
+  }
+
+  if(ptr==NULL && !deallocate)
+    cudaMalloc(&ptr,sizeof(int)*(CUDA_SPACE_ATOMIC_MASK+1));
+  return ptr;
+}
+
+int* 
scratch_lock_array_cuda_space_ptr(bool deallocate) { + static int* ptr = NULL; + if(deallocate) { + cudaFree(ptr); + ptr = NULL; + } + + if(ptr==NULL && !deallocate) + cudaMalloc(&ptr,sizeof(int)*(Cuda::concurrency())); + return ptr; +} + +int* threadid_lock_array_cuda_space_ptr(bool deallocate) { + static int* ptr = NULL; + if(deallocate) { + cudaFree(ptr); + ptr = NULL; + } + + if(ptr==NULL && !deallocate) + cudaMalloc(&ptr,sizeof(int)*(Cuda::concurrency())); + return ptr; +} + +void init_lock_arrays_cuda_space() { + static int is_initialized = 0; + if(! is_initialized) { + Kokkos::Impl::CudaLockArraysStruct locks; + locks.atomic = atomic_lock_array_cuda_space_ptr(false); + locks.scratch = scratch_lock_array_cuda_space_ptr(false); + locks.threadid = threadid_lock_array_cuda_space_ptr(false); + cudaMemcpyToSymbol( kokkos_impl_cuda_lock_arrays , & locks , sizeof(CudaLockArraysStruct) ); + init_lock_array_kernel_atomic<<<(CUDA_SPACE_ATOMIC_MASK+255)/256,256>>>(); + init_lock_array_kernel_scratch_threadid<<<(Kokkos::Cuda::concurrency()+255)/256,256>>>(Kokkos::Cuda::concurrency()); + } +} + +void* cuda_resize_scratch_space(size_t bytes, bool force_shrink) { + static void* ptr = NULL; + static size_t current_size = 0; + if(current_size == 0) { + current_size = bytes; + ptr = Kokkos::kokkos_malloc<Kokkos::CudaSpace>("CudaSpace::ScratchMemory",current_size); + } + if(bytes > current_size) { + current_size = bytes; + ptr = Kokkos::kokkos_realloc<Kokkos::CudaSpace>(ptr,current_size); + } + if((bytes < current_size) && (force_shrink)) { + current_size = bytes; + Kokkos::kokkos_free<Kokkos::CudaSpace>(ptr); + ptr = Kokkos::kokkos_malloc<Kokkos::CudaSpace>("CudaSpace::ScratchMemory",current_size); + } + return ptr; +} + +} +} +#endif // KOKKOS_HAVE_CUDA + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Alloc.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Alloc.hpp new file mode 100644 index 0000000000000000000000000000000000000000..10999ee57bfd39e81e28b64a5f5a0df5ee877c42 --- 
/dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Alloc.hpp @@ -0,0 +1,182 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CUDA_ALLOCATION_TRACKING_HPP +#define KOKKOS_CUDA_ALLOCATION_TRACKING_HPP + +#include <Kokkos_Macros.hpp> + +/* only compile this file if CUDA is enabled for Kokkos */ +#ifdef KOKKOS_HAVE_CUDA + +#include <impl/Kokkos_Traits.hpp> + +namespace Kokkos { +namespace Impl { + +template< class DestructFunctor > +SharedAllocationRecord * +shared_allocation_record( Kokkos::CudaSpace const & arg_space + , void * const arg_alloc_ptr + , DestructFunctor const & arg_destruct ) +{ + SharedAllocationRecord * const record = SharedAllocationRecord::get_record( arg_alloc_ptr ); + + // assert: record != 0 + + // assert: sizeof(DestructFunctor) <= record->m_destruct_size + + // assert: record->m_destruct_function == 0 + + DestructFunctor * const functor = + reinterpret_cast< DestructFunctor * >( + reinterpret_cast< uintptr_t >( record ) + sizeof(SharedAllocationRecord) ); + + new( functor ) DestructFunctor( arg_destruct ); + + record->m_destruct_functor = & shared_allocation_destroy< DestructFunctor > ; + + return record ; +} + + +/// class CudaUnmanagedAllocator +/// does nothing when deallocate(ptr,size) is called +struct CudaUnmanagedAllocator +{ + static const char * name() + { + return "Cuda Unmanaged Allocator"; + } + + static void deallocate(void * /*ptr*/, size_t /*size*/) {} + + static bool support_texture_binding() { return true; } +}; + +/// class CudaUnmanagedUVMAllocator +/// does nothing when deallocate(ptr,size) is called +struct CudaUnmanagedUVMAllocator +{ + static const char * name() + { + return "Cuda Unmanaged UVM Allocator"; + } + + static void deallocate(void * /*ptr*/, size_t /*size*/) {} + + static bool support_texture_binding() { return true; } +}; + +/// class CudaUnmanagedHostAllocator +/// does nothing when deallocate(ptr,size) is called +class CudaUnmanagedHostAllocator +{ +public: + static const char * name() 
+ { + return "Cuda Unmanaged Host Allocator"; + } + // Unmanaged deallocate does nothing + static void deallocate(void * /*ptr*/, size_t /*size*/) {} +}; + +/// class CudaMallocAllocator +class CudaMallocAllocator +{ +public: + static const char * name() + { + return "Cuda Malloc Allocator"; + } + + static void* allocate(size_t size); + + static void deallocate(void * ptr, size_t); + + static void * reallocate(void * old_ptr, size_t old_size, size_t new_size); + + static bool support_texture_binding() { return true; } +}; + +/// class CudaUVMAllocator +class CudaUVMAllocator +{ +public: + static const char * name() + { + return "Cuda UVM Allocator"; + } + + static void* allocate(size_t size); + + static void deallocate(void * ptr, size_t); + + static void * reallocate(void * old_ptr, size_t old_size, size_t new_size); + + static bool support_texture_binding() { return true; } +}; + +/// class CudaHostAllocator +class CudaHostAllocator +{ +public: + static const char * name() + { + return "Cuda Host Allocator"; + } + + static void* allocate(size_t size); + + static void deallocate(void * ptr, size_t); + + static void * reallocate(void * old_ptr, size_t old_size, size_t new_size); +}; + + +}} // namespace Kokkos::Impl + +#endif //KOKKOS_HAVE_CUDA + +#endif // #ifndef KOKKOS_CUDA_ALLOCATION_TRACKING_HPP + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a0b29ddc2b270212f9c8b9d18e6ee394b9a61b39 --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp @@ -0,0 +1,69 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_CUDA_ERROR_HPP
+#define KOKKOS_CUDA_ERROR_HPP
+
+#include <Kokkos_Macros.hpp>
+
+/* only compile this file if CUDA is enabled for Kokkos */
+#ifdef KOKKOS_HAVE_CUDA
+
+namespace Kokkos { namespace Impl {
+
+// Declared here; the definition is not in this header.
+void cuda_device_synchronize();
+
+// Reports a failed CUDA call. 'name' is the text of the failing call; 'file'
+// and 'line' give the call site and default to NULL / 0 when not supplied.
+// Declared here; the definition is not in this header.
+void cuda_internal_error_throw( cudaError e , const char * name, const char * file = NULL, const int line = 0 );
+
+// Checks the result of a CUDA runtime call: does nothing when 'e' is
+// cudaSuccess, otherwise forwards all arguments to cuda_internal_error_throw.
+inline void cuda_internal_safe_call( cudaError e , const char * name, const char * file = NULL, const int line = 0)
+{
+  if ( cudaSuccess != e ) { cuda_internal_error_throw( e , name, file, line ); }
+}
+
+// Wraps a CUDA runtime call so that its result is checked. The stringified
+// call text, __FILE__ and __LINE__ are passed along for the error report.
+// 'call' is evaluated exactly once.
+#define CUDA_SAFE_CALL( call )  \
+	Kokkos::Impl::cuda_internal_safe_call( call , #call, __FILE__, __LINE__ )
+
+}} // namespace Kokkos::Impl
+
+#endif //KOKKOS_HAVE_CUDA
+#endif //KOKKOS_CUDA_ERROR_HPP
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2d8d07d0772f2dd2d27a73a4b804f3000953c824
--- /dev/null
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
@@ -0,0 +1,778 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/*--------------------------------------------------------------------------*/ +/* Kokkos interfaces */ + +#include <Kokkos_Core.hpp> + +/* only compile this file if CUDA is enabled for Kokkos */ +#ifdef KOKKOS_HAVE_CUDA + +#include <Cuda/Kokkos_Cuda_Error.hpp> +#include <Cuda/Kokkos_Cuda_Internal.hpp> +#include <impl/Kokkos_Error.hpp> +#include <impl/Kokkos_Profiling_Interface.hpp> + +/*--------------------------------------------------------------------------*/ +/* Standard 'C' libraries */ +#include <stdlib.h> + +/* Standard 'C++' libraries */ +#include <vector> +#include <iostream> +#include <sstream> +#include <string> + +#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE + +__device__ __constant__ +unsigned long kokkos_impl_cuda_constant_memory_buffer[ Kokkos::Impl::CudaTraits::ConstantMemoryUsage / sizeof(unsigned long) ] ; + +__device__ __constant__ +Kokkos::Impl::CudaLockArraysStruct kokkos_impl_cuda_lock_arrays ; + +#endif + +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { + +namespace { + +__global__ +void query_cuda_kernel_arch( int * d_arch ) +{ +#if defined( __CUDA_ARCH__ ) + *d_arch = __CUDA_ARCH__ ; +#else + *d_arch = 0 ; +#endif +} + +/** Query what compute capability is actually launched to the device: */ +int cuda_kernel_arch() +{ + int * d_arch = 0 ; + cudaMalloc( (void **) & d_arch , sizeof(int) ); + query_cuda_kernel_arch<<<1,1>>>( d_arch ); + int arch = 0 ; + cudaMemcpy( & arch , d_arch , sizeof(int) , cudaMemcpyDefault ); + cudaFree( d_arch ); + return arch ; +} + +bool cuda_launch_blocking() +{ + const char * env = getenv("CUDA_LAUNCH_BLOCKING"); + + if (env == 0) return false; + + return atoi(env); +} + +} + +void cuda_device_synchronize() +{ +// static const bool launch_blocking = cuda_launch_blocking(); + +// if (!launch_blocking) { + 
CUDA_SAFE_CALL( cudaDeviceSynchronize() ); +// } +} + +void cuda_internal_error_throw( cudaError e , const char * name, const char * file, const int line ) +{ + std::ostringstream out ; + out << name << " error( " << cudaGetErrorName(e) << "): " << cudaGetErrorString(e); + if (file) { + out << " " << file << ":" << line; + } + throw_runtime_exception( out.str() ); +} + +//---------------------------------------------------------------------------- +// Some significant cuda device properties: +// +// cudaDeviceProp::name : Text label for device +// cudaDeviceProp::major : Device major number +// cudaDeviceProp::minor : Device minor number +// cudaDeviceProp::warpSize : number of threads per warp +// cudaDeviceProp::multiProcessorCount : number of multiprocessors +// cudaDeviceProp::sharedMemPerBlock : capacity of shared memory per block +// cudaDeviceProp::totalConstMem : capacity of constant memory +// cudaDeviceProp::totalGlobalMem : capacity of global memory +// cudaDeviceProp::maxGridSize[3] : maximum grid size + +// +// Section 4.4.2.4 of the CUDA Toolkit Reference Manual +// +// struct cudaDeviceProp { +// char name[256]; +// size_t totalGlobalMem; +// size_t sharedMemPerBlock; +// int regsPerBlock; +// int warpSize; +// size_t memPitch; +// int maxThreadsPerBlock; +// int maxThreadsDim[3]; +// int maxGridSize[3]; +// size_t totalConstMem; +// int major; +// int minor; +// int clockRate; +// size_t textureAlignment; +// int deviceOverlap; +// int multiProcessorCount; +// int kernelExecTimeoutEnabled; +// int integrated; +// int canMapHostMemory; +// int computeMode; +// int concurrentKernels; +// int ECCEnabled; +// int pciBusID; +// int pciDeviceID; +// int tccDriver; +// int asyncEngineCount; +// int unifiedAddressing; +// int memoryClockRate; +// int memoryBusWidth; +// int l2CacheSize; +// int maxThreadsPerMultiProcessor; +// }; + + +namespace { + + + +class CudaInternalDevices { +public: + enum { MAXIMUM_DEVICE_COUNT = 64 }; + struct cudaDeviceProp 
m_cudaProp[ MAXIMUM_DEVICE_COUNT ] ; + int m_cudaDevCount ; + + CudaInternalDevices(); + + static const CudaInternalDevices & singleton(); +}; + +CudaInternalDevices::CudaInternalDevices() +{ + // See 'cudaSetDeviceFlags' for host-device thread interaction + // Section 4.4.2.6 of the CUDA Toolkit Reference Manual + + CUDA_SAFE_CALL (cudaGetDeviceCount( & m_cudaDevCount ) ); + + if(m_cudaDevCount > MAXIMUM_DEVICE_COUNT) { + Kokkos::abort("Sorry, you have more GPUs per node than we thought anybody would ever have. Please report this to github.com/kokkos/kokkos."); + } + for ( int i = 0 ; i < m_cudaDevCount ; ++i ) { + CUDA_SAFE_CALL( cudaGetDeviceProperties( m_cudaProp + i , i ) ); + } +} + +const CudaInternalDevices & CudaInternalDevices::singleton() +{ + static CudaInternalDevices self ; return self ; +} + +} + +//---------------------------------------------------------------------------- + +class CudaInternal { +private: + + CudaInternal( const CudaInternal & ); + CudaInternal & operator = ( const CudaInternal & ); + + +public: + + typedef Cuda::size_type size_type ; + + int m_cudaDev ; + int m_cudaArch ; + unsigned m_multiProcCount ; + unsigned m_maxWarpCount ; + unsigned m_maxBlock ; + unsigned m_maxSharedWords ; + size_type m_scratchSpaceCount ; + size_type m_scratchFlagsCount ; + size_type m_scratchUnifiedCount ; + size_type m_scratchUnifiedSupported ; + size_type m_streamCount ; + size_type * m_scratchSpace ; + size_type * m_scratchFlags ; + size_type * m_scratchUnified ; + cudaStream_t * m_stream ; + + static int was_initialized; + static int was_finalized; + + static CudaInternal & singleton(); + + int verify_is_initialized( const char * const label ) const ; + + int is_initialized() const + { return 0 != m_scratchSpace && 0 != m_scratchFlags ; } + + void initialize( int cuda_device_id , int stream_count ); + void finalize(); + + void print_configuration( std::ostream & ) const ; + + ~CudaInternal(); + + CudaInternal() + : m_cudaDev( -1 ) + , m_cudaArch( 
-1 ) + , m_multiProcCount( 0 ) + , m_maxWarpCount( 0 ) + , m_maxBlock( 0 ) + , m_maxSharedWords( 0 ) + , m_scratchSpaceCount( 0 ) + , m_scratchFlagsCount( 0 ) + , m_scratchUnifiedCount( 0 ) + , m_scratchUnifiedSupported( 0 ) + , m_streamCount( 0 ) + , m_scratchSpace( 0 ) + , m_scratchFlags( 0 ) + , m_scratchUnified( 0 ) + , m_stream( 0 ) + {} + + size_type * scratch_space( const size_type size ); + size_type * scratch_flags( const size_type size ); + size_type * scratch_unified( const size_type size ); +}; + +int CudaInternal::was_initialized = 0; +int CudaInternal::was_finalized = 0; +//---------------------------------------------------------------------------- + + +void CudaInternal::print_configuration( std::ostream & s ) const +{ + const CudaInternalDevices & dev_info = CudaInternalDevices::singleton(); + +#if defined( KOKKOS_HAVE_CUDA ) + s << "macro KOKKOS_HAVE_CUDA : defined" << std::endl ; +#endif +#if defined( CUDA_VERSION ) + s << "macro CUDA_VERSION = " << CUDA_VERSION + << " = version " << CUDA_VERSION / 1000 + << "." << ( CUDA_VERSION % 1000 ) / 10 + << std::endl ; +#endif + + for ( int i = 0 ; i < dev_info.m_cudaDevCount ; ++i ) { + s << "Kokkos::Cuda[ " << i << " ] " + << dev_info.m_cudaProp[i].name + << " capability " << dev_info.m_cudaProp[i].major << "." 
<< dev_info.m_cudaProp[i].minor + << ", Total Global Memory: " << human_memory_size(dev_info.m_cudaProp[i].totalGlobalMem) + << ", Shared Memory per Block: " << human_memory_size(dev_info.m_cudaProp[i].sharedMemPerBlock); + if ( m_cudaDev == i ) s << " : Selected" ; + s << std::endl ; + } +} + +//---------------------------------------------------------------------------- + +CudaInternal::~CudaInternal() +{ + if ( m_stream || + m_scratchSpace || + m_scratchFlags || + m_scratchUnified ) { + std::cerr << "Kokkos::Cuda ERROR: Failed to call Kokkos::Cuda::finalize()" + << std::endl ; + std::cerr.flush(); + } + + m_cudaDev = -1 ; + m_cudaArch = -1 ; + m_multiProcCount = 0 ; + m_maxWarpCount = 0 ; + m_maxBlock = 0 ; + m_maxSharedWords = 0 ; + m_scratchSpaceCount = 0 ; + m_scratchFlagsCount = 0 ; + m_scratchUnifiedCount = 0 ; + m_scratchUnifiedSupported = 0 ; + m_streamCount = 0 ; + m_scratchSpace = 0 ; + m_scratchFlags = 0 ; + m_scratchUnified = 0 ; + m_stream = 0 ; +} + +int CudaInternal::verify_is_initialized( const char * const label ) const +{ + if ( m_cudaDev < 0 ) { + std::cerr << "Kokkos::Cuda::" << label << " : ERROR device not initialized" << std::endl ; + } + return 0 <= m_cudaDev ; +} + +CudaInternal & CudaInternal::singleton() +{ + static CudaInternal self ; + return self ; +} + +void CudaInternal::initialize( int cuda_device_id , int stream_count ) +{ + if ( was_finalized ) Kokkos::abort("Calling Cuda::initialize after Cuda::finalize is illegal\n"); + was_initialized = 1; + if ( is_initialized() ) return; + + enum { WordSize = sizeof(size_type) }; + + if ( ! 
HostSpace::execution_space::is_initialized() ) { + const std::string msg("Cuda::initialize ERROR : HostSpace::execution_space is not initialized"); + throw_runtime_exception( msg ); + } + + const CudaInternalDevices & dev_info = CudaInternalDevices::singleton(); + + const bool ok_init = 0 == m_scratchSpace || 0 == m_scratchFlags ; + + const bool ok_id = 0 <= cuda_device_id && + cuda_device_id < dev_info.m_cudaDevCount ; + + // Need device capability 2.0 or better + + const bool ok_dev = ok_id && + ( 2 <= dev_info.m_cudaProp[ cuda_device_id ].major && + 0 <= dev_info.m_cudaProp[ cuda_device_id ].minor ); + + if ( ok_init && ok_dev ) { + + const struct cudaDeviceProp & cudaProp = + dev_info.m_cudaProp[ cuda_device_id ]; + + m_cudaDev = cuda_device_id ; + + CUDA_SAFE_CALL( cudaSetDevice( m_cudaDev ) ); + CUDA_SAFE_CALL( cudaDeviceReset() ); + Kokkos::Impl::cuda_device_synchronize(); + + // Query what compute capability architecture a kernel executes: + m_cudaArch = cuda_kernel_arch(); + + if ( m_cudaArch != cudaProp.major * 100 + cudaProp.minor * 10 ) { + std::cerr << "Kokkos::Cuda::initialize WARNING: running kernels compiled for compute capability " + << ( m_cudaArch / 100 ) << "." << ( ( m_cudaArch % 100 ) / 10 ) + << " on device with compute capability " + << cudaProp.major << "." << cudaProp.minor + << " , this will likely reduce potential performance." + << std::endl ; + } + + // number of multiprocessors + + m_multiProcCount = cudaProp.multiProcessorCount ; + + //---------------------------------- + // Maximum number of warps, + // at most one warp per thread in a warp for reduction. + + // HCE 2012-February : + // Found bug in CUDA 4.1 that sometimes a kernel launch would fail + // if the thread count == 1024 and a functor is passed to the kernel. + // Copying the kernel to constant memory and then launching with + // thread count == 1024 would work fine. + // + // HCE 2012-October : + // All compute capabilities support at least 16 warps (512 threads). 
+ // However, we have found that 8 warps typically gives better performance. + + m_maxWarpCount = 8 ; + + // m_maxWarpCount = cudaProp.maxThreadsPerBlock / Impl::CudaTraits::WarpSize ; + + if ( Impl::CudaTraits::WarpSize < m_maxWarpCount ) { + m_maxWarpCount = Impl::CudaTraits::WarpSize ; + } + + m_maxSharedWords = cudaProp.sharedMemPerBlock / WordSize ; + + //---------------------------------- + // Maximum number of blocks: + + m_maxBlock = m_cudaArch < 300 ? 65535 : cudaProp.maxGridSize[0] ; + + //---------------------------------- + + m_scratchUnifiedSupported = cudaProp.unifiedAddressing ; + + if ( ! m_scratchUnifiedSupported ) { + std::cout << "Kokkos::Cuda device " + << cudaProp.name << " capability " + << cudaProp.major << "." << cudaProp.minor + << " does not support unified virtual address space" + << std::endl ; + } + + //---------------------------------- + // Multiblock reduction uses scratch flags for counters + // and scratch space for partial reduction values. + // Allocate some initial space. This will grow as needed. + + { + const unsigned reduce_block_count = m_maxWarpCount * Impl::CudaTraits::WarpSize ; + + (void) scratch_unified( 16 * sizeof(size_type) ); + (void) scratch_flags( reduce_block_count * 2 * sizeof(size_type) ); + (void) scratch_space( reduce_block_count * 16 * sizeof(size_type) ); + } + //---------------------------------- + + if ( stream_count ) { + m_stream = (cudaStream_t*) ::malloc( stream_count * sizeof(cudaStream_t) ); + m_streamCount = stream_count ; + for ( size_type i = 0 ; i < m_streamCount ; ++i ) m_stream[i] = 0 ; + } + } + else { + + std::ostringstream msg ; + msg << "Kokkos::Cuda::initialize(" << cuda_device_id << ") FAILED" ; + + if ( ! ok_init ) { + msg << " : Already initialized" ; + } + if ( ! ok_id ) { + msg << " : Device identifier out of range " + << "[0.." << dev_info.m_cudaDevCount << "]" ; + } + else if ( ! ok_dev ) { + msg << " : Device " ; + msg << dev_info.m_cudaProp[ cuda_device_id ].major ; + msg << "." 
; + msg << dev_info.m_cudaProp[ cuda_device_id ].minor ; + msg << " has insufficient capability, required 2.0 or better" ; + } + Kokkos::Impl::throw_runtime_exception( msg.str() ); + } + + #ifdef KOKKOS_CUDA_USE_UVM + if(!cuda_launch_blocking()) { + std::cout << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default" << std::endl; + std::cout << " without setting CUDA_LAUNCH_BLOCKING=1." << std::endl; + std::cout << " The code must call Cuda::fence() after each kernel" << std::endl; + std::cout << " or will likely crash when accessing data on the host." << std::endl; + } + + const char * env_force_device_alloc = getenv("CUDA_MANAGED_FORCE_DEVICE_ALLOC"); + bool force_device_alloc; + if (env_force_device_alloc == 0) force_device_alloc=false; + else force_device_alloc=atoi(env_force_device_alloc)!=0; + + const char * env_visible_devices = getenv("CUDA_VISIBLE_DEVICES"); + bool visible_devices_one=true; + if (env_visible_devices == 0) visible_devices_one=false; + + if(!visible_devices_one && !force_device_alloc) { + std::cout << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default" << std::endl; + std::cout << " without setting CUDA_MANAGED_FORCE_DEVICE_ALLOC=1 or " << std::endl; + std::cout << " setting CUDA_VISIBLE_DEVICES." << std::endl; + std::cout << " This could on multi GPU systems lead to severe performance" << std::endl; + std::cout << " penalties." 
<< std::endl; + } + #endif + + cudaThreadSetCacheConfig(cudaFuncCachePreferShared); + + // Init the array for used for arbitrarily sized atomics + Impl::init_lock_arrays_cuda_space(); + + #ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE + Kokkos::Impl::CudaLockArraysStruct locks; + locks.atomic = atomic_lock_array_cuda_space_ptr(false); + locks.scratch = scratch_lock_array_cuda_space_ptr(false); + locks.threadid = threadid_lock_array_cuda_space_ptr(false); + cudaMemcpyToSymbol( kokkos_impl_cuda_lock_arrays , & locks , sizeof(CudaLockArraysStruct) ); + #endif +} + +//---------------------------------------------------------------------------- + +typedef Cuda::size_type ScratchGrain[ Impl::CudaTraits::WarpSize ] ; +enum { sizeScratchGrain = sizeof(ScratchGrain) }; + + +Cuda::size_type * +CudaInternal::scratch_flags( const Cuda::size_type size ) +{ + if ( verify_is_initialized("scratch_flags") && m_scratchFlagsCount * sizeScratchGrain < size ) { + + + m_scratchFlagsCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ; + + typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > Record ; + + Record * const r = Record::allocate( Kokkos::CudaSpace() + , "InternalScratchFlags" + , ( sizeof( ScratchGrain ) * m_scratchFlagsCount ) ); + + Record::increment( r ); + + m_scratchFlags = reinterpret_cast<size_type *>( r->data() ); + + CUDA_SAFE_CALL( cudaMemset( m_scratchFlags , 0 , m_scratchFlagsCount * sizeScratchGrain ) ); + } + + return m_scratchFlags ; +} + +Cuda::size_type * +CudaInternal::scratch_space( const Cuda::size_type size ) +{ + if ( verify_is_initialized("scratch_space") && m_scratchSpaceCount * sizeScratchGrain < size ) { + + m_scratchSpaceCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ; + + typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > Record ; + + Record * const r = Record::allocate( Kokkos::CudaSpace() + , "InternalScratchSpace" + , ( sizeof( ScratchGrain ) * 
m_scratchSpaceCount ) ); + + Record::increment( r ); + + m_scratchSpace = reinterpret_cast<size_type *>( r->data() ); + } + + return m_scratchSpace ; +} + +Cuda::size_type * +CudaInternal::scratch_unified( const Cuda::size_type size ) +{ + if ( verify_is_initialized("scratch_unified") && + m_scratchUnifiedSupported && m_scratchUnifiedCount * sizeScratchGrain < size ) { + + m_scratchUnifiedCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ; + + typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > Record ; + + Record * const r = Record::allocate( Kokkos::CudaHostPinnedSpace() + , "InternalScratchUnified" + , ( sizeof( ScratchGrain ) * m_scratchUnifiedCount ) ); + + Record::increment( r ); + + m_scratchUnified = reinterpret_cast<size_type *>( r->data() ); + } + + return m_scratchUnified ; +} + +//---------------------------------------------------------------------------- + +void CudaInternal::finalize() +{ + was_finalized = 1; + if ( 0 != m_scratchSpace || 0 != m_scratchFlags ) { + + atomic_lock_array_cuda_space_ptr(false); + scratch_lock_array_cuda_space_ptr(false); + threadid_lock_array_cuda_space_ptr(false); + + if ( m_stream ) { + for ( size_type i = 1 ; i < m_streamCount ; ++i ) { + cudaStreamDestroy( m_stream[i] ); + m_stream[i] = 0 ; + } + ::free( m_stream ); + } + + typedef Kokkos::Experimental::Impl::SharedAllocationRecord< CudaSpace > RecordCuda ; + typedef Kokkos::Experimental::Impl::SharedAllocationRecord< CudaHostPinnedSpace > RecordHost ; + + RecordCuda::decrement( RecordCuda::get_record( m_scratchFlags ) ); + RecordCuda::decrement( RecordCuda::get_record( m_scratchSpace ) ); + RecordHost::decrement( RecordHost::get_record( m_scratchUnified ) ); + + m_cudaDev = -1 ; + m_multiProcCount = 0 ; + m_maxWarpCount = 0 ; + m_maxBlock = 0 ; + m_maxSharedWords = 0 ; + m_scratchSpaceCount = 0 ; + m_scratchFlagsCount = 0 ; + m_scratchUnifiedCount = 0 ; + m_streamCount = 0 ; + m_scratchSpace = 0 ; + m_scratchFlags 
= 0 ; + m_scratchUnified = 0 ; + m_stream = 0 ; + } +} + +//---------------------------------------------------------------------------- + +Cuda::size_type cuda_internal_multiprocessor_count() +{ return CudaInternal::singleton().m_multiProcCount ; } + +Cuda::size_type cuda_internal_maximum_warp_count() +{ return CudaInternal::singleton().m_maxWarpCount ; } + +Cuda::size_type cuda_internal_maximum_grid_count() +{ return CudaInternal::singleton().m_maxBlock ; } + +Cuda::size_type cuda_internal_maximum_shared_words() +{ return CudaInternal::singleton().m_maxSharedWords ; } + +Cuda::size_type * cuda_internal_scratch_space( const Cuda::size_type size ) +{ return CudaInternal::singleton().scratch_space( size ); } + +Cuda::size_type * cuda_internal_scratch_flags( const Cuda::size_type size ) +{ return CudaInternal::singleton().scratch_flags( size ); } + +Cuda::size_type * cuda_internal_scratch_unified( const Cuda::size_type size ) +{ return CudaInternal::singleton().scratch_unified( size ); } + + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +Cuda::size_type Cuda::detect_device_count() +{ return Impl::CudaInternalDevices::singleton().m_cudaDevCount ; } + +int Cuda::concurrency() { + return 131072; +} + +int Cuda::is_initialized() +{ return Impl::CudaInternal::singleton().is_initialized(); } + +void Cuda::initialize( const Cuda::SelectDevice config , size_t num_instances ) +{ + Impl::CudaInternal::singleton().initialize( config.cuda_device_id , num_instances ); + + #if (KOKKOS_ENABLE_PROFILING) + Kokkos::Profiling::initialize(); + #endif +} + +std::vector<unsigned> +Cuda::detect_device_arch() +{ + const Impl::CudaInternalDevices & s = Impl::CudaInternalDevices::singleton(); + + std::vector<unsigned> output( s.m_cudaDevCount ); + + for ( int i = 0 ; i < s.m_cudaDevCount ; ++i ) { + output[i] = s.m_cudaProp[i].major * 100 + s.m_cudaProp[i].minor ; + } + + return output ; 
+} + +Cuda::size_type Cuda::device_arch() +{ + const int dev_id = Impl::CudaInternal::singleton().m_cudaDev ; + + int dev_arch = 0 ; + + if ( 0 <= dev_id ) { + const struct cudaDeviceProp & cudaProp = + Impl::CudaInternalDevices::singleton().m_cudaProp[ dev_id ] ; + + dev_arch = cudaProp.major * 100 + cudaProp.minor ; + } + + return dev_arch ; +} + +void Cuda::finalize() +{ + Impl::CudaInternal::singleton().finalize(); + + #if (KOKKOS_ENABLE_PROFILING) + Kokkos::Profiling::finalize(); + #endif +} + +Cuda::Cuda() + : m_device( Impl::CudaInternal::singleton().m_cudaDev ) + , m_stream( 0 ) +{ + Impl::CudaInternal::singleton().verify_is_initialized( "Cuda instance constructor" ); +} + +Cuda::Cuda( const int instance_id ) + : m_device( Impl::CudaInternal::singleton().m_cudaDev ) + , m_stream( + Impl::CudaInternal::singleton().verify_is_initialized( "Cuda instance constructor" ) + ? Impl::CudaInternal::singleton().m_stream[ instance_id % Impl::CudaInternal::singleton().m_streamCount ] + : 0 ) +{} + +void Cuda::print_configuration( std::ostream & s , const bool ) +{ Impl::CudaInternal::singleton().print_configuration( s ); } + +bool Cuda::sleep() { return false ; } + +bool Cuda::wake() { return true ; } + +void Cuda::fence() +{ + Kokkos::Impl::cuda_device_synchronize(); +} + +} // namespace Kokkos + +#endif // KOKKOS_HAVE_CUDA +//---------------------------------------------------------------------------- + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8b10d47f8857e9ca19b4ae962659f7e9137d78a1 --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp @@ -0,0 +1,202 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CUDA_INTERNAL_HPP +#define KOKKOS_CUDA_INTERNAL_HPP +#include<iostream> +#include <Kokkos_Macros.hpp> + +/* only compile this file if CUDA is enabled for Kokkos */ +#ifdef KOKKOS_HAVE_CUDA + +#include <Cuda/Kokkos_Cuda_Error.hpp> + +namespace Kokkos { namespace Impl { + +template<class DriverType, bool Large> +struct CudaGetMaxBlockSize; + +template<class DriverType, bool Large = (CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType))> +int cuda_get_max_block_size(const typename DriverType::functor_type & f, const size_t vector_length, + const size_t shmem_extra_block, const size_t shmem_extra_thread) { + return CudaGetMaxBlockSize<DriverType,Large>::get_block_size(f,vector_length, shmem_extra_block,shmem_extra_thread); +} + + +template<class DriverType> +struct CudaGetMaxBlockSize<DriverType,true> { + static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, + const size_t shmem_extra_block, const size_t shmem_extra_thread) { + int numBlocks; + int blockSize=32; + int sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + cuda_parallel_launch_constant_memory<DriverType>, + blockSize, + sharedmem); + + while (blockSize<1024 && numBlocks>0) { + blockSize*=2; + sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); + + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + cuda_parallel_launch_constant_memory<DriverType>, + blockSize, + sharedmem); + } + if(numBlocks>0) return blockSize; + else return blockSize/2; + } +}; + +template<class DriverType> +struct 
CudaGetMaxBlockSize<DriverType,false> { + static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, + const size_t shmem_extra_block, const size_t shmem_extra_thread) { + int numBlocks; + + int blockSize=32; + int sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + cuda_parallel_launch_local_memory<DriverType>, + blockSize, + sharedmem); + + while (blockSize<1024 && numBlocks>0) { + blockSize*=2; + sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); + + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + cuda_parallel_launch_local_memory<DriverType>, + blockSize, + sharedmem); + } + if(numBlocks>0) return blockSize; + else return blockSize/2; + } +}; + + + +template<class DriverType, bool Large> +struct CudaGetOptBlockSize; + +template<class DriverType, bool Large = (CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType))> +int cuda_get_opt_block_size(const typename DriverType::functor_type & f, const size_t vector_length, + const size_t shmem_extra_block, const size_t shmem_extra_thread) { + return CudaGetOptBlockSize<DriverType,Large>::get_block_size(f,vector_length,shmem_extra_block,shmem_extra_thread); +} + +template<class DriverType> +struct CudaGetOptBlockSize<DriverType,true> { + static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, + const size_t shmem_extra_block, const size_t shmem_extra_thread) { + int blockSize=16; + int numBlocks; + int sharedmem; + int maxOccupancy=0; + int bestBlockSize=0; + + while(blockSize<1024) { + blockSize*=2; + + //calculate the occupancy with that optBlockSize and check whether its larger than the largest one found 
so far + sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + cuda_parallel_launch_constant_memory<DriverType>, + blockSize, + sharedmem); + if(maxOccupancy < numBlocks*blockSize) { + maxOccupancy = numBlocks*blockSize; + bestBlockSize = blockSize; + } + } + return bestBlockSize; + } +}; + +template<class DriverType> +struct CudaGetOptBlockSize<DriverType,false> { + static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, + const size_t shmem_extra_block, const size_t shmem_extra_thread) { + int blockSize=16; + int numBlocks; + int sharedmem; + int maxOccupancy=0; + int bestBlockSize=0; + + while(blockSize<1024) { + blockSize*=2; + sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); + + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + cuda_parallel_launch_local_memory<DriverType>, + blockSize, + sharedmem); + + if(maxOccupancy < numBlocks*blockSize) { + maxOccupancy = numBlocks*blockSize; + bestBlockSize = blockSize; + } + } + return bestBlockSize; + } +}; + +}} // namespace Kokkos::Impl + +#endif // KOKKOS_HAVE_CUDA +#endif /* #ifndef KOKKOS_CUDA_INTERNAL_HPP */ + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7afa06fdf5582cd3543294b4156ac90a906a6ce7 --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp @@ -0,0 +1,1926 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
/** \brief Join operation that combines two values of \c Type with operator+=.
 *
 *  \tparam Type  Value type being combined; exposed as \c value_type.
 */
template< typename Type >
struct CudaJoinFunctor {
  typedef Type value_type ;

  /** \brief Accumulate \c input into \c update.
   *  \param update  Destination value, modified in place.
   *  \param input   Value added to \c update.
   */
  KOKKOS_INLINE_FUNCTION
  static void join( volatile value_type & update ,
                    volatile const value_type & input )
    { update += input ; }
};
int league_rank() const { return m_league_rank ; } + __device__ inline int league_size() const { return m_league_size ; } + __device__ inline int team_rank() const { return threadIdx.y ; } + __device__ inline int team_size() const { return blockDim.y ; } + + __device__ inline void team_barrier() const { __syncthreads(); } + + template<class ValueType> + __device__ inline void team_broadcast(ValueType& value, const int& thread_id) const { + __shared__ ValueType sh_val; + if(threadIdx.x == 0 && threadIdx.y == thread_id) { + sh_val = value; + } + team_barrier(); + value = sh_val; + team_barrier(); + } + +#ifdef KOKKOS_HAVE_CXX11 + template< class ValueType, class JoinOp > + __device__ inline + typename JoinOp::value_type team_reduce( const ValueType & value + , const JoinOp & op_in ) const + { + typedef JoinLambdaAdapter<ValueType,JoinOp> JoinOpFunctor ; + const JoinOpFunctor op(op_in); + ValueType * const base_data = (ValueType *) m_team_reduce ; +#else + template< class JoinOp > + __device__ inline + typename JoinOp::value_type team_reduce( const typename JoinOp::value_type & value + , const JoinOp & op ) const + { + typedef JoinOp JoinOpFunctor ; + typename JoinOp::value_type * const base_data = (typename JoinOp::value_type *) m_team_reduce ; +#endif + + __syncthreads(); // Don't write in to shared data until all threads have entered this function + + if ( 0 == threadIdx.y ) { base_data[0] = 0 ; } + + base_data[ threadIdx.y ] = value ; + + Impl::cuda_intra_block_reduce_scan<false,JoinOpFunctor,void>( op , base_data ); + + return base_data[ blockDim.y - 1 ]; + } + + /** \brief Intra-team exclusive prefix sum with team_rank() ordering + * with intra-team non-deterministic ordering accumulation. + * + * The global inter-team accumulation value will, at the end of the + * league's parallel execution, be the scan's total. + * Parallel execution ordering of the league's teams is non-deterministic. 
+ * As such the base value for each team's scan operation is similarly + * non-deterministic. + */ + template< typename Type > + __device__ inline Type team_scan( const Type & value , Type * const global_accum ) const + { + Type * const base_data = (Type *) m_team_reduce ; + + __syncthreads(); // Don't write in to shared data until all threads have entered this function + + if ( 0 == threadIdx.y ) { base_data[0] = 0 ; } + + base_data[ threadIdx.y + 1 ] = value ; + + Impl::cuda_intra_block_reduce_scan<true,Impl::CudaJoinFunctor<Type>,void>( Impl::CudaJoinFunctor<Type>() , base_data + 1 ); + + if ( global_accum ) { + if ( blockDim.y == threadIdx.y + 1 ) { + base_data[ blockDim.y ] = atomic_fetch_add( global_accum , base_data[ blockDim.y ] ); + } + __syncthreads(); // Wait for atomic + base_data[ threadIdx.y ] += base_data[ blockDim.y ] ; + } + + return base_data[ threadIdx.y ]; + } + + /** \brief Intra-team exclusive prefix sum with team_rank() ordering. + * + * The highest rank thread can compute the reduction total as + * reduction_total = dev.team_scan( value ) + value ; + */ + template< typename Type > + __device__ inline Type team_scan( const Type & value ) const + { return this->template team_scan<Type>( value , 0 ); } + + //---------------------------------------- + // Private for the driver + + __device__ inline + CudaTeamMember( void * shared + , const int shared_begin + , const int shared_size + , void* scratch_level_1_ptr + , const int scratch_level_1_size + , const int arg_league_rank + , const int arg_league_size ) + : m_team_reduce( shared ) + , m_team_shared( ((char *)shared) + shared_begin , shared_size, scratch_level_1_ptr, scratch_level_1_size) + , m_league_rank( arg_league_rank ) + , m_league_size( arg_league_size ) + {} + +#else + + const execution_space::scratch_memory_space & team_shmem() const + { return m_team_shared.set_team_thread_mode(0, 1,0) ; } + const execution_space::scratch_memory_space & team_scratch(const int& level) const + { return 
m_team_shared.set_team_thread_mode(level,1,0) ; } + const execution_space::scratch_memory_space & thread_scratch(const int& level) const + { return m_team_shared.set_team_thread_mode(level,team_size(),team_rank()) ; } + + /* Stubs for the branch compiled when __CUDA_ARCH__ is not defined: the rank and size queries return fixed values (rank 0, size 1), team_barrier() and team_broadcast() have empty bodies, and team_reduce() / team_scan() return a value-initialized result without touching their arguments. */ + int league_rank() const {return 0;} + int league_size() const {return 1;} + int team_rank() const {return 0;} + int team_size() const {return 1;} + + void team_barrier() const {} + template<class ValueType> + void team_broadcast(ValueType& value, const int& thread_id) const {} + + template< class JoinOp > + typename JoinOp::value_type team_reduce( const typename JoinOp::value_type & value + , const JoinOp & op ) const {return typename JoinOp::value_type();} + + template< typename Type > + Type team_scan( const Type & value , Type * const global_accum ) const {return Type();} + + template< typename Type > + Type team_scan( const Type & value ) const {return Type();} + + //---------------------------------------- + // Private for the driver + + /* Declaration only in this branch. NOTE(review): the third parameter is named shared_end here but shared_size in the __CUDA_ARCH__ constructor - confirm which meaning is intended. */ + CudaTeamMember( void * shared + , const int shared_begin + , const int shared_end + , void* scratch_level_1_ptr + , const int scratch_level_1_size + , const int arg_league_rank + , const int arg_league_size ); + +#endif /* #if defined( __CUDA_ARCH__ ) ... #else */ + +}; + +} // namespace Impl + +namespace Impl { +template< class ... Properties > +class TeamPolicyInternal< Kokkos::Cuda , Properties ... >: public PolicyTraits<Properties ... > +{ +public: + + //! Tag this class as a kokkos execution policy + typedef TeamPolicyInternal execution_policy ; + + typedef PolicyTraits<Properties ... > traits; + +private: + + /* team_size_max() starts its search at MAX_WARP * CudaTraits::WarpSize threads. */ + enum { MAX_WARP = 8 }; + + int m_league_size ; + int m_team_size ; + int m_vector_length ; + /* The two scratch-size arrays are indexed by scratch level. */ + int m_team_scratch_size[2] ; + int m_thread_scratch_size[2] ; + int m_chunk_size; + +public: + + //!
Execution space of this execution policy + typedef Kokkos::Cuda execution_space ; + + TeamPolicyInternal& operator = (const TeamPolicyInternal& p) { + m_league_size = p.m_league_size; + m_team_size = p.m_team_size; + m_vector_length = p.m_vector_length; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + return *this; + } + + //---------------------------------------- + + template< class FunctorType > + inline static + int team_size_max( const FunctorType & functor ) + { + int n = MAX_WARP * Impl::CudaTraits::WarpSize ; + + for ( ; n ; n >>= 1 ) { + const int shmem_size = + /* for global reduce */ Impl::cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,typename traits::work_tag>( functor , n ) + /* for team reduce */ + ( n + 2 ) * sizeof(double) + /* for team shared */ + Impl::FunctorTeamShmemSize< FunctorType >::value( functor , n ); + + if ( shmem_size < Impl::CudaTraits::SharedMemoryCapacity ) break ; + } + + return n ; + } + + template< class FunctorType > + static int team_size_recommended( const FunctorType & functor ) + { return team_size_max( functor ); } + + template< class FunctorType > + static int team_size_recommended( const FunctorType & functor , const int vector_length) + { + int max = team_size_max( functor )/vector_length; + if(max<1) max = 1; + return max; + } + + inline static + int vector_length_max() + { return Impl::CudaTraits::WarpSize; } + + //---------------------------------------- + + inline int vector_length() const { return m_vector_length ; } + inline int team_size() const { return m_team_size ; } + inline int league_size() const { return m_league_size ; } + inline int scratch_size(int level, int team_size_ = -1) const { + if(team_size_<0) team_size_ = m_team_size; + return m_team_scratch_size[level] + 
team_size_*m_thread_scratch_size[level]; + } + inline size_t team_scratch_size(int level) const { + return m_team_scratch_size[level]; + } + inline size_t thread_scratch_size(int level) const { + return m_thread_scratch_size[level]; + } + + /** \brief Default policy: league size, team size and vector length are 0, scratch sizes are 0, chunk size is 32 */ + TeamPolicyInternal() + : m_league_size( 0 ) + , m_team_size( 0 ) + , m_vector_length( 0 ) + , m_team_scratch_size {0,0} + , m_thread_scratch_size {0,0} + , m_chunk_size ( 32 ) + {} + + /** \brief Specify league size, request team size. Raises a runtime exception when the vector length is not a power of two, when the league size reaches cuda_internal_maximum_grid_count(), or when team size x vector length exceeds 1024 */ + TeamPolicyInternal( execution_space & + , int league_size_ + , int team_size_request + , int vector_length_request = 1 ) + : m_league_size( league_size_ ) + , m_team_size( team_size_request ) + , m_vector_length( vector_length_request ) + , m_team_scratch_size {0,0} + , m_thread_scratch_size {0,0} + , m_chunk_size ( 32 ) + { + // Allow only power-of-two vector_length + if ( ! Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) { + Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy."); + } + + // Make sure league size is permissible + if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count())) + Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space."); + + // Make sure total block size is permissible (a product of exactly 1024 is accepted) + if ( m_team_size * m_vector_length > 1024 ) { + Impl::throw_runtime_exception(std::string("Kokkos::TeamPolicy< Cuda > the team size is too large. Team size x vector length must not exceed 1024.")); + } + } + + /** \brief Specify league size and Kokkos::AUTO team size: the team size is stored as -1; the ParallelFor / ParallelReduce constructors replace a negative team size with a value derived from cuda_get_opt_block_size */ + TeamPolicyInternal( execution_space & + , int league_size_ + , const Kokkos::AUTO_t & /* team_size_request */ + , int vector_length_request = 1 ) + : m_league_size( league_size_ ) + , m_team_size( -1 ) + , m_vector_length( vector_length_request ) + , m_team_scratch_size {0,0} + , m_thread_scratch_size {0,0} + , m_chunk_size ( 32 ) + { + // Allow only power-of-two vector_length + if ( !
Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) { + Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy."); + } + + // Make sure league size is permissible + if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count())) + Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space."); + } + + /** \brief Same checks as the (execution_space&, league size, team size) constructor, without the execution space argument */ + TeamPolicyInternal( int league_size_ + , int team_size_request + , int vector_length_request = 1 ) + : m_league_size( league_size_ ) + , m_team_size( team_size_request ) + , m_vector_length ( vector_length_request ) + , m_team_scratch_size {0,0} + , m_thread_scratch_size {0,0} + , m_chunk_size ( 32 ) + { + // Allow only power-of-two vector_length + if ( ! Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) { + Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy."); + } + + // Make sure league size is permissible + if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count())) + Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space."); + + // Make sure total block size is permissible (a product of exactly 1024 is accepted) + if ( m_team_size * m_vector_length > 1024 ) { + Impl::throw_runtime_exception(std::string("Kokkos::TeamPolicy< Cuda > the team size is too large. Team size x vector length must not exceed 1024.")); + } + } + + /** \brief Kokkos::AUTO team size without the execution space argument: the team size is stored as -1 */ + TeamPolicyInternal( int league_size_ + , const Kokkos::AUTO_t & /* team_size_request */ + , int vector_length_request = 1 ) + : m_league_size( league_size_ ) + , m_team_size( -1 ) + , m_vector_length ( vector_length_request ) + , m_team_scratch_size {0,0} + , m_thread_scratch_size {0,0} + , m_chunk_size ( 32 ) + { + // Allow only power-of-two vector_length + if ( !
Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) { + Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy."); + } + + // Make sure league size is permissible + if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count())) + Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space."); + } + + inline int chunk_size() const { return m_chunk_size ; } + + /** \brief return a copy of this policy with chunk_size set to a discrete value; *this is not modified */ + inline TeamPolicyInternal set_chunk_size(typename traits::index_type chunk_size_) const { + TeamPolicyInternal p = *this; + p.m_chunk_size = chunk_size_; + return p; + } + + /** \brief return a copy with the per team scratch size set for a specific level of the scratch hierarchy */ + inline TeamPolicyInternal set_scratch_size(const int& level, const PerTeamValue& per_team) const { + TeamPolicyInternal p = *this; + p.m_team_scratch_size[level] = per_team.value; + return p; + }; + + /** \brief return a copy with the per thread scratch size set for a specific level of the scratch hierarchy */ + inline TeamPolicyInternal set_scratch_size(const int& level, const PerThreadValue& per_thread) const { + TeamPolicyInternal p = *this; + p.m_thread_scratch_size[level] = per_thread.value; + return p; + }; + + /** \brief return a copy with both the per thread and per team scratch size set for a specific level of the scratch hierarchy */ + inline TeamPolicyInternal set_scratch_size(const int& level, const PerTeamValue& per_team, const PerThreadValue& per_thread) const { + TeamPolicyInternal p = *this; + p.m_team_scratch_size[level] = per_team.value; + p.m_thread_scratch_size[level] = per_thread.value; + return p; + }; + + typedef Kokkos::Impl::CudaTeamMember member_type ; +}; +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class FunctorType , class
... Traits > +class ParallelFor< FunctorType + , Kokkos::RangePolicy< Traits ... > + , Kokkos::Cuda + > +{ +private: + + typedef Kokkos::RangePolicy< Traits ... > Policy; + typedef typename Policy::member_type Member ; + typedef typename Policy::work_tag WorkTag ; + + const FunctorType m_functor ; + const Policy m_policy ; + + ParallelFor() = delete ; + ParallelFor & operator = ( const ParallelFor & ) = delete ; + + template< class TagType > + inline __device__ + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_range( const Member i ) const + { m_functor( i ); } + + template< class TagType > + inline __device__ + typename std::enable_if< ! std::is_same< TagType , void >::value >::type + exec_range( const Member i ) const + { m_functor( TagType() , i ); } + +public: + + typedef FunctorType functor_type ; + + inline + __device__ + void operator()(void) const + { + const Member work_stride = blockDim.y * gridDim.x ; + const Member work_end = m_policy.end(); + + for ( Member + iwork = m_policy.begin() + threadIdx.y + blockDim.y * blockIdx.x ; + iwork < work_end ; + iwork += work_stride ) { + this-> template exec_range< WorkTag >( iwork ); + } + } + + inline + void execute() const + { + const int nwork = m_policy.end() - m_policy.begin(); + const dim3 block( 1 , CudaTraits::WarpSize * cuda_internal_maximum_warp_count(), 1); + const dim3 grid( std::min( ( nwork + block.y - 1 ) / block.y , cuda_internal_maximum_grid_count() ) , 1 , 1); + + CudaParallelLaunch< ParallelFor >( *this , grid , block , 0 ); + } + + ParallelFor( const FunctorType & arg_functor , + const Policy & arg_policy ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + { } +}; + +template< class FunctorType , class ... Properties > +class ParallelFor< FunctorType + , Kokkos::TeamPolicy< Properties ... > + , Kokkos::Cuda + > +{ +private: + + typedef TeamPolicyInternal< Kokkos::Cuda , Properties ... 
> Policy ; + typedef typename Policy::member_type Member ; + typedef typename Policy::work_tag WorkTag ; + +public: + + typedef FunctorType functor_type ; + typedef Cuda::size_type size_type ; + +private: + + // Algorithmic constraints: blockDim.y is a power of two AND blockDim.y == blockDim.z == 1 + // shared memory utilization: + // + // [ team reduce space ] + // [ team shared space ] + // + + const FunctorType m_functor ; + const size_type m_league_size ; + const size_type m_team_size ; + const size_type m_vector_size ; + const size_type m_shmem_begin ; + const size_type m_shmem_size ; + void* m_scratch_ptr[2] ; + const int m_scratch_size[2] ; + + template< class TagType > + __device__ inline + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_team( const Member & member ) const + { m_functor( member ); } + + template< class TagType > + __device__ inline + typename std::enable_if< ! std::is_same< TagType , void >::value >::type + exec_team( const Member & member ) const + { m_functor( TagType() , member ); } + +public: + + __device__ inline + void operator()(void) const + { + // Iterate this block through the league + for ( int league_rank = blockIdx.x ; league_rank < m_league_size ; league_rank += gridDim.x ) { + + this-> template exec_team< WorkTag >( + typename Policy::member_type( kokkos_impl_cuda_shared_memory<void>() + , m_shmem_begin + , m_shmem_size + , m_scratch_ptr[1] + , m_scratch_size[1] + , league_rank + , m_league_size ) ); + } + } + + inline + void execute() const + { + const int shmem_size_total = m_shmem_begin + m_shmem_size ; + const dim3 grid( int(m_league_size) , 1 , 1 ); + const dim3 block( int(m_vector_size) , int(m_team_size) , 1 ); + + CudaParallelLaunch< ParallelFor >( *this, grid, block, shmem_size_total ); // copy to device and execute + + } + + ParallelFor( const FunctorType & arg_functor + , const Policy & arg_policy + ) + : m_functor( arg_functor ) + , m_league_size( arg_policy.league_size() ) + , 
m_team_size( 0 <= arg_policy.team_size() ? arg_policy.team_size() : + Kokkos::Impl::cuda_get_opt_block_size< ParallelFor >( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length() ) + , m_vector_size( arg_policy.vector_length() ) + , m_shmem_begin( sizeof(double) * ( m_team_size + 2 ) ) + , m_shmem_size( arg_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( m_functor , m_team_size ) ) + , m_scratch_ptr{NULL,NULL} + , m_scratch_size{arg_policy.scratch_size(0,m_team_size),arg_policy.scratch_size(1,m_team_size)} + { + // Functor's reduce memory, team scan memory, and team shared memory depend upon team size. + m_scratch_ptr[1] = cuda_resize_scratch_space(m_scratch_size[1]*(Cuda::concurrency()/(m_team_size*m_vector_size))); + + const int shmem_size_total = m_shmem_begin + m_shmem_size ; + if ( CudaTraits::SharedMemoryCapacity < shmem_size_total ) { + Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelFor< Cuda > insufficient shared memory")); + } + + if ( int(m_team_size) > + int(Kokkos::Impl::cuda_get_max_block_size< ParallelFor > + ( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length())) { + Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelFor< Cuda > requested too large team size.")); + } + } +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class FunctorType , class ReducerType, class ... Traits > +class ParallelReduce< FunctorType + , Kokkos::RangePolicy< Traits ... > + , ReducerType + , Kokkos::Cuda + > +{ +private: + + typedef Kokkos::RangePolicy< Traits ... 
> Policy ; + + typedef typename Policy::WorkRange WorkRange ; + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::member_type Member ; + + typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ; + +public: + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::value_type value_type ; + typedef typename ValueTraits::reference_type reference_type ; + typedef FunctorType functor_type ; + typedef Cuda::size_type size_type ; + + // Algorithmic constraints: blockSize is a power of two AND blockDim.y == blockDim.z == 1 + + const FunctorType m_functor ; + const Policy m_policy ; + const ReducerType m_reducer ; + const pointer_type m_result_ptr ; + size_type * m_scratch_space ; + size_type * m_scratch_flags ; + size_type * m_unified_space ; + + // Shall we use the shfl based reduction or not (only use it for static sized types of more than 128bit + enum { UseShflReduction = ((sizeof(value_type)>2*sizeof(double)) && ValueTraits::StaticValueSize) }; + // Some crutch to do function overloading +private: + typedef double DummyShflReductionType; + typedef int DummySHMEMReductionType; + +public: + template< class TagType > + __device__ inline + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_range( const Member & i , reference_type update ) const + { m_functor( i , update ); } + + template< class TagType > + __device__ inline + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + exec_range( const Member & i , reference_type update ) const + { m_functor( TagType() , i , update ); } + + /* Kernel entry point. The value returned by if_c<UseShflReduction,...>::select has type DummyShflReductionType or DummySHMEMReductionType, and that type picks which run() overload executes. */ + __device__ inline + void operator() () const { + run(Kokkos::Impl::if_c<UseShflReduction, DummyShflReductionType, DummySHMEMReductionType>::select(1,1.0) ); + } + + /* Reduction path that keeps one accumulator per threadIdx.y in shared memory (word_count words each), then hands the block result to cuda_single_inter_block_reduce_scan. */ + __device__ inline + void run(const DummySHMEMReductionType& ) const + { + const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) > + word_count( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) / sizeof(size_type) ); + + { + reference_type value = + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , kokkos_impl_cuda_shared_memory<size_type>() + threadIdx.y * word_count.value ); + + // Number of blocks is bounded so that the reduction can be limited to two passes. + // Each thread block is given an approximately equal amount of work to perform. + // Accumulate the values for this block. + // The accumulation ordering does not match the final pass, but is arithmetically equivalent. + + const WorkRange range( m_policy , blockIdx.x , gridDim.x ); + + for ( Member iwork = range.begin() + threadIdx.y , iwork_end = range.end() ; + iwork < iwork_end ; iwork += blockDim.y ) { + this-> template exec_range< WorkTag >( iwork , value ); + } + } + + // Reduce with final value at blockDim.y - 1 location. + if ( cuda_single_inter_block_reduce_scan<false,ReducerTypeFwd,WorkTag>( + ReducerConditional::select(m_functor , m_reducer) , blockIdx.x , gridDim.x , + kokkos_impl_cuda_shared_memory<size_type>() , m_scratch_space , m_scratch_flags ) ) { + + // This is the final block with the final result at the final thread's location + + size_type * const shared = kokkos_impl_cuda_shared_memory<size_type>() + ( blockDim.y - 1 ) * word_count.value ; + size_type * const global = m_unified_space ?
m_unified_space : m_scratch_space ; + + if ( threadIdx.y == 0 ) { + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , shared ); + } + + if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); } + + /* Copy the finalized result word by word to m_unified_space when it is non-null, otherwise to m_scratch_space. */ + for ( unsigned i = threadIdx.y ; i < word_count.value ; i += blockDim.y ) { global[i] = shared[i]; } + } + } + + /* Reduction path selected when UseShflReduction is set: each thread accumulates into a local value_type and the partial results are combined by cuda_inter_block_reduction. */ + __device__ inline + void run(const DummyShflReductionType&) const + { + + value_type value; + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &value); + // Number of blocks is bounded so that the reduction can be limited to two passes. + // Each thread block is given an approximately equal amount of work to perform. + // Accumulate the values for this block. + // The accumulation ordering does not match the final pass, but is arithmetically equivalent. + + const WorkRange range( m_policy , blockIdx.x , gridDim.x ); + + for ( Member iwork = range.begin() + threadIdx.y , iwork_end = range.end() ; + iwork < iwork_end ; iwork += blockDim.y ) { + this-> template exec_range< WorkTag >( iwork , value ); + } + + pointer_type const result = (pointer_type) (m_unified_space ? m_unified_space : m_scratch_space) ; + + int max_active_thread = range.end()-range.begin() < blockDim.y ?
range.end() - range.begin():blockDim.y; + + max_active_thread = (max_active_thread == 0)?blockDim.y:max_active_thread; + + value_type init; + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &init); + if(Impl::cuda_inter_block_reduction<ReducerTypeFwd,ValueJoin,WorkTag> + (value,init,ValueJoin(ReducerConditional::select(m_functor , m_reducer)),m_scratch_space,result,m_scratch_flags,max_active_thread)) { + const unsigned id = threadIdx.y*blockDim.x + threadIdx.x; + if(id==0) { + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value ); + *result = value; + } + } + } + + // Determine block size constrained by shared memory: + static inline + unsigned local_block_size( const FunctorType & f ) + { + unsigned n = CudaTraits::WarpSize * 8 ; + while ( n && CudaTraits::SharedMemoryCapacity < cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,WorkTag>( f , n ) ) { n >>= 1 ; } + return n ; + } + + inline + void execute() + { + const int nwork = m_policy.end() - m_policy.begin(); + if ( nwork ) { + const int block_size = local_block_size( m_functor ); + + m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_size /* block_size == max block_count */ ); + m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) ); + m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) ); + + // REQUIRED ( 1 , N , 1 ) + const dim3 block( 1 , block_size , 1 ); + // Required grid.x <= block.y + const dim3 grid( std::min( int(block.y) , int( ( nwork + block.y - 1 ) / block.y ) ) , 1 , 1 ); + + const int shmem = UseShflReduction?0:cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,WorkTag>( m_functor , block.y ); + + + CudaParallelLaunch< ParallelReduce >( *this, grid, block, shmem ); // copy to device and execute + + Cuda::fence(); 
+ + if ( m_result_ptr ) { + if ( m_unified_space ) { + const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); + for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; } + } + else { + const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ); + DeepCopy<HostSpace,CudaSpace>( m_result_ptr , m_scratch_space , size ); + } + } + } + else { + if (m_result_ptr) { + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , m_result_ptr ); + } + } + } + + template< class HostViewType > + ParallelReduce( const FunctorType & arg_functor + , const Policy & arg_policy + , const HostViewType & arg_result + , typename std::enable_if< + Kokkos::is_view< HostViewType >::value + ,void*>::type = NULL) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( InvalidType() ) + , m_result_ptr( arg_result.ptr_on_device() ) + , m_scratch_space( 0 ) + , m_scratch_flags( 0 ) + , m_unified_space( 0 ) + { } + + ParallelReduce( const FunctorType & arg_functor + , const Policy & arg_policy + , const ReducerType & reducer) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( reducer ) + , m_result_ptr( reducer.result_view().ptr_on_device() ) + , m_scratch_space( 0 ) + , m_scratch_flags( 0 ) + , m_unified_space( 0 ) + { } +}; + +//---------------------------------------------------------------------------- + +template< class FunctorType , class ReducerType, class ... Properties > +class ParallelReduce< FunctorType + , Kokkos::TeamPolicy< Properties ... > + , ReducerType + , Kokkos::Cuda + > +{ +private: + + typedef TeamPolicyInternal< Kokkos::Cuda, Properties ... 
> Policy ; + typedef typename Policy::member_type Member ; + typedef typename Policy::work_tag WorkTag ; + + typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ; + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + typedef typename ValueTraits::value_type value_type ; + + +public: + + typedef FunctorType functor_type ; + typedef Cuda::size_type size_type ; + + enum { UseShflReduction = (true && ValueTraits::StaticValueSize) }; + +private: + typedef double DummyShflReductionType; + typedef int DummySHMEMReductionType; + + + // Algorithmic constraints: blockDim.y is a power of two AND blockDim.y == blockDim.z == 1 + // shared memory utilization: + // + // [ global reduce space ] + // [ team reduce space ] + // [ team shared space ] + // + + const FunctorType m_functor ; + const ReducerType m_reducer ; + const pointer_type m_result_ptr ; + size_type * m_scratch_space ; + size_type * m_scratch_flags ; + size_type * m_unified_space ; + size_type m_team_begin ; + size_type m_shmem_begin ; + size_type m_shmem_size ; + void* m_scratch_ptr[2] ; + int m_scratch_size[2] ; + const size_type m_league_size ; + const size_type m_team_size ; + const size_type m_vector_size ; + + template< class TagType > + __device__ inline + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_team( const Member & member , reference_type update ) const + { m_functor( member , update ); } + + template< class TagType > + __device__ inline + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + exec_team( const Member & member , reference_type update ) const + { m_functor( TagType() , member , update ); } + +public: + + __device__ inline + void operator() () const { + run(Kokkos::Impl::if_c<UseShflReduction, DummyShflReductionType, DummySHMEMReductionType>::select(1,1.0) ); + } + + __device__ inline + void run(const DummySHMEMReductionType&) const + { + const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) > + word_count( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) / sizeof(size_type) ); + + reference_type value = + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , kokkos_impl_cuda_shared_memory<size_type>() + threadIdx.y * word_count.value ); + + // Iterate this block through the league + for ( int league_rank = blockIdx.x ; league_rank < m_league_size ; league_rank += gridDim.x ) { + this-> template exec_team< WorkTag > + ( Member( kokkos_impl_cuda_shared_memory<char>() + m_team_begin + , m_shmem_begin + , m_shmem_size + , m_scratch_ptr[1] + , m_scratch_size[1] + , league_rank + , m_league_size ) + , value ); + } + + // Reduce with final value at blockDim.y - 1 location. + if ( cuda_single_inter_block_reduce_scan<false,FunctorType,WorkTag>( + ReducerConditional::select(m_functor , m_reducer) , blockIdx.x , gridDim.x , + kokkos_impl_cuda_shared_memory<size_type>() , m_scratch_space , m_scratch_flags ) ) { + + // This is the final block with the final result at the final threads' location + + size_type * const shared = kokkos_impl_cuda_shared_memory<size_type>() + ( blockDim.y - 1 ) * word_count.value ; + size_type * const global = m_unified_space ? 
m_unified_space : m_scratch_space ; + + if ( threadIdx.y == 0 ) { + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , shared ); + } + + if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); } + + for ( unsigned i = threadIdx.y ; i < word_count.value ; i += blockDim.y ) { global[i] = shared[i]; } + } + } + + __device__ inline + void run(const DummyShflReductionType&) const + { + value_type value; + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &value); + + // Iterate this block through the league + for ( int league_rank = blockIdx.x ; league_rank < m_league_size ; league_rank += gridDim.x ) { + this-> template exec_team< WorkTag > + ( Member( kokkos_impl_cuda_shared_memory<char>() + m_team_begin + , m_shmem_begin + , m_shmem_size + , m_scratch_ptr[1] + , m_scratch_size[1] + , league_rank + , m_league_size ) + , value ); + } + + pointer_type const result = (pointer_type) (m_unified_space ? m_unified_space : m_scratch_space) ; + + value_type init; + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &init); + if(Impl::cuda_inter_block_reduction<FunctorType,ValueJoin,WorkTag> + (value,init,ValueJoin(ReducerConditional::select(m_functor , m_reducer)),m_scratch_space,result,m_scratch_flags,blockDim.y)) { + const unsigned id = threadIdx.y*blockDim.x + threadIdx.x; + if(id==0) { + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value ); + *result = value; + } + } + } + + inline + void execute() + { + const int block_count = UseShflReduction? 
std::min( m_league_size , size_type(1024) ) + :std::min( m_league_size , m_team_size ); + + m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_count ); + m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) ); + m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) ); + + const dim3 block( m_vector_size , m_team_size , 1 ); + const dim3 grid( block_count , 1 , 1 ); + const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size ; + + CudaParallelLaunch< ParallelReduce >( *this, grid, block, shmem_size_total ); // copy to device and execute + + Cuda::fence(); + + if ( m_result_ptr ) { + if ( m_unified_space ) { + const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); + for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; } + } + else { + const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ); + DeepCopy<HostSpace,CudaSpace>( m_result_ptr, m_scratch_space, size ); + } + } + } + + template< class HostViewType > + ParallelReduce( const FunctorType & arg_functor + , const Policy & arg_policy + , const HostViewType & arg_result + , typename std::enable_if< + Kokkos::is_view< HostViewType >::value + ,void*>::type = NULL) + : m_functor( arg_functor ) + , m_reducer( InvalidType() ) + , m_result_ptr( arg_result.ptr_on_device() ) + , m_scratch_space( 0 ) + , m_scratch_flags( 0 ) + , m_unified_space( 0 ) + , m_team_begin( 0 ) + , m_shmem_begin( 0 ) + , m_shmem_size( 0 ) + , m_scratch_ptr{NULL,NULL} + , m_league_size( arg_policy.league_size() ) + , m_team_size( 0 <= arg_policy.team_size() ? 
arg_policy.team_size() : + Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(), + arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / + arg_policy.vector_length() ) + , m_vector_size( arg_policy.vector_length() ) + , m_scratch_size{arg_policy.scratch_size(0,m_team_size),arg_policy.scratch_size(1,m_team_size)} + { + // Return Init value if the number of worksets is zero + if( arg_policy.league_size() == 0) { + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , arg_result.ptr_on_device() ); + return ; + } + + m_team_begin = UseShflReduction?0:cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,WorkTag>( arg_functor , m_team_size ); + m_shmem_begin = sizeof(double) * ( m_team_size + 2 ); + m_shmem_size = arg_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , m_team_size ); + m_scratch_ptr[1] = cuda_resize_scratch_space(m_scratch_size[1]*(Cuda::concurrency()/(m_team_size*m_vector_size))); + m_scratch_size[0] = m_shmem_size; + m_scratch_size[1] = arg_policy.scratch_size(1,m_team_size); + + // The global parallel_reduce does not support vector_length other than 1 at the moment + if( (arg_policy.vector_length() > 1) && !UseShflReduction ) + Impl::throw_runtime_exception( "Kokkos::parallel_reduce with a TeamPolicy using a vector length of greater than 1 is not currently supported for CUDA for dynamic sized reduction types."); + + if( (m_team_size < 32) && !UseShflReduction ) + Impl::throw_runtime_exception( "Kokkos::parallel_reduce with a TeamPolicy using a team_size smaller than 32 is not currently supported with CUDA for dynamic sized reduction types."); + + // Functor's reduce memory, team scan memory, and team shared memory depend upon team size. + + const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size ; + + if (! 
Kokkos::Impl::is_integral_power_of_two( m_team_size ) && !UseShflReduction ) { + Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > bad team size")); + } + + if ( CudaTraits::SharedMemoryCapacity < shmem_size_total ) { + Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too much L0 scratch memory")); + } + + if ( m_team_size > + Kokkos::Impl::cuda_get_max_block_size< ParallelReduce > + ( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length()) { + Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too large team size.")); + } + + } + + ParallelReduce( const FunctorType & arg_functor + , const Policy & arg_policy + , const ReducerType & reducer) + : m_functor( arg_functor ) + , m_reducer( reducer ) + , m_result_ptr( reducer.result_view().ptr_on_device() ) + , m_scratch_space( 0 ) + , m_scratch_flags( 0 ) + , m_unified_space( 0 ) + , m_team_begin( 0 ) + , m_shmem_begin( 0 ) + , m_shmem_size( 0 ) + , m_scratch_ptr{NULL,NULL} + , m_league_size( arg_policy.league_size() ) + , m_team_size( 0 <= arg_policy.team_size() ? 
arg_policy.team_size() : + Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(), + arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / + arg_policy.vector_length() ) + , m_vector_size( arg_policy.vector_length() ) + { + // Return Init value if the number of worksets is zero + if( arg_policy.league_size() == 0) { + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , m_result_ptr ); + return ; + } + + m_team_begin = UseShflReduction?0:cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,WorkTag>( arg_functor , m_team_size ); + m_shmem_begin = sizeof(double) * ( m_team_size + 2 ); + m_shmem_size = arg_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , m_team_size ); + m_scratch_ptr[1] = cuda_resize_scratch_space(m_scratch_size[1]*(Cuda::concurrency()/(m_team_size*m_vector_size))); + m_scratch_size[0] = m_shmem_size; + m_scratch_size[1] = arg_policy.scratch_size(1,m_team_size); + + // The global parallel_reduce does not support vector_length other than 1 at the moment + if( (arg_policy.vector_length() > 1) && !UseShflReduction ) + Impl::throw_runtime_exception( "Kokkos::parallel_reduce with a TeamPolicy using a vector length of greater than 1 is not currently supported for CUDA for dynamic sized reduction types."); + + if( (m_team_size < 32) && !UseShflReduction ) + Impl::throw_runtime_exception( "Kokkos::parallel_reduce with a TeamPolicy using a team_size smaller than 32 is not currently supported with CUDA for dynamic sized reduction types."); + + // Functor's reduce memory, team scan memory, and team shared memory depend upon team size. + + const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size ; + + if ( (! 
Kokkos::Impl::is_integral_power_of_two( m_team_size ) && !UseShflReduction ) || + CudaTraits::SharedMemoryCapacity < shmem_size_total ) { + Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > bad team size")); + } + + if ( int(m_team_size) > + int(Kokkos::Impl::cuda_get_max_block_size< ParallelReduce > + ( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length())) { + Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too large team size.")); + } + + } +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class FunctorType , class ... Traits > +class ParallelScan< FunctorType + , Kokkos::RangePolicy< Traits ... > + , Kokkos::Cuda + > +{ +private: + + typedef Kokkos::RangePolicy< Traits ... 
> Policy ; + typedef typename Policy::member_type Member ; + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::WorkRange WorkRange ; + + typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueOps< FunctorType, WorkTag > ValueOps ; + +public: + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + typedef FunctorType functor_type ; + typedef Cuda::size_type size_type ; + +private: + + // Algorithmic constraints: + // (a) blockDim.y is a power of two + // (b) blockDim.y == blockDim.z == 1 + // (c) gridDim.x <= blockDim.y * blockDim.y + // (d) gridDim.y == gridDim.z == 1 + + const FunctorType m_functor ; + const Policy m_policy ; + size_type * m_scratch_space ; + size_type * m_scratch_flags ; + size_type m_final ; + + template< class TagType > + __device__ inline + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_range( const Member & i , reference_type update , const bool final_result ) const + { m_functor( i , update , final_result ); } + + template< class TagType > + __device__ inline + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + exec_range( const Member & i , reference_type update , const bool final_result ) const + { m_functor( TagType() , i , update , final_result ); } + + //---------------------------------------- + + __device__ inline + void initial(void) const + { + const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) > + word_count( ValueTraits::value_size( m_functor ) / sizeof(size_type) ); + + size_type * const shared_value = kokkos_impl_cuda_shared_memory<size_type>() + word_count.value * threadIdx.y ; + + ValueInit::init( m_functor , shared_value ); + + // Number of blocks is bounded so that the reduction can be limited to two passes. + // Each thread block is given an approximately equal amount of work to perform. + // Accumulate the values for this block. + // The accumulation ordering does not match the final pass, but is arithmatically equivalent. + + const WorkRange range( m_policy , blockIdx.x , gridDim.x ); + + for ( Member iwork = range.begin() + threadIdx.y , iwork_end = range.end() ; + iwork < iwork_end ; iwork += blockDim.y ) { + this-> template exec_range< WorkTag >( iwork , ValueOps::reference( shared_value ) , false ); + } + + // Reduce and scan, writing out scan of blocks' totals and block-groups' totals. + // Blocks' scan values are written to 'blockIdx.x' location. 
+ // Block-groups' scan values are at: i = ( j * blockDim.y - 1 ) for i < gridDim.x + cuda_single_inter_block_reduce_scan<true,FunctorType,WorkTag>( m_functor , blockIdx.x , gridDim.x , kokkos_impl_cuda_shared_memory<size_type>() , m_scratch_space , m_scratch_flags ); + } + + //---------------------------------------- + + __device__ inline + void final(void) const + { + const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) > + word_count( ValueTraits::value_size( m_functor ) / sizeof(size_type) ); + + // Use shared memory as an exclusive scan: { 0 , value[0] , value[1] , value[2] , ... } + size_type * const shared_data = kokkos_impl_cuda_shared_memory<size_type>(); + size_type * const shared_prefix = shared_data + word_count.value * threadIdx.y ; + size_type * const shared_accum = shared_data + word_count.value * ( blockDim.y + 1 ); + + // Starting value for this thread block is the previous block's total. + if ( blockIdx.x ) { + size_type * const block_total = m_scratch_space + word_count.value * ( blockIdx.x - 1 ); + for ( unsigned i = threadIdx.y ; i < word_count.value ; ++i ) { shared_accum[i] = block_total[i] ; } + } + else if ( 0 == threadIdx.y ) { + ValueInit::init( m_functor , shared_accum ); + } + + const WorkRange range( m_policy , blockIdx.x , gridDim.x ); + + for ( typename Policy::member_type iwork_base = range.begin(); iwork_base < range.end() ; iwork_base += blockDim.y ) { + + const typename Policy::member_type iwork = iwork_base + threadIdx.y ; + + __syncthreads(); // Don't overwrite previous iteration values until they are used + + ValueInit::init( m_functor , shared_prefix + word_count.value ); + + // Copy previous block's accumulation total into thread[0] prefix and inclusive scan value of this block + for ( unsigned i = threadIdx.y ; i < word_count.value ; ++i ) { + shared_data[i + word_count.value] = shared_data[i] = shared_accum[i] ; + } + + if ( CudaTraits::WarpSize < word_count.value ) { 
__syncthreads(); } // Protect against large scan values. + + // Call functor to accumulate inclusive scan value for this work item + if ( iwork < range.end() ) { + this-> template exec_range< WorkTag >( iwork , ValueOps::reference( shared_prefix + word_count.value ) , false ); + } + + // Scan block values into locations shared_data[1..blockDim.y] + cuda_intra_block_reduce_scan<true,FunctorType,WorkTag>( m_functor , ValueTraits::pointer_type(shared_data+word_count.value) ); + + { + size_type * const block_total = shared_data + word_count.value * blockDim.y ; + for ( unsigned i = threadIdx.y ; i < word_count.value ; ++i ) { shared_accum[i] = block_total[i]; } + } + + // Call functor with exclusive scan value + if ( iwork < range.end() ) { + this-> template exec_range< WorkTag >( iwork , ValueOps::reference( shared_prefix ) , true ); + } + } + } + +public: + + //---------------------------------------- + + __device__ inline + void operator()(void) const + { + if ( ! m_final ) { + initial(); + } + else { + final(); + } + } + + // Determine block size constrained by shared memory: + static inline + unsigned local_block_size( const FunctorType & f ) + { + // blockDim.y must be power of two = 128 (4 warps) or 256 (8 warps) or 512 (16 warps) + // gridDim.x <= blockDim.y * blockDim.y + // + // 4 warps was 10% faster than 8 warps and 20% faster than 16 warps in unit testing + + unsigned n = CudaTraits::WarpSize * 4 ; + while ( n && CudaTraits::SharedMemoryCapacity < cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,WorkTag>( f , n ) ) { n >>= 1 ; } + return n ; + } + + inline + void execute() + { + const int nwork = m_policy.end() - m_policy.begin(); + if ( nwork ) { + enum { GridMaxComputeCapability_2x = 0x0ffff }; + + const int block_size = local_block_size( m_functor ); + + const int grid_max = + ( block_size * block_size ) < GridMaxComputeCapability_2x ? 
+ ( block_size * block_size ) : GridMaxComputeCapability_2x ; + + // At most 'max_grid' blocks: + const int max_grid = std::min( int(grid_max) , int(( nwork + block_size - 1 ) / block_size )); + + // How much work per block: + const int work_per_block = ( nwork + max_grid - 1 ) / max_grid ; + + // How many block are really needed for this much work: + const int grid_x = ( nwork + work_per_block - 1 ) / work_per_block ; + + m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( m_functor ) * grid_x ); + m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) * 1 ); + + const dim3 grid( grid_x , 1 , 1 ); + const dim3 block( 1 , block_size , 1 ); // REQUIRED DIMENSIONS ( 1 , N , 1 ) + const int shmem = ValueTraits::value_size( m_functor ) * ( block_size + 2 ); + + m_final = false ; + CudaParallelLaunch< ParallelScan >( *this, grid, block, shmem ); // copy to device and execute + + m_final = true ; + CudaParallelLaunch< ParallelScan >( *this, grid, block, shmem ); // copy to device and execute + } + } + + ParallelScan( const FunctorType & arg_functor , + const Policy & arg_policy ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_scratch_space( 0 ) + , m_scratch_flags( 0 ) + , m_final( false ) + { } +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + template<typename iType> + struct TeamThreadRangeBoundariesStruct<iType,CudaTeamMember> { + typedef iType index_type; + const iType start; + const iType end; + const iType increment; + const CudaTeamMember& thread; + +#ifdef __CUDA_ARCH__ + __device__ inline + TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& count): + start( threadIdx.y ), + end( count ), + increment( blockDim.y ), + thread(thread_) + {} + __device__ inline + TeamThreadRangeBoundariesStruct 
(const CudaTeamMember& thread_, const iType& begin_, const iType& end_): + start( begin_+threadIdx.y ), + end( end_ ), + increment( blockDim.y ), + thread(thread_) + {} +#else + KOKKOS_INLINE_FUNCTION + TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& count): + start( 0 ), + end( count ), + increment( 1 ), + thread(thread_) + {} + KOKKOS_INLINE_FUNCTION + TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& begin_, const iType& end_): + start( begin_ ), + end( end_ ), + increment( 1 ), + thread(thread_) + {} +#endif + }; + + template<typename iType> + struct ThreadVectorRangeBoundariesStruct<iType,CudaTeamMember> { + typedef iType index_type; + const iType start; + const iType end; + const iType increment; + +#ifdef __CUDA_ARCH__ + __device__ inline + ThreadVectorRangeBoundariesStruct (const CudaTeamMember& thread, const iType& count): + start( threadIdx.x ), + end( count ), + increment( blockDim.x ) + {} +#else + KOKKOS_INLINE_FUNCTION + ThreadVectorRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& count): + start( 0 ), + end( count ), + increment( 1 ) + {} +#endif + }; + +} // namespace Impl + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember> + TeamThreadRange(const Impl::CudaTeamMember& thread, const iType& count) { + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>(thread,count); +} + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember> + TeamThreadRange(const Impl::CudaTeamMember& thread, const iType& begin, const iType& end) { + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>(thread,begin,end); +} + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember > + ThreadVectorRange(const Impl::CudaTeamMember& thread, const iType& count) { + return 
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >(thread,count); +} + +KOKKOS_INLINE_FUNCTION +Impl::ThreadSingleStruct<Impl::CudaTeamMember> PerTeam(const Impl::CudaTeamMember& thread) { + return Impl::ThreadSingleStruct<Impl::CudaTeamMember>(thread); +} + +KOKKOS_INLINE_FUNCTION +Impl::VectorSingleStruct<Impl::CudaTeamMember> PerThread(const Impl::CudaTeamMember& thread) { + return Impl::VectorSingleStruct<Impl::CudaTeamMember>(thread); +} + +} // namespace Kokkos + +namespace Kokkos { + + /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the the calling thread team. + * This functionality requires C++11 support.*/ +template<typename iType, class Lambda> +KOKKOS_INLINE_FUNCTION +void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>& loop_boundaries, const Lambda& lambda) { + #ifdef __CUDA_ARCH__ + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) + lambda(i); + #endif +} + +/** \brief Inter-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the the calling thread team and a summation of + * val is performed and put into result. 
This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>& loop_boundaries, + const Lambda & lambda, ValueType& result) { + +#ifdef __CUDA_ARCH__ + result = ValueType(); + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i,result); + } + + Impl::cuda_intra_warp_reduction(result,[&] (ValueType& dst, const ValueType& src) { dst+=src; }); + Impl::cuda_inter_warp_reduction(result,[&] (ValueType& dst, const ValueType& src) { dst+=src; }); + +#endif +} + +/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of + * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. + * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore + * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or + * '1 for *'). This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>& loop_boundaries, + const Lambda & lambda, const JoinType& join, ValueType& init_result) { + +#ifdef __CUDA_ARCH__ + ValueType result = init_result; + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i,result); + } + + Impl::cuda_intra_warp_reduction(result, join ); + Impl::cuda_inter_warp_reduction(result, join ); + + init_result = result; +#endif +} + +} //namespace Kokkos + +namespace Kokkos { +/** \brief Intra-thread vector parallel_for. 
Executes lambda(iType i) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread. + * This functionality requires C++11 support.*/ +template<typename iType, class Lambda> +KOKKOS_INLINE_FUNCTION +void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >& + loop_boundaries, const Lambda& lambda) { +#ifdef __CUDA_ARCH__ + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) + lambda(i); +#endif +} + +/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a summation of + * val is performed and put into result. This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >& + loop_boundaries, const Lambda & lambda, ValueType& result) { +#ifdef __CUDA_ARCH__ + result = ValueType(); + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i,result); + } + + if (loop_boundaries.increment > 1) + result += shfl_down(result, 1,loop_boundaries.increment); + if (loop_boundaries.increment > 2) + result += shfl_down(result, 2,loop_boundaries.increment); + if (loop_boundaries.increment > 4) + result += shfl_down(result, 4,loop_boundaries.increment); + if (loop_boundaries.increment > 8) + result += shfl_down(result, 8,loop_boundaries.increment); + if (loop_boundaries.increment > 16) + result += shfl_down(result, 16,loop_boundaries.increment); + + result = shfl(result,0,loop_boundaries.increment); +#endif +} + +/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. 
+ * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of + * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. + * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore + * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or + * '1 for *'). This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >& + loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) { + +#ifdef __CUDA_ARCH__ + ValueType result = init_result; + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i,result); + } + + if (loop_boundaries.increment > 1) + join( result, shfl_down(result, 1,loop_boundaries.increment)); + if (loop_boundaries.increment > 2) + join( result, shfl_down(result, 2,loop_boundaries.increment)); + if (loop_boundaries.increment > 4) + join( result, shfl_down(result, 4,loop_boundaries.increment)); + if (loop_boundaries.increment > 8) + join( result, shfl_down(result, 8,loop_boundaries.increment)); + if (loop_boundaries.increment > 16) + join( result, shfl_down(result, 16,loop_boundaries.increment)); + + init_result = shfl(result,0,loop_boundaries.increment); +#endif +} + +/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final) + * for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed. + * Depending on the target execution space the operator might be called twice: once with final=false + * and once with final=true. When final==true val contains the prefix sum value. 
The contribution of this + * "i" needs to be added to val no matter whether final==true or not. In a serial execution + * (i.e. team_size==1) the operator is only called once with final==true. Scan_val will be set + * to the final sum value over all vector lanes. + * This functionality requires C++11 support.*/ +template< typename iType, class FunctorType > +KOKKOS_INLINE_FUNCTION +void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >& + loop_boundaries, const FunctorType & lambda) { + +#ifdef __CUDA_ARCH__ + typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; + typedef typename ValueTraits::value_type value_type ; + + value_type scan_val = value_type(); + const int VectorLength = blockDim.x; + + iType loop_bound = ((loop_boundaries.end+VectorLength-1)/VectorLength) * VectorLength; + for(int _i = threadIdx.x; _i < loop_bound; _i += VectorLength) { + value_type val = value_type(); + if(_i<loop_boundaries.end) + lambda(_i , val , false); + + value_type tmp = val; + value_type result_i; + + if(threadIdx.x%VectorLength == 0) + result_i = tmp; + if (VectorLength > 1) { + const value_type tmp2 = shfl_up(tmp, 1,VectorLength); + if(threadIdx.x > 0) + tmp+=tmp2; + } + if(threadIdx.x%VectorLength == 1) + result_i = tmp; + if (VectorLength > 3) { + const value_type tmp2 = shfl_up(tmp, 2,VectorLength); + if(threadIdx.x > 1) + tmp+=tmp2; + } + if ((threadIdx.x%VectorLength >= 2) && + (threadIdx.x%VectorLength < 4)) + result_i = tmp; + if (VectorLength > 7) { + const value_type tmp2 = shfl_up(tmp, 4,VectorLength); + if(threadIdx.x > 3) + tmp+=tmp2; + } + if ((threadIdx.x%VectorLength >= 4) && + (threadIdx.x%VectorLength < 8)) + result_i = tmp; + if (VectorLength > 15) { + const value_type tmp2 = shfl_up(tmp, 8,VectorLength); + if(threadIdx.x > 7) + tmp+=tmp2; + } + if ((threadIdx.x%VectorLength >= 8) && + (threadIdx.x%VectorLength < 16)) + result_i = tmp; + if (VectorLength > 31) { + const value_type tmp2 = 
shfl_up(tmp, 16,VectorLength); + if(threadIdx.x > 15) + tmp+=tmp2; + } + if (threadIdx.x%VectorLength >= 16) + result_i = tmp; + + val = scan_val + result_i - val; + scan_val += shfl(tmp,VectorLength-1,VectorLength); + if(_i<loop_boundaries.end) + lambda(_i , val , true); + } +#endif +} + +} + +namespace Kokkos { + +template<class FunctorType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::VectorSingleStruct<Impl::CudaTeamMember>& , const FunctorType& lambda) { +#ifdef __CUDA_ARCH__ + if(threadIdx.x == 0) lambda(); +#endif +} + +template<class FunctorType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::ThreadSingleStruct<Impl::CudaTeamMember>& , const FunctorType& lambda) { +#ifdef __CUDA_ARCH__ + if(threadIdx.x == 0 && threadIdx.y == 0) lambda(); +#endif +} + +template<class FunctorType, class ValueType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::VectorSingleStruct<Impl::CudaTeamMember>& , const FunctorType& lambda, ValueType& val) { +#ifdef __CUDA_ARCH__ + if(threadIdx.x == 0) lambda(val); + val = shfl(val,0,blockDim.x); +#endif +} + +template<class FunctorType, class ValueType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::ThreadSingleStruct<Impl::CudaTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) { +#ifdef __CUDA_ARCH__ + if(threadIdx.x == 0 && threadIdx.y == 0) { + lambda(val); + } + single_struct.team_member.team_broadcast(val,0); +#endif +} + +} + +namespace Kokkos { + +namespace Impl { + template< class FunctorType, class ExecPolicy, class ValueType , class Tag = typename ExecPolicy::work_tag> + struct CudaFunctorAdapter { + const FunctorType f; + typedef ValueType value_type; + CudaFunctorAdapter(const FunctorType& f_):f(f_) {} + + __device__ inline + void operator() (typename ExecPolicy::work_tag, const typename ExecPolicy::member_type& i, ValueType& val) const { + //Insert Static Assert with decltype on ValueType equals third argument type of FunctorType::operator() + f(typename ExecPolicy::work_tag(), i,val); + } + 
}; + + template< class FunctorType, class ExecPolicy, class ValueType > + struct CudaFunctorAdapter<FunctorType,ExecPolicy,ValueType,void> { + const FunctorType f; + typedef ValueType value_type; + CudaFunctorAdapter(const FunctorType& f_):f(f_) {} + + __device__ inline + void operator() (const typename ExecPolicy::member_type& i, ValueType& val) const { + //Insert Static Assert with decltype on ValueType equals second argument type of FunctorType::operator() + f(i,val); + } + __device__ inline + void operator() (typename ExecPolicy::member_type& i, ValueType& val) const { + //Insert Static Assert with decltype on ValueType equals second argument type of FunctorType::operator() + f(i,val); + } + + }; + + template< class FunctorType, class Enable = void> + struct ReduceFunctorHasInit { + enum {value = false}; + }; + + template< class FunctorType> + struct ReduceFunctorHasInit<FunctorType, typename Impl::enable_if< 0 < sizeof( & FunctorType::init ) >::type > { + enum {value = true}; + }; + + template< class FunctorType, class Enable = void> + struct ReduceFunctorHasJoin { + enum {value = false}; + }; + + template< class FunctorType> + struct ReduceFunctorHasJoin<FunctorType, typename Impl::enable_if< 0 < sizeof( & FunctorType::join ) >::type > { + enum {value = true}; + }; + + template< class FunctorType, class Enable = void> + struct ReduceFunctorHasFinal { + enum {value = false}; + }; + + template< class FunctorType> + struct ReduceFunctorHasFinal<FunctorType, typename Impl::enable_if< 0 < sizeof( & FunctorType::final ) >::type > { + enum {value = true}; + }; + + template< class FunctorType, class Enable = void> + struct ReduceFunctorHasShmemSize { + enum {value = false}; + }; + + template< class FunctorType> + struct ReduceFunctorHasShmemSize<FunctorType, typename Impl::enable_if< 0 < sizeof( & FunctorType::team_shmem_size ) >::type > { + enum {value = true}; + }; + + template< class FunctorType, bool Enable = + ( FunctorDeclaresValueType<FunctorType,void>::value) 
|| + ( ReduceFunctorHasInit<FunctorType>::value ) || + ( ReduceFunctorHasJoin<FunctorType>::value ) || + ( ReduceFunctorHasFinal<FunctorType>::value ) || + ( ReduceFunctorHasShmemSize<FunctorType>::value ) + > + struct IsNonTrivialReduceFunctor { + enum {value = false}; + }; + + template< class FunctorType> + struct IsNonTrivialReduceFunctor<FunctorType, true> { + enum {value = true}; + }; + + template<class FunctorType, class ResultType, class Tag, bool Enable = IsNonTrivialReduceFunctor<FunctorType>::value > + struct FunctorReferenceType { + typedef ResultType& reference_type; + }; + + template<class FunctorType, class ResultType, class Tag> + struct FunctorReferenceType<FunctorType, ResultType, Tag, true> { + typedef typename Kokkos::Impl::FunctorValueTraits< FunctorType ,Tag >::reference_type reference_type; + }; + + template< class FunctorTypeIn, class ExecPolicy, class ValueType> + struct ParallelReduceFunctorType<FunctorTypeIn,ExecPolicy,ValueType,Cuda> { + + enum {FunctorHasValueType = IsNonTrivialReduceFunctor<FunctorTypeIn>::value }; + typedef typename Kokkos::Impl::if_c<FunctorHasValueType, FunctorTypeIn, Impl::CudaFunctorAdapter<FunctorTypeIn,ExecPolicy,ValueType> >::type functor_type; + static functor_type functor(const FunctorTypeIn& functor_in) { + return Impl::if_c<FunctorHasValueType,FunctorTypeIn,functor_type>::select(functor_in,functor_type(functor_in)); + } + }; + +} + +} // namespace Kokkos +#endif /* defined( __CUDACC__ ) */ + +#endif /* #ifndef KOKKOS_CUDA_PARALLEL_HPP */ + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1778f631c0ef07b2bad25ea2c855e65c258e6f57 --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp @@ -0,0 +1,433 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CUDA_REDUCESCAN_HPP +#define KOKKOS_CUDA_REDUCESCAN_HPP + +#include <Kokkos_Macros.hpp> + +/* only compile this file if CUDA is enabled for Kokkos */ +#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA ) + +#include <utility> + +#include <Kokkos_Parallel.hpp> +#include <impl/Kokkos_FunctorAdapter.hpp> +#include <impl/Kokkos_Error.hpp> +#include <Cuda/Kokkos_Cuda_Vectorization.hpp> +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + + + +//Shfl based reductions +/* + * Algorithmic constraints: + * (a) threads with same threadIdx.y have same value + * (b) blockDim.x == power of two + * (c) blockDim.z == 1 + */ +// Warp-level step: joins 'result' across lanes that differ in threadIdx.y using shfl_down, then copies lane 0's value to every lane with shfl. +template< class ValueType , class JoinOp> +__device__ +inline void cuda_intra_warp_reduction( ValueType& result, + const JoinOp& join, + const int max_active_thread = blockDim.y) { + + unsigned int shift = 1; + + //Reduce over values from threads with different threadIdx.y + while(blockDim.x * shift < 32 ) { + const ValueType tmp = shfl_down(result, blockDim.x*shift,32u); + //Only join if upper thread is active (this allows non power of two for blockDim.y) + if(threadIdx.y + shift < max_active_thread) + join(result , tmp); + shift*=2; + } + + result = shfl(result,0,32); +} +// Cross-warp step: stages partial values in a STEP_WIDTH-slot static shared buffer, joins later warps into those slots, and leaves the combined value in 'value'. +template< class ValueType , class JoinOp> +__device__ +inline void cuda_inter_warp_reduction( ValueType& value, + const JoinOp& join, + const int max_active_thread = blockDim.y) { + // Staging buffer is an array of double so it is 8-byte aligned before being viewed as ValueType (a char array only guarantees 1-byte alignment). + #define STEP_WIDTH 4 + __shared__ double sh_result[(sizeof(ValueType)+7)/8*STEP_WIDTH]; + ValueType* result = (ValueType*) & sh_result; + const unsigned step = 32 / blockDim.x; + unsigned shift = STEP_WIDTH; + const int id = threadIdx.y%step==0?threadIdx.y/step:65000; + if(id < STEP_WIDTH ) { + result[id] = value; +
} + __syncthreads(); + while (shift<=max_active_thread/step) { + if(shift<=id && shift+STEP_WIDTH>id && threadIdx.x==0) { + join(result[id%STEP_WIDTH],value); + } + __syncthreads(); + shift+=STEP_WIDTH; + } + + + value = result[0]; + for(int i = 1; (i*step<max_active_thread) && i<STEP_WIDTH; i++) + join(value,result[i]); +} +// Block-wide reduction: the warp-level shuffle step followed by the shared-memory step across warps. +template< class ValueType , class JoinOp> +__device__ +inline void cuda_intra_block_reduction( ValueType& value, + const JoinOp& join, + const int max_active_thread = blockDim.y) { + cuda_intra_warp_reduction(value,join,max_active_thread); + cuda_inter_warp_reduction(value,join,max_active_thread); +} +// Grid-wide reduction through m_scratch_space; returns true only for the threads with id < 32 of the block that increments m_scratch_flags last. +template< class FunctorType , class JoinOp , class ArgTag = void > +__device__ +bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgTag >::reference_type value, + typename FunctorValueTraits< FunctorType , ArgTag >::reference_type neutral, + const JoinOp& join, + Cuda::size_type * const m_scratch_space, + typename FunctorValueTraits< FunctorType , ArgTag >::pointer_type const result, + Cuda::size_type * const m_scratch_flags, + const int max_active_thread = blockDim.y) { + typedef typename FunctorValueTraits< FunctorType , ArgTag >::pointer_type pointer_type; + typedef typename FunctorValueTraits< FunctorType , ArgTag >::value_type value_type; + + //Do the intra-block reduction with shfl operations and static shared memory + cuda_intra_block_reduction(value,join,max_active_thread); + + const unsigned id = threadIdx.y*blockDim.x + threadIdx.x; + + //One thread in the block writes block result to global scratch_memory + if(id == 0 ) { + pointer_type global = ((pointer_type) m_scratch_space) + blockIdx.x; + *global = value; + } + + //One warp of last block performs inter block reduction through loading the block values from global scratch_memory + bool last_block = false; + __threadfence(); // make this block's scratch write visible device-wide before the arrival counter is incremented + __syncthreads(); + if ( id < 32 ) { + Cuda::size_type count; + + //Figure out whether this is the last block + if(id == 0) + count =
Kokkos::atomic_fetch_add(m_scratch_flags,1); + count = Kokkos::shfl(count,0,32); + + //Last block does the inter block reduction + if( count == gridDim.x - 1) { + //set flag back to zero + if(id == 0) + *m_scratch_flags = 0; + last_block = true; + value = neutral; + + pointer_type const volatile global = (pointer_type) m_scratch_space ; + + //Reduce all global values with splitting work over threads in one warp + const int step_size = blockDim.x*blockDim.y < 32 ? blockDim.x*blockDim.y : 32; + for(int i=id; i<gridDim.x; i+=step_size) { + value_type tmp = global[i]; + join(value, tmp); + } + + //Perform shfl reductions within the warp only join if contribution is valid (allows gridDim.x non power of two and <32) + if (blockDim.x*blockDim.y > 1) { + value_type tmp = Kokkos::shfl_down(value, 1,32); + if( id + 1 < gridDim.x ) + join(value, tmp); + } + if (blockDim.x*blockDim.y > 2) { + value_type tmp = Kokkos::shfl_down(value, 2,32); + if( id + 2 < gridDim.x ) + join(value, tmp); + } + if (blockDim.x*blockDim.y > 4) { + value_type tmp = Kokkos::shfl_down(value, 4,32); + if( id + 4 < gridDim.x ) + join(value, tmp); + } + if (blockDim.x*blockDim.y > 8) { + value_type tmp = Kokkos::shfl_down(value, 8,32); + if( id + 8 < gridDim.x ) + join(value, tmp); + } + if (blockDim.x*blockDim.y > 16) { + value_type tmp = Kokkos::shfl_down(value, 16,32); + if( id + 16 < gridDim.x ) + join(value, tmp); + } + } + } + + //The last block has in its thread=0 the global reduction value through "value" + return last_block; +} + +//---------------------------------------------------------------------------- +// See section B.17 of Cuda C Programming Guide Version 3.2 +// for discussion of +// __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor) +// function qualifier which could be used to improve performance.
+//---------------------------------------------------------------------------- +// Maximize shared memory and minimize L1 cache: +// cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferShared ); +// For 2.0 capability: 48 KB shared and 16 KB L1 +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +/* + * Algorithmic constraints: + * (a) blockDim.y is a power of two + * (b) blockDim.y <= 512 + * (c) blockDim.x == blockDim.z == 1 + */ +// Reduces (and, when DoScan is true, scans) the blockDim.y per-thread values stored back to back at 'base_data', in place. +template< bool DoScan , class FunctorType , class ArgTag > +__device__ +void cuda_intra_block_reduce_scan( const FunctorType & functor , + const typename FunctorValueTraits< FunctorType , ArgTag >::pointer_type base_data ) +{ + typedef FunctorValueTraits< FunctorType , ArgTag > ValueTraits ; + typedef FunctorValueJoin< FunctorType , ArgTag > ValueJoin ; + + typedef typename ValueTraits::pointer_type pointer_type ; + + const unsigned value_count = ValueTraits::value_count( functor ); + const unsigned BlockSizeMask = blockDim.y - 1 ; + + // Must have power of two thread count + + if ( BlockSizeMask & blockDim.y ) { Kokkos::abort("Cuda::cuda_intra_block_scan requires power-of-two blockDim"); } + +#define BLOCK_REDUCE_STEP( R , TD , S ) \ + if ( !
( R & ((1<<(S+1))-1) ) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S)) ); } + +#define BLOCK_SCAN_STEP( TD , N , S ) \ + if ( N == (1<<S) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S))); } + // rtid_intra is the thread index counted back from the last thread (threadIdx.y ^ BlockSizeMask). + const unsigned rtid_intra = threadIdx.y ^ BlockSizeMask ; + const pointer_type tdata_intra = base_data + value_count * threadIdx.y ; + + { // Intra-warp reduction: + BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,0) + BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,1) + BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,2) + BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,3) + BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,4) + } + + __syncthreads(); // Wait for all warps to reduce + + { // Inter-warp reduce-scan by a single warp to avoid extra synchronizations + const unsigned rtid_inter = ( threadIdx.y ^ BlockSizeMask ) << CudaTraits::WarpIndexShift ; + + if ( rtid_inter < blockDim.y ) { + + const pointer_type tdata_inter = base_data + value_count * ( rtid_inter ^ BlockSizeMask ); + + if ( (1<<5) < BlockSizeMask ) { BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,5) } + if ( (1<<6) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,6) } + if ( (1<<7) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,7) } + if ( (1<<8) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,8) } + + if ( DoScan ) { + + int n = ( rtid_inter & 32 ) ? 32 : ( + ( rtid_inter & 64 ) ? 64 : ( + ( rtid_inter & 128 ) ? 128 : ( + ( rtid_inter & 256 ) ? 256 : 0 ))); + + if ( ! ( rtid_inter + n < blockDim.y ) ) n = 0 ; + + BLOCK_SCAN_STEP(tdata_inter,n,8) + BLOCK_SCAN_STEP(tdata_inter,n,7) + BLOCK_SCAN_STEP(tdata_inter,n,6) + BLOCK_SCAN_STEP(tdata_inter,n,5) + } + } + } + + __syncthreads(); // Wait for inter-warp reduce-scan to complete + + if ( DoScan ) { + int n = ( rtid_intra & 1 ) ? 1 : ( + ( rtid_intra & 2 ) ? 2 : ( + ( rtid_intra & 4 ) ? 4 : ( + ( rtid_intra & 8 ) ? 8 : ( + ( rtid_intra & 16 ) ? 16 : 0 )))); + + if ( !
( rtid_intra + n < blockDim.y ) ) n = 0 ; + + BLOCK_SCAN_STEP(tdata_intra,n,4) __threadfence_block(); + BLOCK_SCAN_STEP(tdata_intra,n,3) __threadfence_block(); + BLOCK_SCAN_STEP(tdata_intra,n,2) __threadfence_block(); + BLOCK_SCAN_STEP(tdata_intra,n,1) __threadfence_block(); + BLOCK_SCAN_STEP(tdata_intra,n,0) + } + +#undef BLOCK_SCAN_STEP +#undef BLOCK_REDUCE_STEP +} + +//---------------------------------------------------------------------------- +/**\brief Input value-per-thread starting at 'shared_data'. + * Reduction value at last thread's location. + * + * If 'DoScan' then write blocks' scan values and block-groups' scan values. + * Returns true only in the block whose flag increment is the last of 'block_count'; that block combines every block's contribution. + * Global reduce result is in the last threads' 'shared_data' location. + */ +template< bool DoScan , class FunctorType , class ArgTag > +__device__ +bool cuda_single_inter_block_reduce_scan( const FunctorType & functor , + const Cuda::size_type block_id , + const Cuda::size_type block_count , + Cuda::size_type * const shared_data , + Cuda::size_type * const global_data , + Cuda::size_type * const global_flags ) +{ + typedef Cuda::size_type size_type ; + typedef FunctorValueTraits< FunctorType , ArgTag > ValueTraits ; + typedef FunctorValueJoin< FunctorType , ArgTag > ValueJoin ; + typedef FunctorValueInit< FunctorType , ArgTag > ValueInit ; + typedef FunctorValueOps< FunctorType , ArgTag > ValueOps ; + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + + // '__ffs' = position of the least significant bit set to 1. + // 'blockDim.y' is guaranteed to be a power of two so this + // is the integral shift value that can replace an integral divide.
+ const unsigned BlockSizeShift = __ffs( blockDim.y ) - 1 ; + const unsigned BlockSizeMask = blockDim.y - 1 ; + + // Must have power of two thread count + if ( BlockSizeMask & blockDim.y ) { Kokkos::abort("Cuda::cuda_single_inter_block_reduce_scan requires power-of-two blockDim"); } + + const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) > + word_count( ValueTraits::value_size( functor ) / sizeof(size_type) ); + + // Reduce the accumulation for the entire block. + cuda_intra_block_reduce_scan<false,FunctorType,ArgTag>( functor , pointer_type(shared_data) ); + + { + // Write accumulation total to global scratch space. + // Accumulation total is the last thread's data. + size_type * const shared = shared_data + word_count.value * BlockSizeMask ; + size_type * const global = global_data + word_count.value * block_id ; + +#if (__CUDA_ARCH__ < 500) + for ( size_type i = threadIdx.y ; i < word_count.value ; i += blockDim.y ) { global[i] = shared[i] ; } +#else + for ( size_type i = 0 ; i < word_count.value ; i += 1 ) { global[i] = shared[i] ; } +#endif + + } + __threadfence(); // make this block's global_data write visible device-wide before the flag increment below + // Contributing blocks note that their contribution has been completed via an atomic-increment flag + // If this block is not the last block to contribute to this group then the block is done. + const bool is_last_block = + ! __syncthreads_or( threadIdx.y ?
0 : ( 1 + atomicInc( global_flags , block_count - 1 ) < block_count ) ); + + if ( is_last_block ) { + // Each thread combines the contiguous slice [b,e) of the block_count per-block contributions. + const size_type b = ( long(block_count) * long(threadIdx.y) ) >> BlockSizeShift ; + const size_type e = ( long(block_count) * long( threadIdx.y + 1 ) ) >> BlockSizeShift ; + + { + void * const shared_ptr = shared_data + word_count.value * threadIdx.y ; + reference_type shared_value = ValueInit::init( functor , shared_ptr ); + + for ( size_type i = b ; i < e ; ++i ) { + ValueJoin::join( functor , shared_ptr , global_data + word_count.value * i ); + } + } + + cuda_intra_block_reduce_scan<DoScan,FunctorType,ArgTag>( functor , pointer_type(shared_data) ); + + if ( DoScan ) { + + size_type * const shared_value = shared_data + word_count.value * ( threadIdx.y ? threadIdx.y - 1 : blockDim.y ); + + if ( ! threadIdx.y ) { ValueInit::init( functor , shared_value ); } + + // Join previous inclusive scan value to each member + for ( size_type i = b ; i < e ; ++i ) { + size_type * const global_value = global_data + word_count.value * i ; + ValueJoin::join( functor , shared_value , global_value ); + ValueOps ::copy( functor , global_value , shared_value ); + } + } + } + + return is_last_block ; +} + +// Size in bytes required for inter block reduce or scan +template< bool DoScan , class FunctorType , class ArgTag > +inline +unsigned cuda_single_inter_block_reduce_scan_shmem( const FunctorType & functor , const unsigned BlockSize ) +{ + return ( BlockSize + 2 ) * Impl::FunctorValueTraits< FunctorType , ArgTag >::value_size( functor ); +} + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( __CUDACC__ ) */ +#endif /* KOKKOS_CUDA_REDUCESCAN_HPP */ + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp new file mode 100644 index
0000000000000000000000000000000000000000..701d267e1ba39413061afd337ac19c7d6acaacfc --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp @@ -0,0 +1,179 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> + +#if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKPOLICY ) + +#include <impl/Kokkos_TaskQueue_impl.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template class TaskQueue< Kokkos::Cuda > ; + +//---------------------------------------------------------------------------- +// Device-side scheduling loop: warp lane 0 pops a ready task, the pointer is shuffled to the other lanes, and the loop exits once the broadcast pointer is null. +__device__ +void TaskQueueSpecialization< Kokkos::Cuda >::driver + ( TaskQueueSpecialization< Kokkos::Cuda >::queue_type * const queue ) +{ + using Member = TaskExec< Kokkos::Cuda > ; + using Queue = TaskQueue< Kokkos::Cuda > ; + using task_root_type = TaskBase< Kokkos::Cuda , void , void > ; + + task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + + Member single_exec( 1 ); + Member team_exec( blockDim.y ); + + const int warp_lane = threadIdx.x + threadIdx.y * blockDim.x ; + + union { + task_root_type * ptr ; + int raw[2] ; + } task ; + // 'raw' views the pointer as two ints so it can be broadcast with two __shfl calls (assumes a pointer occupies exactly two ints). + // Loop until all queues are empty and no tasks in flight + + do { + + // Each team lead attempts to acquire either a thread team task + // or collection of single thread tasks for the team. + + if ( 0 == warp_lane ) { + + task.ptr = 0 < *((volatile int *) & queue->m_ready_count) ?
end : 0 ; + + // Loop by priority and then type + for ( int i = 0 ; i < Queue::NumQueue && end == task.ptr ; ++i ) { + for ( int j = 0 ; j < 2 && end == task.ptr ; ++j ) { + task.ptr = Queue::pop_task( & queue->m_ready[i][j] ); + } + } + +#if 0 +printf("TaskQueue<Cuda>::driver(%d,%d) task(%lx)\n",threadIdx.z,blockIdx.x + , uintptr_t(task.ptr)); +#endif + + } + + // shuffle broadcast of lane 0's task pointer to the other lanes + + task.raw[0] = __shfl( task.raw[0] , 0 ); + task.raw[1] = __shfl( task.raw[1] , 0 ); + + if ( 0 == task.ptr ) break ; // 0 == queue->m_ready_count + + if ( end != task.ptr ) { + if ( task_root_type::TaskTeam == task.ptr->m_task_type ) { + // Thread Team Task + (*task.ptr->m_apply)( task.ptr , & team_exec ); + } + else if ( 0 == threadIdx.y ) { + // Single Thread Task + (*task.ptr->m_apply)( task.ptr , & single_exec ); + } + + if ( 0 == warp_lane ) { + queue->complete( task.ptr ); + } + } + } while(1); +} + +namespace { +// Kernel entry point: forwards 'queue' to the device-side driver. +__global__ +void cuda_task_queue_execute( TaskQueue< Kokkos::Cuda > * queue ) +{ TaskQueueSpecialization< Kokkos::Cuda >::driver( queue ); } + +} +// Host entry: synchronizes the device, launches the driver kernel on a grid sized by cuda_internal_multiprocessor_count() with 1 x WarpSize x 4 threads per block, then synchronizes again. +void TaskQueueSpecialization< Kokkos::Cuda >::execute + ( TaskQueue< Kokkos::Cuda > * const queue ) +{ + const int warps_per_block = 4 ; + const dim3 grid( Kokkos::Impl::cuda_internal_multiprocessor_count() , 1 , 1 ); + const dim3 block( 1 , Kokkos::Impl::CudaTraits::WarpSize , warps_per_block ); + const int shared = 0 ; + const cudaStream_t stream = 0 ; + + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + +#if 0 +printf("cuda_task_queue_execute before\n"); +#endif + + // Query the stack size, in bytes: + // + // size_t stack_size = 0 ; + // CUDA_SAFE_CALL( cudaDeviceGetLimit( & stack_size , cudaLimitStackSize ) ); + // + // If not large enough then set the stack size, in bytes: + // + // CUDA_SAFE_CALL( cudaDeviceSetLimit( cudaLimitStackSize , stack_size ) ); + + cuda_task_queue_execute<<< grid , block , shared , stream >>>( queue ); + + CUDA_SAFE_CALL( cudaGetLastError() ); + + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); +
+#if 0 +printf("cuda_task_queue_execute after\n"); +#endif + +} + +}} /* namespace Kokkos::Impl */ + +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */ + + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9d9347cc8d57c0c04a228fb0291c0f4e90b6243f --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp @@ -0,0 +1,519 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_CUDA_TASK_HPP +#define KOKKOS_IMPL_CUDA_TASK_HPP + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { +namespace { + +template< typename TaskType > +__global__ +void set_cuda_task_base_apply_function_pointer + ( TaskBase<Kokkos::Cuda,void,void>::function_type * ptr ) +{ *ptr = TaskType::apply ; } + +} + +template<> +class TaskQueueSpecialization< Kokkos::Cuda > +{ +public: + + using execution_space = Kokkos::Cuda ; + using memory_space = Kokkos::CudaUVMSpace ; + using queue_type = TaskQueue< execution_space > ; + + static + void iff_single_thread_recursive_execute( queue_type * const ) {} + + __device__ + static void driver( queue_type * const ); + + static + void execute( queue_type * const ); + + template< typename FunctorType > + static + void proc_set_apply( TaskBase<execution_space,void,void>::function_type * ptr ) + { + using TaskType = TaskBase< execution_space + , typename FunctorType::value_type + , FunctorType > ; + + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + + set_cuda_task_base_apply_function_pointer<TaskType><<<1,1>>>(ptr); + + CUDA_SAFE_CALL( 
cudaGetLastError() ); + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + } +}; + +extern template class TaskQueue< Kokkos::Cuda > ; + +//---------------------------------------------------------------------------- +/**\brief Impl::TaskExec<Cuda> is the TaskPolicy<Cuda>::member_type + * passed to tasks running in a Cuda space. + * + * Cuda thread blocks for tasking are dimensioned: + * blockDim.x == vector length + * blockDim.y == team size + * blockDim.z == number of teams + * where + * blockDim.x * blockDim.y == WarpSize + * + * Both single thread and thread team tasks are run by a full Cuda warp. + * A single thread task is called by warp lane #0 and the remaining + * lanes of the warp are idle. + */ +template<> +class TaskExec< Kokkos::Cuda > +{ +private: + + TaskExec( TaskExec && ) = delete ; + TaskExec( TaskExec const & ) = delete ; + TaskExec & operator = ( TaskExec && ) = delete ; + TaskExec & operator = ( TaskExec const & ) = delete ; + + friend class Kokkos::Impl::TaskQueue< Kokkos::Cuda > ; + friend class Kokkos::Impl::TaskQueueSpecialization< Kokkos::Cuda > ; + + const int m_team_size ; + + __device__ + TaskExec( int arg_team_size = blockDim.y ) + : m_team_size( arg_team_size ) {} + +public: + +#if defined( __CUDA_ARCH__ ) + __device__ void team_barrier() { /* __threadfence_block(); */ } + __device__ int team_rank() const { return threadIdx.y ; } + __device__ int team_size() const { return m_team_size ; } +#else + __host__ void team_barrier() {} + __host__ int team_rank() const { return 0 ; } + __host__ int team_size() const { return 0 ; } +#endif + +}; + +//---------------------------------------------------------------------------- + +template<typename iType> +struct TeamThreadRangeBoundariesStruct<iType, TaskExec< Kokkos::Cuda > > +{ + typedef iType index_type; + const iType start ; + const iType end ; + const iType increment ; + const TaskExec< Kokkos::Cuda > & thread; + +#if defined( __CUDA_ARCH__ ) + + __device__ inline + 
TeamThreadRangeBoundariesStruct + ( const TaskExec< Kokkos::Cuda > & arg_thread, const iType& arg_count) + : start( threadIdx.y ) + , end(arg_count) + , increment( blockDim.y ) + , thread(arg_thread) + {} + + __device__ inline + TeamThreadRangeBoundariesStruct + ( const TaskExec< Kokkos::Cuda > & arg_thread + , const iType & arg_start + , const iType & arg_end + ) + : start( arg_start + threadIdx.y ) + , end( arg_end) + , increment( blockDim.y ) + , thread( arg_thread ) + {} + +#else + + TeamThreadRangeBoundariesStruct + ( const TaskExec< Kokkos::Cuda > & arg_thread, const iType& arg_count); + + TeamThreadRangeBoundariesStruct + ( const TaskExec< Kokkos::Cuda > & arg_thread + , const iType & arg_start + , const iType & arg_end + ); + +#endif + +}; + +//---------------------------------------------------------------------------- + +template<typename iType> +struct ThreadVectorRangeBoundariesStruct<iType, TaskExec< Kokkos::Cuda > > +{ + typedef iType index_type; + const iType start ; + const iType end ; + const iType increment ; + const TaskExec< Kokkos::Cuda > & thread; + +#if defined( __CUDA_ARCH__ ) + + __device__ inline + ThreadVectorRangeBoundariesStruct + ( const TaskExec< Kokkos::Cuda > & arg_thread, const iType& arg_count) + : start( threadIdx.x ) + , end(arg_count) + , increment( blockDim.x ) + , thread(arg_thread) + {} + +#else + + ThreadVectorRangeBoundariesStruct + ( const TaskExec< Kokkos::Cuda > & arg_thread, const iType& arg_count); + +#endif + +}; + +}} /* namespace Kokkos::Impl */ + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > > +TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread + , const iType & count ) +{ + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > >(thread,count); +} + +template<typename iType> 
+KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > > +TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread, const iType & start , const iType & end ) +{ + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Cuda > >(thread,start,end); +} + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > > +ThreadVectorRange( const Impl::TaskExec< Kokkos::Cuda > & thread + , const iType & count ) +{ + return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > >(thread,count); +} + +/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the the calling thread team. + * This functionality requires C++11 support. +*/ +template<typename iType, class Lambda> +KOKKOS_INLINE_FUNCTION +void parallel_for + ( const Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Cuda > >& loop_boundaries + , const Lambda& lambda + ) +{ + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i); + } +} + +// reduce across corresponding lanes between team members within warp +// assume stride*team_size == warp_size +template< typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void strided_shfl_warp_reduction + (const JoinType& join, + ValueType& val, + int team_size, + int stride) +{ + for (int lane_delta=(team_size*stride)>>1; lane_delta>=stride; lane_delta>>=1) { + join(val, Kokkos::shfl_down(val, lane_delta, team_size*stride)); + } +} + +// multiple within-warp non-strided reductions +template< typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void multi_shfl_warp_reduction + (const JoinType& join, + ValueType& val, + int vec_length) +{ + for (int lane_delta=vec_length>>1; lane_delta; lane_delta>>=1) { + join(val, Kokkos::shfl_down(val, lane_delta, 
vec_length)); + } +} + +// broadcast within warp +template< class ValueType > +KOKKOS_INLINE_FUNCTION +ValueType shfl_warp_broadcast + (ValueType& val, + int src_lane, + int width) +{ + return Kokkos::shfl(val, src_lane, width); +} + +// all-reduce across corresponding vector lanes between team members within warp +// assume vec_length*team_size == warp_size +// blockDim.x == vec_length == stride +// blockDim.y == team_size +// threadIdx.x == position in vec +// threadIdx.y == member number +template< typename iType, class Lambda, typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > >& loop_boundaries, + const Lambda & lambda, + const JoinType& join, + ValueType& initialized_result) { + + ValueType result = initialized_result; + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i,result); + } + initialized_result = result; + + strided_shfl_warp_reduction<ValueType, JoinType>( + join, + initialized_result, + loop_boundaries.thread.team_size(), + blockDim.x); + initialized_result = shfl_warp_broadcast<ValueType>( initialized_result, threadIdx.x, Impl::CudaTraits::WarpSize ); +} + +// all-reduce across corresponding vector lanes between team members within warp +// if no join() provided, use sum +// assume vec_length*team_size == warp_size +// blockDim.x == vec_length == stride +// blockDim.y == team_size +// threadIdx.x == position in vec +// threadIdx.y == member number +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > >& loop_boundaries, + const Lambda & lambda, + ValueType& initialized_result) { + + //TODO what is the point of creating this temporary? 
+  ValueType result = initialized_result;
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    lambda(i,result);
+  }
+  initialized_result = result;
+
+  strided_shfl_warp_reduction(
+                          [&] (ValueType& val1, const ValueType& val2) { val1 += val2; }, // default join: operator+=
+                          initialized_result,
+                          loop_boundaries.thread.team_size(),
+                          blockDim.x);
+  initialized_result = shfl_warp_broadcast<ValueType>( initialized_result, threadIdx.x, Impl::CudaTraits::WarpSize ); // all threads re-read the value from warp lane threadIdx.x
+}
+
+// all-reduce within team members within warp
+// assume vec_length*team_size == warp_size
+// blockDim.x == vec_length == stride
+// blockDim.y == team_size
+// threadIdx.x == position in vec
+// threadIdx.y == member number; the reduced value is written back into 'initialized_result'
+template< typename iType, class Lambda, typename ValueType, class JoinType >
+KOKKOS_INLINE_FUNCTION
+void parallel_reduce
+  (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > >& loop_boundaries,
+   const Lambda & lambda,
+   const JoinType& join,
+   ValueType& initialized_result) {
+
+  ValueType result = initialized_result; // accumulate this thread's iterations in a private copy
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    lambda(i,result);
+  }
+  initialized_result = result;
+
+  multi_shfl_warp_reduction<ValueType, JoinType>(join, initialized_result, blockDim.x); // reduce over groups of blockDim.x lanes
+  initialized_result = shfl_warp_broadcast<ValueType>( initialized_result, 0, blockDim.x ); // re-read from lane 0 of each blockDim.x-wide group
+}
+
+// all-reduce within team members within warp
+// if no join() provided, use sum
+// assume vec_length*team_size == warp_size
+// blockDim.x == vec_length == stride
+// blockDim.y == team_size
+// threadIdx.x == position in vec
+// threadIdx.y == member number
+template< typename iType, class Lambda, typename ValueType >
+KOKKOS_INLINE_FUNCTION
+void parallel_reduce
+  (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > >& loop_boundaries,
+   const Lambda & lambda,
+   ValueType& initialized_result) {
+
+  ValueType result = initialized_result;
+
+  
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    lambda(i,result);
+  }
+
+  initialized_result = result;
+
+  //initialized_result = multi_shfl_warp_reduction(
+  multi_shfl_warp_reduction(
+                          [&] (ValueType& val1, const ValueType& val2) { val1 += val2; }, // default join: operator+=
+                          initialized_result,
+                          blockDim.x);
+  initialized_result = shfl_warp_broadcast<ValueType>( initialized_result, 0, blockDim.x ); // re-read from lane 0 of each blockDim.x-wide group
+}
+
+// scan across corresponding vector lanes between team members within warp
+// assume vec_length*team_size == warp_size
+// blockDim.x == vec_length == stride
+// blockDim.y == team_size
+// threadIdx.x == position in vec
+// threadIdx.y == member number; ValueType appears in no function parameter, so callers must name it explicitly
+template< typename ValueType, typename iType, class Lambda >
+KOKKOS_INLINE_FUNCTION
+void parallel_scan
+  (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > >& loop_boundaries,
+   const Lambda & lambda) {
+
+  ValueType accum = 0 ; // running total carried over from earlier iterations of i
+  ValueType val, y, local_total;
+
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    val = 0;
+    lambda(i,val,false); // called with 'false' before the scan; 'val' is then used as this thread's input to the scan
+
+    // intra-blockDim.y exclusive scan on 'val'
+    // accum = accumulated, sum in total for this iteration
+
+    // INCLUSIVE scan
+    for( int offset = blockDim.x ; offset < Impl::CudaTraits::WarpSize ; offset <<= 1 ) {
+      y = Kokkos::shfl_up(val, offset, Impl::CudaTraits::WarpSize);
+      if(threadIdx.y*blockDim.x >= offset) { val += y; }
+    }
+
+    // pass accum to all threads (source lane threadIdx.x+WarpSize-blockDim.x: same vector position, last blockDim.x-wide group of the warp)
+    local_total = shfl_warp_broadcast<ValueType>(val,
+                                            threadIdx.x+Impl::CudaTraits::WarpSize-blockDim.x,
+                                            Impl::CudaTraits::WarpSize);
+
+    // make EXCLUSIVE scan by shifting values over one
+    val = Kokkos::shfl_up(val, blockDim.x, Impl::CudaTraits::WarpSize);
+    if ( threadIdx.y == 0 ) { val = 0 ; }
+
+    val += accum;
+    lambda(i,val,true); // called with 'true' after the scan, receiving the exclusive prefix plus 'accum'
+    accum += local_total;
+  }
+}
+
+// scan within team member (vector) within warp
+// assume vec_length*team_size == warp_size
+// blockDim.x == vec_length == stride
+// blockDim.y == team_size
+// threadIdx.x == position in vec
+// threadIdx.y == member number; NOTE(review): ValueType is the last template parameter and is not deducible from the arguments - confirm how callers are expected to supply it
+template< typename iType, class Lambda, typename ValueType >
+KOKKOS_INLINE_FUNCTION
+void parallel_scan
+  (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > >& loop_boundaries,
+   const Lambda & lambda)
+{
+  ValueType accum = 0 ; // running total carried over from earlier iterations of i
+  ValueType val, y, local_total;
+
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    val = 0;
+    lambda(i,val,false); // called with 'false' before the scan; 'val' is then used as this thread's input to the scan
+
+    // intra-blockDim.x exclusive scan on 'val'
+    // accum = accumulated, sum in total for this iteration
+
+    // INCLUSIVE scan
+    for( int offset = 1 ; offset < blockDim.x ; offset <<= 1 ) {
+      y = Kokkos::shfl_up(val, offset, blockDim.x);
+      if(threadIdx.x >= offset) { val += y; }
+    }
+
+    // pass accum to all threads (source lane blockDim.x-1 of each blockDim.x-wide group)
+    local_total = shfl_warp_broadcast<ValueType>(val, blockDim.x-1, blockDim.x);
+
+    // make EXCLUSIVE scan by shifting values over one
+    val = Kokkos::shfl_up(val, 1, blockDim.x);
+    if ( threadIdx.x == 0 ) { val = 0 ; }
+
+    val += accum;
+    lambda(i,val,true); // called with 'true' after the scan, receiving the exclusive prefix plus 'accum'
+    accum += local_total;
+  }
+}
+
+} /* namespace Kokkos */
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
+#endif /* #ifndef KOKKOS_IMPL_CUDA_TASK_HPP */
+
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bb3cd2640d79ad980219861a6e4f0c233c0686bb
--- /dev/null
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp
@@ -0,0 +1,932 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S.
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#include <stdio.h> +#include <iostream> +#include <sstream> +#include <Kokkos_Core.hpp> +#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp> + +#if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKPOLICY ) + +// #define DETAILED_PRINT + +//---------------------------------------------------------------------------- + +#define QLOCK reinterpret_cast<void*>( ~((uintptr_t)0) ) +#define QDENIED reinterpret_cast<void*>( ~((uintptr_t)0) - 1 ) + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +void CudaTaskPolicyQueue::Destroy::destroy_shared_allocation() +{ + // Verify the queue is empty + + if ( m_policy->m_count_ready || + m_policy->m_team[0] || + m_policy->m_team[1] || + m_policy->m_team[2] || + m_policy->m_serial[0] || + m_policy->m_serial[1] || + m_policy->m_serial[2] ) { + Kokkos::abort("CudaTaskPolicyQueue ERROR : Attempt to destroy non-empty queue" ); + } + + m_policy->~CudaTaskPolicyQueue(); + + Kokkos::Cuda::fence(); +} + +CudaTaskPolicyQueue:: +~CudaTaskPolicyQueue() +{ +} + +CudaTaskPolicyQueue:: +CudaTaskPolicyQueue + ( const unsigned arg_task_max_count + , const unsigned arg_task_max_size + , const unsigned arg_task_default_dependence_capacity + , const unsigned arg_team_size + ) + : m_space( Kokkos::CudaUVMSpace() + , arg_task_max_size * arg_task_max_count * 1.2 + , 16 /* log2(superblock size) */ + ) + , m_team { 0 , 0 , 0 } + , m_serial { 0 , 0 , 0 } + , m_team_size( 32 /* 1 warps */ ) + , m_default_dependence_capacity( arg_task_default_dependence_capacity ) + , m_count_ready(0) +{ + constexpr int max_team_size = 32 * 16 /* 16 warps */ ; + + const int target_team_size = + std::min( int(arg_team_size) , max_team_size ); + + while ( m_team_size < target_team_size ) { m_team_size *= 2 ; } +} + 
+//-----------------------------------------------------------------------
+// Called by each block & thread; the loop ends once the team lead observes m_count_ready == 0
+
+__device__
+void Kokkos::Experimental::Impl::CudaTaskPolicyQueue::driver()
+{
+  task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED);
+
+#define IS_TEAM_LEAD ( threadIdx.x == 0 && threadIdx.y == 0 )
+
+#ifdef DETAILED_PRINT
+if ( IS_TEAM_LEAD ) {
+  printf( "CudaTaskPolicyQueue::driver() begin on %d with count %d\n"
+        , blockIdx.x , m_count_ready );
+}
+#endif
+
+  // Each thread block must iterate this loop synchronously
+  // to ensure team-execution of team-task
+
+  __shared__ task_root_type * team_task ; // written by the team lead, read by the whole block after __syncthreads()
+
+  __syncthreads();
+
+  do {
+
+    if ( IS_TEAM_LEAD ) {
+      if ( 0 == m_count_ready ) {
+        team_task = q_denied ; // All queues are empty and no running tasks
+      }
+      else {
+        team_task = 0 ;
+        for ( int i = 0 ; i < int(NPRIORITY) && 0 == team_task ; ++i ) { // stop at the first queue that yields a task
+          if ( ( i < 2 /* regular queue */ )
+               || ( ! m_space.is_empty() /* waiting for memory */ ) ) {
+            team_task = pop_ready_task( & m_team[i] );
+          }
+        }
+      }
+    }
+
+    __syncthreads();
+
+#ifdef DETAILED_PRINT
+if ( IS_TEAM_LEAD && 0 != team_task ) {
+  printf( "CudaTaskPolicyQueue::driver() (%d) team_task(0x%lx)\n"
+        , blockIdx.x
+        , (unsigned long) team_task );
+}
+#endif
+
+    // team_task == q_denied if all queues are empty
+    // team_task == 0 if no team tasks available
+
+    if ( q_denied != team_task ) {
+      if ( 0 != team_task ) {
+
+        Kokkos::Impl::CudaTeamMember
+          member( kokkos_impl_cuda_shared_memory<void>()
+                , 16                      /* shared_begin */
+                , team_task->m_shmem_size /* shared size */
+                , 0                       /* scratch level 1 pointer */
+                , 0                       /* scratch level 1 size */
+                , 0                       /* league rank */
+                , 1                       /* league size */
+                );
+
+        (*team_task->m_team)( team_task , member );
+
+        // A __syncthreads was called and if completed the
+        // functor was destroyed.
+
+        if ( IS_TEAM_LEAD ) {
+          complete_executed_task( team_task );
+        }
+      }
+      else {
+        // Every thread with threadIdx.x == 0 and threadIdx.y a multiple of 32 tries to pop and run one serial task
+        if ( threadIdx.x == 0 &&
+             0 == ( threadIdx.y % 32 ) ) {
+          task_root_type * task = 0 ;
+          for ( int i = 0 ; i < int(NPRIORITY) && 0 == task ; ++i ) { // stop at the first queue that yields a task
+            if ( ( i < 2 /* regular queue */ )
+                 || ( ! m_space.is_empty() /* waiting for memory */ ) ) {
+              task = pop_ready_task( & m_serial[i] );
+            }
+          }
+
+#ifdef DETAILED_PRINT
+if ( 0 != task ) {
+  printf( "CudaTaskPolicyQueue::driver() (%2d)(%d) single task(0x%lx)\n"
+        , blockIdx.x
+        , threadIdx.y
+        , (unsigned long) task );
+}
+#endif
+
+          if ( task ) {
+            (*task->m_serial)( task );
+            complete_executed_task( task );
+          }
+        }
+
+        __syncthreads();
+      }
+    }
+  } while ( q_denied != team_task ); // NOTE(review): team_task is re-read here with no barrier before the team lead's next write at the top of the loop - confirm this cannot race
+
+#ifdef DETAILED_PRINT
+if ( IS_TEAM_LEAD ) {
+  printf( "CudaTaskPolicyQueue::driver() end on %d with count %d\n"
+        , blockIdx.x , m_count_ready );
+}
+#endif
+
+#undef IS_TEAM_LEAD
+}
+
+//-----------------------------------------------------------------------
+// Tries to pop the head of 'queue'; returns the claimed task, or 0 when the head is null, locked, or the claiming CAS is lost.
+__device__
+CudaTaskPolicyQueue::task_root_type *
+CudaTaskPolicyQueue::pop_ready_task(
+  CudaTaskPolicyQueue::task_root_type * volatile * const queue )
+{
+  task_root_type * const q_lock = reinterpret_cast<task_root_type*>(QLOCK);
+  task_root_type * task = 0 ;
+  task_root_type * const task_claim = *queue ;
+
+  if ( ( q_lock != task_claim ) && ( 0 != task_claim ) ) {
+
+    // Queue is not locked and not null, try to claim head of queue.
+    // Is a race among threads to claim the queue.
+
+    if ( task_claim == atomic_compare_exchange(queue,task_claim,q_lock) ) {
+
+      // Acquired the task which must be in the waiting state.
+ + const int claim_state = + atomic_compare_exchange( & task_claim->m_state + , int(TASK_STATE_WAITING) + , int(TASK_STATE_EXECUTING) ); + + task_root_type * lock_verify = 0 ; + + if ( claim_state == int(TASK_STATE_WAITING) ) { + + // Transitioned this task from waiting to executing + // Update the queue to the next entry and release the lock + + task_root_type * const next = + *((task_root_type * volatile *) & task_claim->m_next ); + + *((task_root_type * volatile *) & task_claim->m_next ) = 0 ; + + lock_verify = atomic_compare_exchange( queue , q_lock , next ); + } + + if ( ( claim_state != int(TASK_STATE_WAITING) ) | + ( q_lock != lock_verify ) ) { + + printf( "CudaTaskPolicyQueue::pop_ready_task(0x%lx) task(0x%lx) state(%d) ERROR %s\n" + , (unsigned long) queue + , (unsigned long) task + , claim_state + , ( claim_state != int(TASK_STATE_WAITING) + ? "NOT WAITING" + : "UNLOCK" ) ); + Kokkos::abort("CudaTaskPolicyQueue::pop_ready_task"); + } + + task = task_claim ; + } + } + return task ; +} + +//----------------------------------------------------------------------- + +__device__ +void CudaTaskPolicyQueue::complete_executed_task( + CudaTaskPolicyQueue::task_root_type * task ) +{ + task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED); + + +#ifdef DETAILED_PRINT +printf( "CudaTaskPolicyQueue::complete_executed_task(0x%lx) state(%d) (%d)(%d,%d)\n" + , (unsigned long) task + , task->m_state + , blockIdx.x + , threadIdx.x + , threadIdx.y + ); +#endif + + // State is either executing or if respawned then waiting, + // try to transition from executing to complete. + // Reads the current value. 
+
+  const int state_old =
+    atomic_compare_exchange( & task->m_state
+                           , int(Kokkos::Experimental::TASK_STATE_EXECUTING)
+                           , int(Kokkos::Experimental::TASK_STATE_COMPLETE) );
+
+  if ( int(Kokkos::Experimental::TASK_STATE_WAITING) == state_old ) {
+    /* Task requested a respawn so reschedule it */
+    schedule_task( task , false /* not initial spawn */ );
+  }
+  else if ( int(Kokkos::Experimental::TASK_STATE_EXECUTING) == state_old ) {
+    /* Task is complete */
+
+    // Clear dependences of this task before locking wait queue
+
+    task->clear_dependence();
+
+    // Stop other tasks from adding themselves to this task's wait queue.
+    // The wait queue is updated concurrently so guard with an atomic.
+
+    task_root_type * wait_queue     = *((task_root_type * volatile *) & task->m_wait );
+    task_root_type * wait_queue_old = 0 ;
+
+    do { // retry until the CAS swaps the observed head for q_denied; 'wait_queue' then holds the detached list
+      wait_queue_old = wait_queue ;
+      wait_queue     = atomic_compare_exchange( & task->m_wait , wait_queue_old , q_denied );
+    } while ( wait_queue_old != wait_queue );
+
+    // The task has been removed from ready queue and
+    // execution is complete so decrement the reference count.
+    // The reference count was incremented by the initial spawning.
+    // The task may be deleted if this was the last reference.
+
+    task_root_type::assign( & task , 0 ); // also overwrites the local 'task' pointer with 0
+
+    // Pop waiting tasks and schedule them
+    while ( wait_queue ) {
+      task_root_type * const x = wait_queue ; wait_queue = x->m_next ; x->m_next = 0 ;
+      schedule_task( x , false /* not initial spawn */ );
+    }
+  }
+  else {
+    printf( "CudaTaskPolicyQueue::complete_executed_task(0x%lx) ERROR state_old(%d) dep_size(%d)\n"
+          , (unsigned long)( task )
+          , int(state_old)
+          , task->m_dep_size
+          );
+    Kokkos::abort("CudaTaskPolicyQueue::complete_executed_task" );
+  }
+
+  // If the task was respawned it may have already been
+  // put in a ready queue and the count incremented.
+  // By decrementing the count last it will never go to zero
+  // with a ready or executing task.
+
+  atomic_fetch_add( & m_count_ready , -1 );
+}
+// Subtracts k (> 0) from the latch's m_dep_size; when the count reaches zero the latch is completed and its waiters are scheduled.
+__device__
+void TaskMember< Kokkos::Cuda , void , void >::latch_add( const int k )
+{
+  typedef TaskMember< Kokkos::Cuda , void , void >  task_root_type ;
+
+  task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED);
+
+  const bool ok_input = 0 < k ;
+
+  const int count = ok_input ? atomic_fetch_add( & m_dep_size , -k ) - k
+                             : k ;
+
+  const bool ok_count = 0 <= count ;
+
+  const int state = ( ! ok_input || 0 != count ) ? TASK_STATE_WAITING : // never attempt the completing CAS for an invalid k (k == 0 would otherwise look like "count reached zero")
+    atomic_compare_exchange( & m_state
+                           , TASK_STATE_WAITING
+                           , TASK_STATE_COMPLETE );
+
+  const bool ok_state = state == TASK_STATE_WAITING ;
+
+  if ( ! ok_input || ! ok_count || ! ok_state ) { // k <= 0 is always an error, including k == 0
+    printf( "CudaTaskPolicyQueue::latch_add[0x%lx](%d) ERROR %s %d\n"
+          , (unsigned long) this
+          , k
+          , ( ! ok_input ? "Non-positive input" :
+            ( ! ok_count ? "Negative count" : "Bad State" ) )
+          , ( ! ok_input ? k :
+            ( ! ok_count ? count : state ) )
+          );
+    Kokkos::abort( "CudaTaskPolicyQueue::latch_add ERROR" );
+  }
+  else if ( 0 == count ) {
+    // Stop other tasks from adding themselves to this latch's wait queue.
+    // The wait queue is updated concurrently so guard with an atomic.
+
+    CudaTaskPolicyQueue & policy = *m_policy ;
+    task_root_type * wait_queue     = *((task_root_type * volatile *) &m_wait);
+    task_root_type * wait_queue_old = 0 ;
+
+    do { // retry until the CAS swaps the observed head for q_denied; 'wait_queue' then holds the detached list
+      wait_queue_old = wait_queue ;
+      wait_queue     = atomic_compare_exchange( & m_wait , wait_queue_old , q_denied );
+    } while ( wait_queue_old != wait_queue );
+
+    // Pop waiting tasks and schedule them
+    while ( wait_queue ) {
+      task_root_type * const x = wait_queue ; wait_queue = x->m_next ; x->m_next = 0 ;
+      policy.schedule_task( x , false /* not initial spawn */ );
+    }
+  }
+}
+
+//----------------------------------------------------------------------------
+
+void CudaTaskPolicyQueue::reschedule_task(
+  CudaTaskPolicyQueue::task_root_type * const task )
+{
+  // Reschedule transitions from executing back to waiting.
+  const int old_state =
+    atomic_compare_exchange( & task->m_state
+                           , int(TASK_STATE_EXECUTING)
+                           , int(TASK_STATE_WAITING) );
+
+  if ( old_state != int(TASK_STATE_EXECUTING) ) {
+
+    printf( "CudaTaskPolicyQueue::reschedule_task(0x%lx) ERROR state(%d)\n"
+          , (unsigned long) task
+          , old_state
+          );
+    Kokkos::abort("CudaTaskPolicyQueue::reschedule" );
+  }
+}
+// Pushes 'task' onto the wait queue of its first incomplete dependence, or onto its ready queue (task->m_queue) when all dependences are closed.
+KOKKOS_FUNCTION
+void CudaTaskPolicyQueue::schedule_task(
+  CudaTaskPolicyQueue::task_root_type * const task ,
+  const bool initial_spawn )
+{
+  task_root_type * const q_lock = reinterpret_cast<task_root_type*>(QLOCK);
+  task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED);
+
+  //----------------------------------------
+  // State is either constructing or already waiting.
+  // If constructing then transition to waiting.
+
+  {
+    const int old_state = atomic_compare_exchange( & task->m_state
+                                                 , int(TASK_STATE_CONSTRUCTING)
+                                                 , int(TASK_STATE_WAITING) );
+
+    // Head of linked list of tasks waiting on this task
+    task_root_type * const waitTask =
+      *((task_root_type * volatile const *) & task->m_wait );
+
+    // Member of linked list of tasks waiting on some other task
+    task_root_type * const next =
+      *((task_root_type * volatile const *) & task->m_next );
+
+    // An incomplete and non-executing task has:
+    //   task->m_state == TASK_STATE_CONSTRUCTING or TASK_STATE_WAITING
+    //   task->m_wait  != q_denied
+    //   task->m_next  == 0
+    //
+    if ( ( q_denied == waitTask ) ||
+         ( 0 != next ) ||
+         ( old_state != int(TASK_STATE_CONSTRUCTING) &&
+           old_state != int(TASK_STATE_WAITING) ) ) {
+      printf( "CudaTaskPolicyQueue::schedule_task(0x%lx) STATE ERROR: state(%d) wait(0x%lx) next(0x%lx)\n"
+            , (unsigned long) task
+            , old_state
+            , (unsigned long) waitTask
+            , (unsigned long) next );
+      Kokkos::abort("CudaTaskPolicyQueue::schedule" );
+    }
+  }
+
+  //----------------------------------------
+
+  if ( initial_spawn ) {
+    // The initial spawn of a task increments the reference count
+    // for the task's existence in either a waiting or ready queue
+    // until the task has completed.
+    // Completing the task's execution is the matching
+    // decrement of the reference count.
+    task_root_type::assign( 0 , task );
+  }
+
+  //----------------------------------------
+  // Insert this task into a dependence task that is not complete.
+  // Push on to that task's wait queue.
+
+  bool attempt_insert_in_queue = true ; // cleared once a CAS succeeds in linking the task into some queue
+
+  task_root_type * volatile * queue =
+    task->m_dep_size ? & task->m_dep[0]->m_wait : (task_root_type **) 0 ;
+
+  for ( int i = 0 ; attempt_insert_in_queue && ( 0 != queue ) ; ) { // 'i' only advances when a dependence's wait queue is found closed
+
+    task_root_type * const head_value_old = *queue ;
+
+    if ( q_denied == head_value_old ) {
+      // Wait queue is closed because task is complete,
+      // try again with the next dependence wait queue.
+      ++i ;
+      queue = i < task->m_dep_size ? & task->m_dep[i]->m_wait
+                                   : (task_root_type **) 0 ;
+    }
+    else {
+
+      // Wait queue is open and not denied.
+      // Have exclusive access to this task.
+      // Assign m_next assuming a successful insertion into the queue.
+      // Fence the memory assignment before attempting the CAS.
+
+      *((task_root_type * volatile *) & task->m_next ) = head_value_old ;
+
+      memory_fence();
+
+      // Attempt to insert this task into the queue.
+      // If fails then continue the attempt.
+
+      attempt_insert_in_queue =
+        head_value_old != atomic_compare_exchange(queue,head_value_old,task);
+    }
+  }
+
+  //----------------------------------------
+  // All dependences are complete, insert into the ready list
+
+  if ( attempt_insert_in_queue ) {
+
+    // Increment the count of ready tasks.
+    // Count will be decremented when task is complete.
+
+    atomic_fetch_add( & m_count_ready , 1 );
+
+    queue = task->m_queue ;
+
+    while ( attempt_insert_in_queue ) {
+
+      // A locked queue is being popped.
+
+      task_root_type * const head_value_old = *queue ;
+
+      if ( q_lock != head_value_old ) {
+        // Read the head of ready queue,
+        // if same as previous value then CAS locks the ready queue
+
+        // Have exclusive access to this task,
+        // assign to head of queue, assuming successful insert
+        // Fence assignment before attempting insert.
+        *((task_root_type * volatile *) & task->m_next ) = head_value_old ;
+
+        memory_fence();
+
+        attempt_insert_in_queue =
+          head_value_old != atomic_compare_exchange(queue,head_value_old,task);
+      }
+    }
+  }
+}
+// Returns the task's storage (task->m_size_alloc bytes) to the memory pool m_space.
+void CudaTaskPolicyQueue::deallocate_task
+  ( CudaTaskPolicyQueue::task_root_type * const task )
+{
+  m_space.deallocate( task , task->m_size_alloc );
+}
+// Allocates one task from m_space with room for the task object plus a trailing array of dependence pointers; returns 0 if the pool allocation fails.
+KOKKOS_FUNCTION
+CudaTaskPolicyQueue::task_root_type *
+CudaTaskPolicyQueue::allocate_task
+  ( const unsigned arg_sizeof_task
+  , const unsigned arg_dep_capacity
+  , const unsigned arg_team_shmem
+  )
+{
+  const unsigned base_size = arg_sizeof_task + // rounded up to a multiple of sizeof(task_root_type*)
+    ( arg_sizeof_task % sizeof(task_root_type*)
+      ? sizeof(task_root_type*) - arg_sizeof_task % sizeof(task_root_type*)
+      : 0 );
+
+  const unsigned dep_capacity // ~0u selects the queue's default capacity
+    = ~0u == arg_dep_capacity
+    ? m_default_dependence_capacity
+    : arg_dep_capacity ;
+
+  const unsigned size_alloc =
+     base_size + sizeof(task_root_type*) * dep_capacity ;
+
+  task_root_type * const task =
+    reinterpret_cast<task_root_type*>( m_space.allocate( size_alloc ) );
+
+  if ( task != 0 ) {
+
+    // Initialize task's root and value data structure
+    // Calling function must copy construct the functor.
+
+    new( (void*) task ) task_root_type();
+
+    task->m_policy       = this ;
+    task->m_size_alloc   = size_alloc ;
+    task->m_dep_capacity = dep_capacity ;
+    task->m_shmem_size   = arg_team_shmem ;
+
+    if ( dep_capacity ) {
+      task->m_dep =
+        reinterpret_cast<task_root_type**>(
+        reinterpret_cast<unsigned char*>(task) + base_size ); // dependence array starts right after the (padded) task object
+
+      for ( unsigned i = 0 ; i < dep_capacity ; ++i )
+        task->task_root_type::m_dep[i] = 0 ;
+    }
+  }
+  return task ;
+}
+
+//----------------------------------------------------------------------------
+// Records that 'after' depends on 'before'; a no-op if either pointer is null, aborts if state, capacity or policy checks fail.
+void CudaTaskPolicyQueue::add_dependence
+  ( CudaTaskPolicyQueue::task_root_type * const after
+  , CudaTaskPolicyQueue::task_root_type * const before
+  )
+{
+  if ( ( after != 0 ) && ( before != 0 ) ) {
+
+    int const state = *((volatile const int *) & after->m_state );
+
+    // Only add dependence during construction or during execution.
+    // Both tasks must have the same policy.
+    // Dependence on non-full memory cannot be mixed with any other dependence.
+
+    const bool ok_state =
+      Kokkos::Experimental::TASK_STATE_CONSTRUCTING == state ||
+      Kokkos::Experimental::TASK_STATE_EXECUTING    == state ;
+
+    const bool ok_capacity =
+      after->m_dep_size < after->m_dep_capacity ;
+
+    const bool ok_policy =
+      after->m_policy == this && before->m_policy == this ;
+
+    if ( ok_state && ok_capacity && ok_policy ) {
+
+      ++after->m_dep_size ;
+
+      task_root_type::assign( after->m_dep + (after->m_dep_size-1) , before ); // store 'before' in the newly claimed slot; assign() also increments its reference count
+
+      memory_fence();
+    }
+    else {
+
+printf( "CudaTaskPolicyQueue::add_dependence( 0x%lx , 0x%lx ) ERROR %s\n"
+      , (unsigned long) after
+      , (unsigned long) before
+      , ( ! ok_state    ? "Task not constructing or executing" :
+        ( ! ok_capacity ? "Task Exceeded dependence capacity"
+                        : "Tasks from different policies" )) );
+
+      Kokkos::abort("CudaTaskPolicyQueue::add_dependence ERROR");
+    }
+  }
+}
+
+} /* namespace Impl */
+} /* namespace Experimental */
+} /* namespace Kokkos */
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Experimental {
+// Allocates a tracked CudaUVMSpace record and placement-constructs the CudaTaskPolicyQueue inside it.
+TaskPolicy< Kokkos::Cuda >::TaskPolicy
+  ( const unsigned arg_task_max_count
+  , const unsigned arg_task_max_size
+  , const unsigned arg_task_default_dependence_capacity
+  , const unsigned arg_task_team_size
+  )
+  : m_track()
+  , m_policy(0)
+{
+  // Allocate the queue data structure in UVM space
+
+  typedef Kokkos::Experimental::Impl::SharedAllocationRecord
+    < Kokkos::CudaUVMSpace , Impl::CudaTaskPolicyQueue::Destroy > record_type ;
+
+  record_type * record =
+    record_type::allocate( Kokkos::CudaUVMSpace()
+                         , "CudaUVM task queue"
+                         , sizeof(Impl::CudaTaskPolicyQueue)
+                         );
+
+  m_policy = reinterpret_cast< Impl::CudaTaskPolicyQueue * >( record->data() );
+
+  // Tasks are allocated with application's task size + sizeof(task_root_type)
+
+  const size_t full_task_size_estimate =
+    arg_task_max_size +
+    sizeof(task_root_type) +
+    sizeof(task_root_type*) * arg_task_default_dependence_capacity ;
+
+  new( m_policy )
+    Impl::CudaTaskPolicyQueue( arg_task_max_count
+                             , full_task_size_estimate // NOTE(review): size_t passed to a 'const unsigned' constructor parameter - confirm narrowing is acceptable
+                             , arg_task_default_dependence_capacity
+                             , arg_task_team_size );
+
+  record->m_destroy.m_policy = m_policy ; // lets Destroy::destroy_shared_allocation() find the queue when the record is released
+
+  m_track.assign_allocated_record_to_uninitialized( record );
+}
+// Kernel entry point: every launched thread runs the queue's driver loop.
+__global__
+static void kokkos_cuda_task_policy_queue_driver
+  ( Kokkos::Experimental::Impl::CudaTaskPolicyQueue * queue )
+{
+  queue->driver();
+}
+// Launches the driver kernel with one block per multiprocessor and blocks the host until it has finished.
+void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Cuda > & policy )
+{
+  const dim3 grid( Kokkos::Impl::cuda_internal_multiprocessor_count() , 1 , 1 );
+  const dim3 block( 1 , policy.m_policy->m_team_size , 1 );
+
+  
const int shared = 0 ; // Kokkos::Impl::CudaTraits::SharedMemoryUsage / 2 ; + const cudaStream_t stream = 0 ; + + +#ifdef DETAILED_PRINT +printf("kokkos_cuda_task_policy_queue_driver grid(%d,%d,%d) block(%d,%d,%d) shared(%d) policy(0x%lx)\n" + , grid.x , grid.y , grid.z + , block.x , block.y , block.z + , shared + , (unsigned long)( policy.m_policy ) ); +fflush(stdout); +#endif + + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + +/* + CUDA_SAFE_CALL( + cudaFuncSetCacheConfig( kokkos_cuda_task_policy_queue_driver + , cudaFuncCachePreferL1 ) ); + + CUDA_SAFE_CALL( cudaGetLastError() ); +*/ + + kokkos_cuda_task_policy_queue_driver<<< grid , block , shared , stream >>> + ( policy.m_policy ); + + CUDA_SAFE_CALL( cudaGetLastError() ); + + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + +#ifdef DETAILED_PRINT +printf("kokkos_cuda_task_policy_queue_driver end\n"); +fflush(stdout); +#endif + +} + +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +typedef TaskMember< Kokkos::Cuda , void , void > Task ; + +__host__ __device__ +Task::~TaskMember() +{ +} + +__host__ __device__ +void Task::assign( Task ** const lhs_ptr , Task * rhs ) +{ + Task * const q_denied = reinterpret_cast<Task*>(QDENIED); + + // Increment rhs reference count. + if ( rhs ) { atomic_fetch_add( & rhs->m_ref_count , 1 ); } + + if ( 0 == lhs_ptr ) return ; + + // Must have exclusive access to *lhs_ptr. + // Assign the pointer and retrieve the previous value. + // Cannot use atomic exchange since *lhs_ptr may be + // in Cuda register space. 
+ +#if 0 + + Task * const old_lhs = *((Task*volatile*)lhs_ptr); + + *((Task*volatile*)lhs_ptr) = rhs ; + + Kokkos::memory_fence(); + +#else + + Task * const old_lhs = *lhs_ptr ; + + *lhs_ptr = rhs ; + +#endif + + if ( old_lhs && rhs && old_lhs->m_policy != rhs->m_policy ) { + Kokkos::abort( "Kokkos::Impl::TaskMember<Kokkos::Cuda>::assign ERROR different queues"); + } + + if ( old_lhs ) { + + Kokkos::memory_fence(); + + // Decrement former lhs reference count. + // If reference count is zero task must be complete, then delete task. + // Task is ready for deletion when wait == q_denied + + int const count = atomic_fetch_add( & (old_lhs->m_ref_count) , -1 ) - 1 ; + int const state = old_lhs->m_state ; + Task * const wait = *((Task * const volatile *) & old_lhs->m_wait ); + + const bool ok_count = 0 <= count ; + + // If count == 0 then will be deleting + // and must either be constructing or complete. + const bool ok_state = 0 < count ? true : + ( ( state == int(TASK_STATE_CONSTRUCTING) && wait == 0 ) || + ( state == int(TASK_STATE_COMPLETE) && wait == q_denied ) ) + && + old_lhs->m_next == 0 && + old_lhs->m_dep_size == 0 ; + + if ( ! ok_count || ! ok_state ) { + + printf( "%s Kokkos::Impl::TaskManager<Kokkos::Cuda>::assign ERROR deleting task(0x%lx) m_ref_count(%d) m_state(%d) m_wait(0x%ld)\n" +#if defined( KOKKOS_ACTIVE_EXECUTION_SPACE_CUDA ) + , "CUDA " +#else + , "HOST " +#endif + , (unsigned long) old_lhs + , count + , state + , (unsigned long) wait ); + Kokkos::abort( "Kokkos::Impl::TaskMember<Kokkos::Cuda>::assign ERROR deleting"); + } + + if ( count == 0 ) { + // When 'count == 0' this thread has exclusive access to 'old_lhs' + +#ifdef DETAILED_PRINT +printf( "Task::assign(...) 
old_lhs(0x%lx) deallocate\n" + , (unsigned long) old_lhs + ); +#endif + + old_lhs->m_policy->deallocate_task( old_lhs ); + } + } +} + +//---------------------------------------------------------------------------- + +__device__ +int Task::get_dependence() const +{ + return m_dep_size ; +} + +__device__ +Task * Task::get_dependence( int i ) const +{ + Task * const t = ((Task*volatile*)m_dep)[i] ; + + if ( Kokkos::Experimental::TASK_STATE_EXECUTING != m_state || i < 0 || m_dep_size <= i || 0 == t ) { + +printf( "TaskMember< Cuda >::get_dependence ERROR : task[%lx]{ state(%d) dep_size(%d) dep[%d] = %lx }\n" + , (unsigned long) this + , m_state + , m_dep_size + , i + , (unsigned long) t + ); + + Kokkos::abort("TaskMember< Cuda >::get_dependence ERROR"); + } + + return t ; +} + +//---------------------------------------------------------------------------- + +__device__ __host__ +void Task::clear_dependence() +{ + for ( int i = m_dep_size - 1 ; 0 <= i ; --i ) { + assign( m_dep + i , 0 ); + } + + *((volatile int *) & m_dep_size ) = 0 ; + + memory_fence(); +} + +//---------------------------------------------------------------------------- + + +//---------------------------------------------------------------------------- + +} /* namespace Impl */ +} /* namespace Experimental */ +} /* namespace Kokkos */ + + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e71512f0391b3e264341222b82918d9901080061 --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.hpp @@ -0,0 +1,833 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_CUDA_TASKPOLICY_HPP +#define KOKKOS_CUDA_TASKPOLICY_HPP + +#include <Kokkos_Core_fwd.hpp> +#include <Kokkos_Cuda.hpp> +#include <Kokkos_TaskPolicy.hpp> + +#if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKPOLICY ) + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +struct CudaTaskPolicyQueue ; + +/** \brief Base class for all Kokkos::Cuda tasks */ +template<> +class TaskMember< Kokkos::Cuda , void , void > { +public: + + template< class > friend class Kokkos::Experimental::TaskPolicy ; + friend struct CudaTaskPolicyQueue ; + + typedef void (* function_single_type) ( TaskMember * ); + typedef void (* function_team_type) ( TaskMember * , Kokkos::Impl::CudaTeamMember & ); + +private: + + CudaTaskPolicyQueue * m_policy ; + TaskMember * volatile * m_queue ; + function_team_type m_team ; ///< Apply function on CUDA + function_single_type m_serial ; ///< Apply function on CUDA + TaskMember ** m_dep ; ///< Dependences + TaskMember * m_wait ; ///< Linked list of tasks waiting on this task + TaskMember * m_next ; ///< Linked list of tasks waiting on a different task + int m_dep_capacity ; ///< Capacity of dependences + int m_dep_size ; ///< Actual count of dependences + int m_size_alloc ; + int m_shmem_size ; + int m_ref_count ; ///< Reference count + int m_state ; ///< State of the task + + + TaskMember( TaskMember && ) = delete ; + TaskMember( const TaskMember & ) = delete ; + TaskMember & operator = ( TaskMember && ) = delete ; + TaskMember & operator = ( const TaskMember & ) = delete ; + +protected: + + KOKKOS_INLINE_FUNCTION + TaskMember() + : m_policy(0) + , m_queue(0) + , m_team(0) + , m_serial(0) + , m_dep(0) + , m_wait(0) + , m_next(0) + , 
m_size_alloc(0) + , m_dep_capacity(0) + , m_dep_size(0) + , m_shmem_size(0) + , m_ref_count(0) + , m_state( TASK_STATE_CONSTRUCTING ) + {} + +public: + + KOKKOS_FUNCTION + ~TaskMember(); + + KOKKOS_INLINE_FUNCTION + int reference_count() const + { return *((volatile int *) & m_ref_count ); } + + // Cannot use the function pointer to verify the type + // since the function pointer is not unique between + // Host and Cuda. Don't run verificaton for Cuda. + // Assume testing on Host-only back-end will catch such errors. + + template< typename ResultType > + KOKKOS_INLINE_FUNCTION static + TaskMember * verify_type( TaskMember * t ) { return t ; } + + //---------------------------------------- + /* Inheritence Requirements on task types: + * + * class DerivedTaskType + * : public TaskMember< Cuda , DerivedType::value_type , FunctorType > + * { ... }; + * + * class TaskMember< Cuda , DerivedType::value_type , FunctorType > + * : public TaskMember< Cuda , DerivedType::value_type , void > + * , public Functor + * { ... }; + * + * If value_type != void + * class TaskMember< Cuda , value_type , void > + * : public TaskMember< Cuda , void , void > + * + * Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ] + * + */ + //---------------------------------------- + // If after the 'apply' the task's state is waiting + // then it will be rescheduled and called again. + // Otherwise the functor must be destroyed. 
+ + template< class DerivedTaskType , class Tag > + __device__ static + void apply_single( + typename std::enable_if + <( std::is_same< Tag , void >::value && + std::is_same< typename DerivedTaskType::result_type , void >::value + ), TaskMember * >::type t ) + { + typedef typename DerivedTaskType::functor_type functor_type ; + + functor_type * const f = + static_cast< functor_type * >( static_cast< DerivedTaskType * >(t) ); + + f->apply(); + + if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) { + f->~functor_type(); + } + } + + template< class DerivedTaskType , class Tag > + __device__ static + void apply_single( + typename std::enable_if + <( std::is_same< Tag , void >::value && + ! std::is_same< typename DerivedTaskType::result_type , void >::value + ), TaskMember * >::type t ) + { + typedef typename DerivedTaskType::functor_type functor_type ; + + DerivedTaskType * const self = static_cast< DerivedTaskType * >(t); + functor_type * const f = static_cast< functor_type * >( self ); + + f->apply( self->m_result ); + + if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) { + f->~functor_type(); + } + } + + template< class DerivedTaskType , class Tag > + __device__ + void set_apply_single() + { + m_serial = & TaskMember::template apply_single<DerivedTaskType,Tag> ; + } + + //---------------------------------------- + + template< class DerivedTaskType , class Tag > + __device__ static + void apply_team( + typename std::enable_if + <( std::is_same<Tag,void>::value && + std::is_same<typename DerivedTaskType::result_type,void>::value + ), TaskMember * >::type t + , Kokkos::Impl::CudaTeamMember & member + ) + { + typedef typename DerivedTaskType::functor_type functor_type ; + + functor_type * const f = + static_cast< functor_type * >( static_cast< DerivedTaskType * >(t) ); + + f->apply( member ); + + __syncthreads(); // Wait for team to finish calling function + + if ( threadIdx.x == 0 && + threadIdx.y == 0 && + t->m_state == 
int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) { + f->~functor_type(); + } + } + + template< class DerivedTaskType , class Tag > + __device__ static + void apply_team( + typename std::enable_if + <( std::is_same<Tag,void>::value && + ! std::is_same<typename DerivedTaskType::result_type,void>::value + ), TaskMember * >::type t + , Kokkos::Impl::CudaTeamMember & member + ) + { + typedef typename DerivedTaskType::functor_type functor_type ; + + DerivedTaskType * const self = static_cast< DerivedTaskType * >(t); + functor_type * const f = static_cast< functor_type * >( self ); + + f->apply( member , self->m_result ); + + __syncthreads(); // Wait for team to finish calling function + + if ( threadIdx.x == 0 && + threadIdx.y == 0 && + t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) { + f->~functor_type(); + } + } + + template< class DerivedTaskType , class Tag > + __device__ + void set_apply_team() + { + m_team = & TaskMember::template apply_team<DerivedTaskType,Tag> ; + } + + //---------------------------------------- + + KOKKOS_FUNCTION static + void assign( TaskMember ** const lhs , TaskMember * const rhs ); + + __device__ + TaskMember * get_dependence( int i ) const ; + + __device__ + int get_dependence() const ; + + KOKKOS_FUNCTION void clear_dependence(); + + __device__ + void latch_add( const int k ); + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION static + void construct_result( TaskMember * const ) {} + + typedef FutureValueTypeIsVoidError get_result_type ; + + KOKKOS_INLINE_FUNCTION + get_result_type get() const { return get_result_type() ; } + + KOKKOS_INLINE_FUNCTION + Kokkos::Experimental::TaskState get_state() const { return Kokkos::Experimental::TaskState( m_state ); } + +}; + +/** \brief A Future< Kokkos::Cuda , ResultType > will cast + * from TaskMember< Kokkos::Cuda , void , void > + * to TaskMember< Kokkos::Cuda , ResultType , void > + * to query the result. 
+ */ +template< class ResultType > +class TaskMember< Kokkos::Cuda , ResultType , void > + : public TaskMember< Kokkos::Cuda , void , void > +{ +public: + + typedef ResultType result_type ; + + result_type m_result ; + + typedef const result_type & get_result_type ; + + KOKKOS_INLINE_FUNCTION + get_result_type get() const { return m_result ; } + + KOKKOS_INLINE_FUNCTION static + void construct_result( TaskMember * const ptr ) + { + new((void*)(& ptr->m_result)) result_type(); + } + + TaskMember() = delete ; + TaskMember( TaskMember && ) = delete ; + TaskMember( const TaskMember & ) = delete ; + TaskMember & operator = ( TaskMember && ) = delete ; + TaskMember & operator = ( const TaskMember & ) = delete ; +}; + +/** \brief Callback functions will cast + * from TaskMember< Kokkos::Cuda , void , void > + * to TaskMember< Kokkos::Cuda , ResultType , FunctorType > + * to execute work functions. + */ +template< class ResultType , class FunctorType > +class TaskMember< Kokkos::Cuda , ResultType , FunctorType > + : public TaskMember< Kokkos::Cuda , ResultType , void > + , public FunctorType +{ +public: + typedef ResultType result_type ; + typedef FunctorType functor_type ; + + KOKKOS_INLINE_FUNCTION static + void copy_construct( TaskMember * const ptr + , const functor_type & arg_functor ) + { + typedef TaskMember< Kokkos::Cuda , ResultType , void > base_type ; + + new((void*)static_cast<FunctorType*>(ptr)) functor_type( arg_functor ); + + base_type::construct_result( static_cast<base_type*>( ptr ) ); + } + + TaskMember() = delete ; + TaskMember( TaskMember && ) = delete ; + TaskMember( const TaskMember & ) = delete ; + TaskMember & operator = ( TaskMember && ) = delete ; + TaskMember & operator = ( const TaskMember & ) = delete ; +}; + +//---------------------------------------------------------------------------- + +namespace { + +template< class DerivedTaskType , class Tag > +__global__ +void cuda_set_apply_single( DerivedTaskType * task ) +{ + typedef 
Kokkos::Experimental::Impl::TaskMember< Kokkos::Cuda , void , void > + task_root_type ; + + task->task_root_type::template set_apply_single< DerivedTaskType , Tag >(); +} + +template< class DerivedTaskType , class Tag > +__global__ +void cuda_set_apply_team( DerivedTaskType * task ) +{ + typedef Kokkos::Experimental::Impl::TaskMember< Kokkos::Cuda , void , void > + task_root_type ; + + task->task_root_type::template set_apply_team< DerivedTaskType , Tag >(); +} + +} /* namespace */ +} /* namespace Impl */ +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +struct CudaTaskPolicyQueue { + + enum { NPRIORITY = 3 }; + + // Must use UVM so that tasks can be created in both + // Host and Cuda space. + + typedef Kokkos::Experimental::MemoryPool< Kokkos::CudaUVMSpace > + memory_space ; + + typedef Kokkos::Experimental::Impl::TaskMember< Kokkos::Cuda , void , void > + task_root_type ; + + memory_space m_space ; + task_root_type * m_team[ NPRIORITY ] ; + task_root_type * m_serial[ NPRIORITY ]; + int m_team_size ; + int m_default_dependence_capacity ; + int volatile m_count_ready ; ///< Ready plus executing tasks + + // Execute tasks until all non-waiting tasks are complete + __device__ + void driver(); + + __device__ static + task_root_type * pop_ready_task( task_root_type * volatile * const queue ); + + // When a task finishes executing. 
+ __device__ + void complete_executed_task( task_root_type * ); + + KOKKOS_FUNCTION void schedule_task( task_root_type * const + , const bool initial_spawn = true ); + KOKKOS_FUNCTION void reschedule_task( task_root_type * const ); + KOKKOS_FUNCTION + void add_dependence( task_root_type * const after + , task_root_type * const before ); + + + CudaTaskPolicyQueue() = delete ; + CudaTaskPolicyQueue( CudaTaskPolicyQueue && ) = delete ; + CudaTaskPolicyQueue( const CudaTaskPolicyQueue & ) = delete ; + CudaTaskPolicyQueue & operator = ( CudaTaskPolicyQueue && ) = delete ; + CudaTaskPolicyQueue & operator = ( const CudaTaskPolicyQueue & ) = delete ; + + + ~CudaTaskPolicyQueue(); + + // Construct only on the Host + CudaTaskPolicyQueue + ( const unsigned arg_task_max_count + , const unsigned arg_task_max_size + , const unsigned arg_task_default_dependence_capacity + , const unsigned arg_task_team_size + ); + + struct Destroy { + CudaTaskPolicyQueue * m_policy ; + void destroy_shared_allocation(); + }; + + //---------------------------------------- + /** \brief Allocate and construct a task. 
+ * + * Allocate space for DerivedTaskType followed + * by TaskMember*[ dependence_capacity ] + */ + KOKKOS_FUNCTION + task_root_type * + allocate_task( const unsigned arg_sizeof_task + , const unsigned arg_dep_capacity + , const unsigned arg_team_shmem = 0 ); + + KOKKOS_FUNCTION void deallocate_task( task_root_type * const ); +}; + +} /* namespace Impl */ +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { + +void wait( TaskPolicy< Kokkos::Cuda > & ); + +template<> +class TaskPolicy< Kokkos::Cuda > +{ +public: + + typedef Kokkos::Cuda execution_space ; + typedef TaskPolicy execution_policy ; + typedef Kokkos::Impl::CudaTeamMember member_type ; + +private: + + typedef Impl::TaskMember< Kokkos::Cuda , void , void > task_root_type ; + typedef Kokkos::Experimental::MemoryPool< Kokkos::CudaUVMSpace > memory_space ; + typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ; + + track_type m_track ; + Impl::CudaTaskPolicyQueue * m_policy ; + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION static + const task_root_type * get_task_root( const FunctorType * f ) + { + typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ; + return static_cast< const task_root_type * >( static_cast< const task_type * >(f) ); + } + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION static + task_root_type * get_task_root( FunctorType * f ) + { + typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ; + return static_cast< task_root_type * >( static_cast< task_type * >(f) ); + } + +public: + + TaskPolicy + ( const unsigned arg_task_max_count + , const unsigned arg_task_max_size + , const unsigned arg_task_default_dependence_capacity = 4 + , const unsigned 
arg_task_team_size = 0 /* choose default */ + ); + + KOKKOS_FUNCTION TaskPolicy() = default ; + KOKKOS_FUNCTION TaskPolicy( TaskPolicy && rhs ) = default ; + KOKKOS_FUNCTION TaskPolicy( const TaskPolicy & rhs ) = default ; + KOKKOS_FUNCTION TaskPolicy & operator = ( TaskPolicy && rhs ) = default ; + KOKKOS_FUNCTION TaskPolicy & operator = ( const TaskPolicy & rhs ) = default ; + + KOKKOS_FUNCTION + int allocated_task_count() const { return 0 ; } + + //---------------------------------------- + // Create serial-thread task + // Main process and tasks must use different functions + // to work around CUDA limitation where __host__ __device__ + // functions are not allowed to invoke templated __global__ functions. + + template< class FunctorType > + Future< typename FunctorType::value_type , execution_space > + proc_create( const FunctorType & arg_functor + , const unsigned arg_dep_capacity = ~0u ) const + { + typedef typename FunctorType::value_type value_type ; + + typedef Impl::TaskMember< execution_space , value_type , FunctorType > + task_type ; + + task_type * const task = + static_cast<task_type*>( + m_policy->allocate_task( sizeof(task_type) , arg_dep_capacity ) ); + + if ( task ) { + // The root part of the class has been constructed. + // Must now construct the functor and result specific part. + + task_type::copy_construct( task , arg_functor ); + + // Setting the apply pointer on the device requires code + // executing on the GPU. This function is called on the + // host process so a kernel must be run. + + // Launching a kernel will cause the allocated task in + // UVM memory to be copied to the GPU. + // Synchronize to guarantee non-concurrent access + // between host and device. 
+ + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + + Impl::cuda_set_apply_single<task_type,void><<<1,1>>>( task ); + + CUDA_SAFE_CALL( cudaGetLastError() ); + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + } + + return Future< value_type , execution_space >( task ); + } + + template< class FunctorType > + __device__ + Future< typename FunctorType::value_type , execution_space > + task_create( const FunctorType & arg_functor + , const unsigned arg_dep_capacity = ~0u ) const + { + typedef typename FunctorType::value_type value_type ; + + typedef Impl::TaskMember< execution_space , value_type , FunctorType > + task_type ; + + task_type * const task = + static_cast<task_type*>( + m_policy->allocate_task( sizeof(task_type) , arg_dep_capacity ) ); + + if ( task ) { + // The root part of the class has been constructed. + // Must now construct the functor and result specific part. + + task_type::copy_construct( task , arg_functor ); + + // Setting the apply pointer on the device requires code + // executing on the GPU. If this function is called on the + // Host then a kernel must be run. + + task->task_root_type::template set_apply_single< task_type , void >(); + } + + return Future< value_type , execution_space >( task ); + } + + //---------------------------------------- + // Create thread-team task + // Main process and tasks must use different functions + // to work around CUDA limitation where __host__ __device__ + // functions are not allowed to invoke templated __global__ functions. 
+ + template< class FunctorType > + Future< typename FunctorType::value_type , execution_space > + proc_create_team( const FunctorType & arg_functor + , const unsigned arg_dep_capacity = ~0u ) const + { + typedef typename FunctorType::value_type value_type ; + + typedef Impl::TaskMember< execution_space , value_type , FunctorType > + task_type ; + + const unsigned team_shmem_size = + Kokkos::Impl::FunctorTeamShmemSize< FunctorType >::value + ( arg_functor , m_policy->m_team_size ); + + task_type * const task = + static_cast<task_type*>( + m_policy->allocate_task( sizeof(task_type) , arg_dep_capacity , team_shmem_size ) ); + + if ( task ) { + // The root part of the class has been constructed. + // Must now construct the functor and result specific part. + + task_type::copy_construct( task , arg_functor ); + + // Setting the apply pointer on the device requires code + // executing on the GPU. This function is called on the + // host process so a kernel must be run. + + // Launching a kernel will cause the allocated task in + // UVM memory to be copied to the GPU. + // Synchronize to guarantee non-concurrent access + // between host and device. 
+ + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + + Impl::cuda_set_apply_team<task_type,void><<<1,1>>>( task ); + + CUDA_SAFE_CALL( cudaGetLastError() ); + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + } + + return Future< value_type , execution_space >( task ); + } + + template< class FunctorType > + __device__ + Future< typename FunctorType::value_type , execution_space > + task_create_team( const FunctorType & arg_functor + , const unsigned arg_dep_capacity = ~0u ) const + { + typedef typename FunctorType::value_type value_type ; + + typedef Impl::TaskMember< execution_space , value_type , FunctorType > + task_type ; + + const unsigned team_shmem_size = + Kokkos::Impl::FunctorTeamShmemSize< FunctorType >::value + ( arg_functor , m_policy->m_team_size ); + + task_type * const task = + static_cast<task_type*>( + m_policy->allocate_task( sizeof(task_type) , arg_dep_capacity , team_shmem_size ) ); + + if ( task ) { + // The root part of the class has been constructed. + // Must now construct the functor and result specific part. + + task_type::copy_construct( task , arg_functor ); + + // Setting the apply pointer on the device requires code + // executing on the GPU. If this function is called on the + // Host then a kernel must be run. 
+ + task->task_root_type::template set_apply_team< task_type , void >(); + } + + return Future< value_type , execution_space >( task ); + } + + //---------------------------------------- + + Future< Latch , execution_space > + KOKKOS_INLINE_FUNCTION + create_latch( const int N ) const + { + task_root_type * const task = + m_policy->allocate_task( sizeof(task_root_type) , 0 , 0 ); + task->m_dep_size = N ; // Using m_dep_size for latch counter + task->m_state = TASK_STATE_WAITING ; + return Future< Latch , execution_space >( task ); + } + + //---------------------------------------- + + template< class A1 , class A2 , class A3 , class A4 > + KOKKOS_INLINE_FUNCTION + void add_dependence( const Future<A1,A2> & after + , const Future<A3,A4> & before + , typename std::enable_if + < std::is_same< typename Future<A1,A2>::execution_space , execution_space >::value + && + std::is_same< typename Future<A3,A4>::execution_space , execution_space >::value + >::type * = 0 + ) const + { m_policy->add_dependence( after.m_task , before.m_task ); } + + template< class FunctorType , class A3 , class A4 > + KOKKOS_INLINE_FUNCTION + void add_dependence( FunctorType * task_functor + , const Future<A3,A4> & before + , typename std::enable_if + < std::is_same< typename Future<A3,A4>::execution_space , execution_space >::value + >::type * = 0 + ) const + { m_policy->add_dependence( get_task_root(task_functor) , before.m_task ); } + + + template< class ValueType > + KOKKOS_INLINE_FUNCTION + const Future< ValueType , execution_space > & + spawn( const Future< ValueType , execution_space > & f + , const bool priority = false ) const + { + if ( f.m_task ) { + f.m_task->m_queue = + ( f.m_task->m_team != 0 + ? & ( m_policy->m_team[ priority ? 0 : 1 ] ) + : & ( m_policy->m_serial[ priority ? 
0 : 1 ] ) ); + m_policy->schedule_task( f.m_task ); + } + return f ; + } + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION + void respawn( FunctorType * task_functor + , const bool priority = false ) const + { + task_root_type * const t = get_task_root(task_functor); + t->m_queue = + ( t->m_team != 0 ? & ( m_policy->m_team[ priority ? 0 : 1 ] ) + : & ( m_policy->m_serial[ priority ? 0 : 1 ] ) ); + m_policy->reschedule_task( t ); + } + + // When a create method fails by returning a null Future + // the task that called the create method may respawn + // with a dependence on memory becoming available. + // This is a race as more than one task may be respawned + // with this need. + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION + void respawn_needing_memory( FunctorType * task_functor ) const + { + task_root_type * const t = get_task_root(task_functor); + t->m_queue = + ( t->m_team != 0 ? & ( m_policy->m_team[ 2 ] ) + : & ( m_policy->m_serial[ 2 ] ) ); + m_policy->reschedule_task( t ); + } + + //---------------------------------------- + // Functions for an executing task functor to query dependences, + // set new dependences, and respawn itself. 
+ + template< class FunctorType > + KOKKOS_INLINE_FUNCTION + Future< void , execution_space > + get_dependence( const FunctorType * task_functor , int i ) const + { + return Future<void,execution_space>( + get_task_root(task_functor)->get_dependence(i) + ); + } + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION + int get_dependence( const FunctorType * task_functor ) const + { return get_task_root(task_functor)->get_dependence(); } + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION + void clear_dependence( FunctorType * task_functor ) const + { get_task_root(task_functor)->clear_dependence(); } + + //---------------------------------------- + + __device__ + static member_type member_single() + { + return + member_type( 0 /* shared memory pointer */ + , 0 /* shared memory begin offset */ + , 0 /* shared memory end offset */ + , 0 /* scratch level_1 pointer */ + , 0 /* scratch level_1 size */ + , 0 /* league rank */ + , 1 /* league size */ ); + } + + friend void wait( TaskPolicy< Kokkos::Cuda > & ); +}; + +} /* namespace Experimental */ +} /* namespace Kokkos */ + + +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #ifndef KOKKOS_CUDA_TASKPOLICY_HPP */ + + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0b8427cbe1e9664a41b6bb8b33b21320ad613d78 --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp @@ -0,0 +1,298 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef KOKKOS_CUDA_VECTORIZATION_HPP +#define KOKKOS_CUDA_VECTORIZATION_HPP + +#include <Kokkos_Macros.hpp> + +/* only compile this file if CUDA is enabled for Kokkos */ +#ifdef KOKKOS_HAVE_CUDA + +#include <Kokkos_Cuda.hpp> + +namespace Kokkos { + + +// Shuffle only makes sense on >= Kepler GPUs; it doesn't work on CPUs +// or other GPUs. We provide a generic definition (which is trivial +// and doesn't do what it claims to do) because we don't actually use +// this function unless we are on a suitable GPU, with a suitable +// Scalar type. (For example, in the mat-vec, the "ThreadsPerRow" +// internal parameter depends both on the ExecutionSpace and the Scalar type, +// and it controls whether shfl_down() gets called.) +namespace Impl { + + template< typename Scalar > + struct shfl_union { + enum {n = sizeof(Scalar)/4}; + float fval[n]; + KOKKOS_INLINE_FUNCTION + Scalar value() { + return *(Scalar*) fval; + } + KOKKOS_INLINE_FUNCTION + void operator= (Scalar& value_) { + float* const val_ptr = (float*) &value_; + for(int i=0; i<n ; i++) { + fval[i] = val_ptr[i]; + } + } + KOKKOS_INLINE_FUNCTION + void operator= (const Scalar& value_) { + float* const val_ptr = (float*) &value_; + for(int i=0; i<n ; i++) { + fval[i] = val_ptr[i]; + } + } + + }; +} + +#ifdef __CUDA_ARCH__ + #if (__CUDA_ARCH__ >= 300) + + KOKKOS_INLINE_FUNCTION + int shfl(const int &val, const int& srcLane, const int& width ) { + return __shfl(val,srcLane,width); + } + + KOKKOS_INLINE_FUNCTION + float shfl(const float &val, const int& srcLane, const int& width ) { + return __shfl(val,srcLane,width); + } + + template<typename Scalar> + KOKKOS_INLINE_FUNCTION + Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type& width + ) { + Scalar tmp1 = val; + float tmp = *reinterpret_cast<float*>(&tmp1); + tmp = 
__shfl(tmp,srcLane,width); + return *reinterpret_cast<Scalar*>(&tmp); + } + + KOKKOS_INLINE_FUNCTION + double shfl(const double &val, const int& srcLane, const int& width) { + int lo = __double2loint(val); + int hi = __double2hiint(val); + lo = __shfl(lo,srcLane,width); + hi = __shfl(hi,srcLane,width); + return __hiloint2double(hi,lo); + } + + template<typename Scalar> + KOKKOS_INLINE_FUNCTION + Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) == 8) ,int>::type& width) { + int lo = __double2loint(*reinterpret_cast<const double*>(&val)); + int hi = __double2hiint(*reinterpret_cast<const double*>(&val)); + lo = __shfl(lo,srcLane,width); + hi = __shfl(hi,srcLane,width); + const double tmp = __hiloint2double(hi,lo); + return *(reinterpret_cast<const Scalar*>(&tmp)); + } + + template<typename Scalar> + KOKKOS_INLINE_FUNCTION + Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) > 8) ,int>::type& width) { + Impl::shfl_union<Scalar> s_val; + Impl::shfl_union<Scalar> r_val; + s_val = val; + + for(int i = 0; i<s_val.n; i++) + r_val.fval[i] = __shfl(s_val.fval[i],srcLane,width); + return r_val.value(); + } + + KOKKOS_INLINE_FUNCTION + int shfl_down(const int &val, const int& delta, const int& width) { + return __shfl_down(val,delta,width); + } + + KOKKOS_INLINE_FUNCTION + float shfl_down(const float &val, const int& delta, const int& width) { + return __shfl_down(val,delta,width); + } + + template<typename Scalar> + KOKKOS_INLINE_FUNCTION + Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type & width) { + Scalar tmp1 = val; + float tmp = *reinterpret_cast<float*>(&tmp1); + tmp = __shfl_down(tmp,delta,width); + return *reinterpret_cast<Scalar*>(&tmp); + } + + KOKKOS_INLINE_FUNCTION + double shfl_down(const double &val, const int& delta, const int& width) { + int lo = __double2loint(val); + int hi = 
__double2hiint(val); + lo = __shfl_down(lo,delta,width); + hi = __shfl_down(hi,delta,width); + return __hiloint2double(hi,lo); + } + + template<typename Scalar> + KOKKOS_INLINE_FUNCTION + Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 8) , int >::type & width) { + int lo = __double2loint(*reinterpret_cast<const double*>(&val)); + int hi = __double2hiint(*reinterpret_cast<const double*>(&val)); + lo = __shfl_down(lo,delta,width); + hi = __shfl_down(hi,delta,width); + const double tmp = __hiloint2double(hi,lo); + return *(reinterpret_cast<const Scalar*>(&tmp)); + } + + template<typename Scalar> + KOKKOS_INLINE_FUNCTION + Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) > 8) , int >::type & width) { + Impl::shfl_union<Scalar> s_val; + Impl::shfl_union<Scalar> r_val; + s_val = val; + + for(int i = 0; i<s_val.n; i++) + r_val.fval[i] = __shfl_down(s_val.fval[i],delta,width); + return r_val.value(); + } + + KOKKOS_INLINE_FUNCTION + int shfl_up(const int &val, const int& delta, const int& width ) { + return __shfl_up(val,delta,width); + } + + KOKKOS_INLINE_FUNCTION + float shfl_up(const float &val, const int& delta, const int& width ) { + return __shfl_up(val,delta,width); + } + + template<typename Scalar> + KOKKOS_INLINE_FUNCTION + Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type & width) { + Scalar tmp1 = val; + float tmp = *reinterpret_cast<float*>(&tmp1); + tmp = __shfl_up(tmp,delta,width); + return *reinterpret_cast<Scalar*>(&tmp); + } + + KOKKOS_INLINE_FUNCTION + double shfl_up(const double &val, const int& delta, const int& width ) { + int lo = __double2loint(val); + int hi = __double2hiint(val); + lo = __shfl_up(lo,delta,width); + hi = __shfl_up(hi,delta,width); + return __hiloint2double(hi,lo); + } + + template<typename Scalar> + KOKKOS_INLINE_FUNCTION + Scalar shfl_up(const Scalar 
&val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 8) , int >::type & width) { + int lo = __double2loint(*reinterpret_cast<const double*>(&val)); + int hi = __double2hiint(*reinterpret_cast<const double*>(&val)); + lo = __shfl_up(lo,delta,width); + hi = __shfl_up(hi,delta,width); + const double tmp = __hiloint2double(hi,lo); + return *(reinterpret_cast<const Scalar*>(&tmp)); + } + + template<typename Scalar> + KOKKOS_INLINE_FUNCTION + Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) > 8) , int >::type & width) { + Impl::shfl_union<Scalar> s_val; + Impl::shfl_union<Scalar> r_val; + s_val = val; + + for(int i = 0; i<s_val.n; i++) + r_val.fval[i] = __shfl_up(s_val.fval[i],delta,width); + return r_val.value(); + } + + #else + template<typename Scalar> + KOKKOS_INLINE_FUNCTION + Scalar shfl(const Scalar &val, const int& srcLane, const int& width) { + if(width > 1) Kokkos::abort("Error: calling shfl from a device with CC<3.0."); + return val; + } + + template<typename Scalar> + KOKKOS_INLINE_FUNCTION + Scalar shfl_down(const Scalar &val, const int& delta, const int& width) { + if(width > 1) Kokkos::abort("Error: calling shfl_down from a device with CC<3.0."); + return val; + } + + template<typename Scalar> + KOKKOS_INLINE_FUNCTION + Scalar shfl_up(const Scalar &val, const int& delta, const int& width) { + if(width > 1) Kokkos::abort("Error: calling shfl_down from a device with CC<3.0."); + return val; + } + #endif +#else + template<typename Scalar> + inline + Scalar shfl(const Scalar &val, const int& srcLane, const int& width) { + if(width > 1) Kokkos::abort("Error: calling shfl from a device with CC<3.0."); + return val; + } + + template<typename Scalar> + inline + Scalar shfl_down(const Scalar &val, const int& delta, const int& width) { + if(width > 1) Kokkos::abort("Error: calling shfl_down from a device with CC<3.0."); + return val; + } + + template<typename Scalar> + inline + Scalar 
shfl_up(const Scalar &val, const int& delta, const int& width) { + if(width > 1) Kokkos::abort("Error: calling shfl_down from a device with CC<3.0."); + return val; + } +#endif + + + +} + +#endif // KOKKOS_HAVE_CUDA +#endif diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp new file mode 100644 index 0000000000000000000000000000000000000000..92f6fc1f5f89a75fe717d351af5395da8bf894a4 --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp @@ -0,0 +1,93 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CUDA_VIEW_HPP +#define KOKKOS_CUDA_VIEW_HPP + +#include <Kokkos_Macros.hpp> + +/* only compile this file if CUDA is enabled for Kokkos */ +#ifdef KOKKOS_HAVE_CUDA + +#include <cstring> + +#include <Kokkos_HostSpace.hpp> +#include <Kokkos_CudaSpace.hpp> +#include <impl/Kokkos_Shape.hpp> +#include <Kokkos_View.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template<> +struct AssertShapeBoundsAbort< CudaSpace > +{ + KOKKOS_INLINE_FUNCTION + static void apply( const size_t /* rank */ , + const size_t /* n0 */ , const size_t /* n1 */ , + const size_t /* n2 */ , const size_t /* n3 */ , + const size_t /* n4 */ , const size_t /* n5 */ , + const size_t /* n6 */ , const size_t /* n7 */ , + + const size_t /* arg_rank */ , + const size_t /* i0 */ , const size_t /* i1 */ , + const size_t /* i2 */ , const size_t /* i3 */ , + const size_t /* i4 */ , const size_t /* i5 */ , + const size_t /* i6 */ , const size_t /* i7 */ ) + { + Kokkos::abort("Kokkos::View array bounds violation"); + } +}; + +} +} + +//---------------------------------------------------------------------------- 
+//---------------------------------------------------------------------------- + +#endif // KOKKOS_HAVE_CUDA +#endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */ + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp new file mode 100644 index 0000000000000000000000000000000000000000..deb955ccd4755d43a24469171f2689d8c2a87dae --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp @@ -0,0 +1,119 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_CUDA_ABORT_HPP
+#define KOKKOS_CUDA_ABORT_HPP
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+#include "Kokkos_Macros.hpp"
+// Device-side implementation: only taken when compiling device code with a
+// CUDA compiler and Kokkos' CUDA backend is enabled.
+#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
+
+#include <cuda.h>
+
+#if ! defined( CUDA_VERSION ) || ( CUDA_VERSION < 4010 )
+#error "Cuda version 4.1 or greater required"
+#endif
+
+#if ( __CUDA_ARCH__ < 200 )
+#error "Cuda device capability 2.0 or greater required"
+#endif
+
+extern "C" {
+/*  Cuda runtime function, declared in <crt/device_runtime.h>
+ *  Requires capability 2.x or better.
+ */
+extern __device__ void __assertfail(
+  const void  *message,
+  const void  *file,
+  unsigned int line,
+  const void  *function,
+  size_t       charsize);
+}
+
+namespace Kokkos {
+namespace Impl {
+
+// Abort from device code by calling the CUDA runtime's __assertfail with
+// the given message. File and function are passed as empty strings and the
+// line as 0, so only the message carries information.
+// NOTE(review): when __APPLE__ is defined the body is empty and this
+// function returns normally; callers that expect it not to return should
+// confirm that is acceptable.
+__device__ inline
+void cuda_abort( const char * const message )
+{
+#ifndef __APPLE__
+  const char empty[] = "" ;
+
+  __assertfail( (const void *) message ,
+                (const void *) empty ,
+                (unsigned int) 0 ,
+                (const void *) empty ,
+                sizeof(char) );
+#endif
+}
+
+} // namespace Impl
+} // namespace Kokkos
+
+#else
+
+namespace Kokkos {
+namespace Impl {
+// Fallback when the device-side condition above is not met: accepts the
+// message and does nothing.
+KOKKOS_INLINE_FUNCTION
+void cuda_abort( const char * const ) {}
+}
+}
+
+#endif /* #if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA ) */
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA )
+namespace Kokkos {
+// Kokkos::abort for the CUDA execution space: forwards the message to
+// Impl::cuda_abort.
+__device__ inline
+void abort( const char * const message ) { Kokkos::Impl::cuda_abort(message); }
+}
+#endif /* defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) */
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#endif /* #ifndef KOKKOS_CUDA_ABORT_HPP */
+
diff --git a/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp b/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e813285fc739336dc61d105f2afd73b5064b20c3
--- /dev/null
+++ b/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp
@@ -0,0 +1,611 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP +#define KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP + +#include <Kokkos_ExecPolicy.hpp> +#include <Kokkos_Parallel.hpp> +#include <initializer_list> + +#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_HAVE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__) +#define KOKKOS_MDRANGE_IVDEP +#endif + +namespace Kokkos { namespace Experimental { + +enum class Iterate +{ + Default, // Default for the device + Left, // Left indices stride fastest + Right, // Right indices stride fastest + Flat, // Do not tile, only valid for inner direction +}; + +template <typename ExecSpace> +struct default_outer_direction +{ + using type = Iterate; + static constexpr Iterate value = Iterate::Right; +}; + +template <typename ExecSpace> +struct default_inner_direction +{ + using type = Iterate; + static constexpr Iterate value = Iterate::Right; +}; + + +// Iteration Pattern +template < unsigned N + , Iterate OuterDir = Iterate::Default + , Iterate InnerDir = Iterate::Default + > +struct Rank +{ + static_assert( N != 0u, "Kokkos Error: rank 0 undefined"); + static_assert( N != 1u, "Kokkos Error: rank 1 is not a multi-dimensional range"); + static_assert( N < 4u, "Kokkos Error: Unsupported rank..."); + + using iteration_pattern = Rank<N, OuterDir, InnerDir>; + + static constexpr int rank = N; + static constexpr Iterate outer_direction = OuterDir; + static constexpr Iterate inner_direction = InnerDir; +}; + + + +// multi-dimensional iteration pattern +template <typename... 
Properties> +struct MDRangePolicy +{ + using range_policy = RangePolicy<Properties...>; + + static_assert( !std::is_same<range_policy,void>::value + , "Kokkos Error: MD iteration pattern not defined" ); + + using iteration_pattern = typename range_policy::iteration_pattern; + using work_tag = typename range_policy::work_tag; + + static constexpr int rank = iteration_pattern::rank; + + static constexpr int outer_direction = static_cast<int> ( + (iteration_pattern::outer_direction != Iterate::Default && iteration_pattern::outer_direction != Iterate::Flat) + ? iteration_pattern::outer_direction + : default_outer_direction< typename range_policy::execution_space>::value ); + + static constexpr int inner_direction = static_cast<int> ( + iteration_pattern::inner_direction != Iterate::Default + ? iteration_pattern::inner_direction + : default_inner_direction< typename range_policy::execution_space>::value ) ; + + + // Ugly ugly workaround intel 14 not handling scoped enum correctly + static constexpr int Flat = static_cast<int>( Iterate::Flat ); + static constexpr int Right = static_cast<int>( Iterate::Right ); + + + using size_type = typename range_policy::index_type; + using index_type = typename std::make_signed<size_type>::type; + + + template <typename I> + MDRangePolicy( std::initializer_list<I> upper_corner ) + { + static_assert( std::is_integral<I>::value, "Kokkos Error: corner defined with non-integral type" ); + + // TODO check size of lists equal to rank + // static_asserts on initializer_list.size() require c++14 + + //static_assert( upper_corner.size() == rank, "Kokkos Error: upper_corner has incorrect rank" ); + + const auto u = upper_corner.begin(); + + m_num_tiles = 1; + for (int i=0; i<rank; ++i) { + m_offset[i] = static_cast<index_type>(0); + m_dim[i] = static_cast<index_type>(u[i]); + if (inner_direction != Flat) { + // default tile size to 4 + m_tile[i] = 4; + } else { + m_tile[i] = 1; + } + m_tile_dim[i] = (m_dim[i] + (m_tile[i] - 1)) / m_tile[i]; + 
m_num_tiles *= m_tile_dim[i]; + } + } + + template <typename IA, typename IB> + MDRangePolicy( std::initializer_list<IA> corner_a + , std::initializer_list<IB> corner_b + ) + { + static_assert( std::is_integral<IA>::value, "Kokkos Error: corner A defined with non-integral type" ); + static_assert( std::is_integral<IB>::value, "Kokkos Error: corner B defined with non-integral type" ); + + // TODO check size of lists equal to rank + // static_asserts on initializer_list.size() require c++14 + //static_assert( corner_a.size() == rank, "Kokkos Error: corner_a has incorrect rank" ); + //static_assert( corner_b.size() == rank, "Kokkos Error: corner_b has incorrect rank" ); + + + using A = typename std::make_signed<IA>::type; + using B = typename std::make_signed<IB>::type; + + const auto a = [=](int i) { return static_cast<A>(corner_a.begin()[i]); }; + const auto b = [=](int i) { return static_cast<B>(corner_b.begin()[i]); }; + + m_num_tiles = 1; + for (int i=0; i<rank; ++i) { + m_offset[i] = static_cast<index_type>(a(i) <= b(i) ? a(i) : b(i)); + m_dim[i] = static_cast<index_type>(a(i) <= b(i) ? 
b(i) - a(i) : a(i) - b(i)); + if (inner_direction != Flat) { + // default tile size to 4 + m_tile[i] = 4; + } else { + m_tile[i] = 1; + } + m_tile_dim[i] = (m_dim[i] + (m_tile[i] - 1)) / m_tile[i]; + m_num_tiles *= m_tile_dim[i]; + } + } + + template <typename IA, typename IB, typename T> + MDRangePolicy( std::initializer_list<IA> corner_a + , std::initializer_list<IB> corner_b + , std::initializer_list<T> tile + ) + { + static_assert( std::is_integral<IA>::value, "Kokkos Error: corner A defined with non-integral type" ); + static_assert( std::is_integral<IB>::value, "Kokkos Error: corner B defined with non-integral type" ); + static_assert( std::is_integral<T>::value, "Kokkos Error: tile defined with non-integral type" ); + static_assert( inner_direction != Flat, "Kokkos Error: tiling not support with flat iteration" ); + + // TODO check size of lists equal to rank + // static_asserts on initializer_list.size() require c++14 + //static_assert( corner_a.size() == rank, "Kokkos Error: corner_a has incorrect rank" ); + //static_assert( corner_b.size() == rank, "Kokkos Error: corner_b has incorrect rank" ); + //static_assert( tile.size() == rank, "Kokkos Error: tile has incorrect rank" ); + + using A = typename std::make_signed<IA>::type; + using B = typename std::make_signed<IB>::type; + + const auto a = [=](int i) { return static_cast<A>(corner_a.begin()[i]); }; + const auto b = [=](int i) { return static_cast<B>(corner_b.begin()[i]); }; + const auto t = tile.begin(); + + m_num_tiles = 1; + for (int i=0; i<rank; ++i) { + m_offset[i] = static_cast<index_type>(a(i) <= b(i) ? a(i) : b(i)); + m_dim[i] = static_cast<index_type>(a(i) <= b(i) ? b(i) - a(i) : a(i) - b(i)); + m_tile[i] = static_cast<int>(t[i] > (T)0 ? 
t[i] : (T)1 ); + m_tile_dim[i] = (m_dim[i] + (m_tile[i] - 1)) / m_tile[i]; + m_num_tiles *= m_tile_dim[i]; + } + } + + index_type m_offset[rank]; + index_type m_dim[rank]; + int m_tile[rank]; + index_type m_tile_dim[rank]; + size_type m_num_tiles; // product of tile dims +}; + +namespace Impl { + +// Serial, Threads, OpenMP +// use enable_if to overload for Cuda +template < typename MDRange, typename Functor, typename Enable = void > +struct MDForFunctor +{ + using work_tag = typename MDRange::work_tag; + using index_type = typename MDRange::index_type; + using size_type = typename MDRange::size_type; + + MDRange m_range; + Functor m_func; + + KOKKOS_INLINE_FUNCTION + MDForFunctor( MDRange const& range, Functor const& f ) + : m_range(range) + , m_func( f ) + {} + + KOKKOS_INLINE_FUNCTION + MDForFunctor( MDRange const& range, Functor && f ) + : m_range(range) + , m_func( std::forward<Functor>(f) ) + {} + + KOKKOS_INLINE_FUNCTION + MDForFunctor( MDRange && range, Functor const& f ) + : m_range( std::forward<MDRange>(range) ) + , m_func( f ) + {} + + KOKKOS_INLINE_FUNCTION + MDForFunctor( MDRange && range, Functor && f ) + : m_range( std::forward<MDRange>(range) ) + , m_func( std::forward<Functor>(f) ) + {} + + + KOKKOS_INLINE_FUNCTION + MDForFunctor( MDForFunctor const& ) = default; + + KOKKOS_INLINE_FUNCTION + MDForFunctor& operator=( MDForFunctor const& ) = default; + + KOKKOS_INLINE_FUNCTION + MDForFunctor( MDForFunctor && ) = default; + + KOKKOS_INLINE_FUNCTION + MDForFunctor& operator=( MDForFunctor && ) = default; + + // Rank-2, Flat, No Tag + template <typename Idx> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if<( std::is_integral<Idx>::value + && std::is_same<void, work_tag>::value + && MDRange::rank == 2 + && MDRange::inner_direction == MDRange::Flat + )>::type + operator()(Idx t) const + { + if ( MDRange::outer_direction == MDRange::Right ) { + m_func( m_range.m_offset[0] + ( t / m_range.m_dim[1] ) + , m_range.m_offset[1] + ( t % m_range.m_dim[1] ) 
); + } else { + m_func( m_range.m_offset[0] + ( t % m_range.m_dim[0] ) + , m_range.m_offset[1] + ( t / m_range.m_dim[0] ) ); + } + } + + // Rank-2, Flat, Tag + template <typename Idx> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if<( std::is_integral<Idx>::value + && !std::is_same<void, work_tag>::value + && MDRange::rank == 2 + && MDRange::inner_direction == MDRange::Flat + )>::type + operator()(Idx t) const + { + if ( MDRange::outer_direction == MDRange::Right ) { + m_func( work_tag{}, m_range.m_offset[0] + ( t / m_range.m_dim[1] ) + , m_range.m_offset[1] + ( t % m_range.m_dim[1] ) ); + } else { + m_func( work_tag{}, m_range.m_offset[0] + ( t % m_range.m_dim[0] ) + , m_range.m_offset[1] + ( t / m_range.m_dim[0] ) ); + } + } + + // Rank-2, Not Flat, No Tag + template <typename Idx> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if<( std::is_integral<Idx>::value + && std::is_same<void, work_tag>::value + && MDRange::rank == 2 + && MDRange::inner_direction != MDRange::Flat + )>::type + operator()(Idx t) const + { + index_type t0, t1; + if ( MDRange::outer_direction == MDRange::Right ) { + t0 = t / m_range.m_tile_dim[1]; + t1 = t % m_range.m_tile_dim[1]; + } else { + t0 = t % m_range.m_tile_dim[0]; + t1 = t / m_range.m_tile_dim[0]; + } + + const index_type b0 = t0 * m_range.m_tile[0] + m_range.m_offset[0]; + const index_type b1 = t1 * m_range.m_tile[1] + m_range.m_offset[1]; + + const index_type e0 = b0 + m_range.m_tile[0] <= (m_range.m_dim[0] + m_range.m_offset[0] ) ? b0 + m_range.m_tile[0] : ( m_range.m_dim[0] + m_range.m_offset[0] ); + const index_type e1 = b1 + m_range.m_tile[1] <= (m_range.m_dim[1] + m_range.m_offset[1] ) ? 
b1 + m_range.m_tile[1] : ( m_range.m_dim[1] + m_range.m_offset[1] ); + + if ( MDRange::inner_direction == MDRange::Right ) { + for (int i0=b0; i0<e0; ++i0) { + #if defined(KOKKOS_MDRANGE_IVDEP) + #pragma ivdep + #endif + for (int i1=b1; i1<e1; ++i1) { + m_func( i0, i1 ); + }} + } else { + for (int i1=b1; i1<e1; ++i1) { + #if defined(KOKKOS_MDRANGE_IVDEP) + #pragma ivdep + #endif + for (int i0=b0; i0<e0; ++i0) { + m_func( i0, i1 ); + }} + } + } + + // Rank-2, Not Flat, Tag + template <typename Idx> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if<( std::is_integral<Idx>::value + && !std::is_same<void, work_tag>::value + && MDRange::rank == 2 + && MDRange::inner_direction != MDRange::Flat + )>::type + operator()(Idx t) const + { + work_tag tag; + + index_type t0, t1; + if ( MDRange::outer_direction == MDRange::Right ) { + t0 = t / m_range.m_tile_dim[1]; + t1 = t % m_range.m_tile_dim[1]; + } else { + t0 = t % m_range.m_tile_dim[0]; + t1 = t / m_range.m_tile_dim[0]; + } + + const index_type b0 = t0 * m_range.m_tile[0] + m_range.m_offset[0]; + const index_type b1 = t1 * m_range.m_tile[1] + m_range.m_offset[1]; + + const index_type e0 = b0 + m_range.m_tile[0] <= (m_range.m_dim[0] + m_range.m_offset[0] ) ? b0 + m_range.m_tile[0] : ( m_range.m_dim[0] + m_range.m_offset[0] ); + const index_type e1 = b1 + m_range.m_tile[1] <= (m_range.m_dim[1] + m_range.m_offset[1] ) ? 
b1 + m_range.m_tile[1] : ( m_range.m_dim[1] + m_range.m_offset[1] ); + + if ( MDRange::inner_direction == MDRange::Right ) { + for (int i0=b0; i0<e0; ++i0) { + #if defined(KOKKOS_MDRANGE_IVDEP) + #pragma ivdep + #endif + for (int i1=b1; i1<e1; ++i1) { + m_func( tag, i0, i1 ); + }} + } else { + for (int i1=b1; i1<e1; ++i1) { + #if defined(KOKKOS_MDRANGE_IVDEP) + #pragma ivdep + #endif + for (int i0=b0; i0<e0; ++i0) { + m_func( tag, i0, i1 ); + }} + } + } + + //--------------------------------------------------------------------------- + + // Rank-3, Flat, No Tag + template <typename Idx> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if<( std::is_integral<Idx>::value + && std::is_same<void, work_tag>::value + && MDRange::rank == 3 + && MDRange::inner_direction == MDRange::Flat + )>::type + operator()(Idx t) const + { + if ( MDRange::outer_direction == MDRange::Right ) { + const int64_t tmp_prod = m_range.m_dim[1]*m_range.m_dim[2]; + m_func( m_range.m_offset[0] + ( t / tmp_prod ) + , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[2] ) + , m_range.m_offset[2] + ( (t % tmp_prod) % m_range.m_dim[2] ) + ); + } else { + const int64_t tmp_prod = m_range.m_dim[0]*m_range.m_dim[1]; + m_func( m_range.m_offset[0] + ( (t % tmp_prod) % m_range.m_dim[0] ) + , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[0] ) + , m_range.m_offset[2] + ( t / tmp_prod ) + ); + } + } + + // Rank-3, Flat, Tag + template <typename Idx> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if<( std::is_integral<Idx>::value + && !std::is_same<void, work_tag>::value + && MDRange::rank == 3 + && MDRange::inner_direction == MDRange::Flat + )>::type + operator()(Idx t) const + { + if ( MDRange::outer_direction == MDRange::Right ) { + const int64_t tmp_prod = m_range.m_dim[1]*m_range.m_dim[2]; + m_func( work_tag{} + , m_range.m_offset[0] + ( t / tmp_prod ) + , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[2] ) + , m_range.m_offset[2] + ( (t % tmp_prod) % m_range.m_dim[2] ) + ); 
+ } else { + const int64_t tmp_prod = m_range.m_dim[0]*m_range.m_dim[1]; + m_func( work_tag{} + , m_range.m_offset[0] + ( (t % tmp_prod) % m_range.m_dim[0] ) + , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[0] ) + , m_range.m_offset[2] + ( t / tmp_prod ) + ); + } + } + + // Rank-3, Not Flat, No Tag + template <typename Idx> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if<( std::is_integral<Idx>::value + && std::is_same<void, work_tag>::value + && MDRange::rank == 3 + && MDRange::inner_direction != MDRange::Flat + )>::type + operator()(Idx t) const + { + index_type t0, t1, t2; + if ( MDRange::outer_direction == MDRange::Right ) { + const index_type tmp_prod = ( m_range.m_tile_dim[1]*m_range.m_tile_dim[2]); + t0 = t / tmp_prod; + t1 = ( t % tmp_prod ) / m_range.m_tile_dim[2]; + t2 = ( t % tmp_prod ) % m_range.m_tile_dim[2]; + } else { + const index_type tmp_prod = ( m_range.m_tile_dim[0]*m_range.m_tile_dim[1]); + t0 = ( t % tmp_prod ) % m_range.m_tile_dim[0]; + t1 = ( t % tmp_prod ) / m_range.m_tile_dim[0]; + t2 = t / tmp_prod; + } + + const index_type b0 = t0 * m_range.m_tile[0] + m_range.m_offset[0]; + const index_type b1 = t1 * m_range.m_tile[1] + m_range.m_offset[1]; + const index_type b2 = t2 * m_range.m_tile[2] + m_range.m_offset[2]; + + const index_type e0 = b0 + m_range.m_tile[0] <= (m_range.m_dim[0] + m_range.m_offset[0] ) ? b0 + m_range.m_tile[0] : ( m_range.m_dim[0] + m_range.m_offset[0] ); + const index_type e1 = b1 + m_range.m_tile[1] <= (m_range.m_dim[1] + m_range.m_offset[1] ) ? b1 + m_range.m_tile[1] : ( m_range.m_dim[1] + m_range.m_offset[1] ); + const index_type e2 = b2 + m_range.m_tile[2] <= (m_range.m_dim[2] + m_range.m_offset[2] ) ? 
b2 + m_range.m_tile[2] : ( m_range.m_dim[2] + m_range.m_offset[2] ); + + if ( MDRange::inner_direction == MDRange::Right ) { + for (int i0=b0; i0<e0; ++i0) { + for (int i1=b1; i1<e1; ++i1) { + #if defined(KOKKOS_MDRANGE_IVDEP) + #pragma ivdep + #endif + for (int i2=b2; i2<e2; ++i2) { + m_func( i0, i1, i2 ); + }}} + } else { + for (int i2=b2; i2<e2; ++i2) { + for (int i1=b1; i1<e1; ++i1) { + #if defined(KOKKOS_MDRANGE_IVDEP) + #pragma ivdep + #endif + for (int i0=b0; i0<e0; ++i0) { + m_func( i0, i1, i2 ); + }}} + } + } + + // Rank-3, Not Flat, Tag + template <typename Idx> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if<( std::is_integral<Idx>::value + && !std::is_same<void, work_tag>::value + && MDRange::rank == 3 + && MDRange::inner_direction != MDRange::Flat + )>::type + operator()(Idx t) const + { + work_tag tag; + + index_type t0, t1, t2; + if ( MDRange::outer_direction == MDRange::Right ) { + const index_type tmp_prod = ( m_range.m_tile_dim[1]*m_range.m_tile_dim[2]); + t0 = t / tmp_prod; + t1 = ( t % tmp_prod ) / m_range.m_tile_dim[2]; + t2 = ( t % tmp_prod ) % m_range.m_tile_dim[2]; + } else { + const index_type tmp_prod = ( m_range.m_tile_dim[0]*m_range.m_tile_dim[1]); + t0 = ( t % tmp_prod ) % m_range.m_tile_dim[0]; + t1 = ( t % tmp_prod ) / m_range.m_tile_dim[0]; + t2 = t / tmp_prod; + } + + const index_type b0 = t0 * m_range.m_tile[0] + m_range.m_offset[0]; + const index_type b1 = t1 * m_range.m_tile[1] + m_range.m_offset[1]; + const index_type b2 = t2 * m_range.m_tile[2] + m_range.m_offset[2]; + + const index_type e0 = b0 + m_range.m_tile[0] <= (m_range.m_dim[0] + m_range.m_offset[0] ) ? b0 + m_range.m_tile[0] : ( m_range.m_dim[0] + m_range.m_offset[0] ); + const index_type e1 = b1 + m_range.m_tile[1] <= (m_range.m_dim[1] + m_range.m_offset[1] ) ? b1 + m_range.m_tile[1] : ( m_range.m_dim[1] + m_range.m_offset[1] ); + const index_type e2 = b2 + m_range.m_tile[2] <= (m_range.m_dim[2] + m_range.m_offset[2] ) ? 
b2 + m_range.m_tile[2] : ( m_range.m_dim[2] + m_range.m_offset[2] ); + + if ( MDRange::inner_direction == MDRange::Right ) { + for (int i0=b0; i0<e0; ++i0) { + for (int i1=b1; i1<e1; ++i1) { + #if defined(KOKKOS_MDRANGE_IVDEP) + #pragma ivdep + #endif + for (int i2=b2; i2<e2; ++i2) { + m_func( tag, i0, i1, i2 ); + }}} + } else { + for (int i2=b2; i2<e2; ++i2) { + for (int i1=b1; i1<e1; ++i1) { + #if defined(KOKKOS_MDRANGE_IVDEP) + #pragma ivdep + #endif + for (int i0=b0; i0<e0; ++i0) { + m_func( tag, i0, i1, i2 ); + }}} + } + } +}; + + + +} // namespace Impl + + +template <typename MDRange, typename Functor> +void md_parallel_for( MDRange const& range + , Functor const& f + , const std::string& str = "" + ) +{ + Impl::MDForFunctor<MDRange, Functor> g(range, f); + + using range_policy = typename MDRange::range_policy; + + Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str ); +} + +template <typename MDRange, typename Functor> +void md_parallel_for( const std::string& str + , MDRange const& range + , Functor const& f + ) +{ + Impl::MDForFunctor<MDRange, Functor> g(range, f); + + using range_policy = typename MDRange::range_policy; + + Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str ); +} + +}} // namespace Kokkos::Experimental + +#endif //KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP + diff --git a/lib/kokkos/core/src/Kokkos_Array.hpp b/lib/kokkos/core/src/Kokkos_Array.hpp new file mode 100644 index 0000000000000000000000000000000000000000..038eda804fc5e3747e07ff3d8d64b2d5942271b8 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Array.hpp @@ -0,0 +1,302 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_ARRAY +#define KOKKOS_ARRAY + +#include <type_traits> +#include <algorithm> +#include <limits> +#include <cstddef> + +namespace Kokkos { + +/**\brief Derived from the C++17 'std::array'. + * Dropping the iterator interface. 
+ */ +template< class T = void + , size_t N = ~size_t(0) + , class Proxy = void + > +struct Array { +private: + T m_elem[N]; +public: + + typedef T & reference ; + typedef typename std::add_const<T>::type & const_reference ; + typedef size_t size_type ; + typedef ptrdiff_t difference_type ; + typedef T value_type ; + typedef T * pointer ; + typedef typename std::add_const<T>::type * const_pointer ; + + KOKKOS_INLINE_FUNCTION static constexpr size_type size() { return N ; } + KOKKOS_INLINE_FUNCTION static constexpr bool empty(){ return false ; } + + template< typename iType > + KOKKOS_INLINE_FUNCTION + reference operator[]( const iType & i ) + { + static_assert( std::is_integral<iType>::value , "Must be integral argument" ); + return m_elem[i]; + } + + template< typename iType > + KOKKOS_INLINE_FUNCTION + const_reference operator[]( const iType & i ) const + { + static_assert( std::is_integral<iType>::value , "Must be integral argument" ); + return m_elem[i]; + } + + KOKKOS_INLINE_FUNCTION pointer data() { return & m_elem[0] ; } + KOKKOS_INLINE_FUNCTION const_pointer data() const { return & m_elem[0] ; } + + ~Array() = default ; + Array() = default ; + Array( const Array & ) = default ; + Array & operator = ( const Array & ) = default ; + + // Some supported compilers are not sufficiently C++11 compliant + // for default move constructor and move assignment operator. 
+ // Array( Array && ) = default ; + // Array & operator = ( Array && ) = default ; +}; + + +template< class T , class Proxy > +struct Array<T,0,Proxy> { +public: + + typedef typename std::add_const<T>::type & reference ; + typedef typename std::add_const<T>::type & const_reference ; + typedef size_t size_type ; + typedef ptrdiff_t difference_type ; + typedef typename std::add_const<T>::type value_type ; + typedef typename std::add_const<T>::type * pointer ; + typedef typename std::add_const<T>::type * const_pointer ; + + KOKKOS_INLINE_FUNCTION static constexpr size_type size() { return 0 ; } + KOKKOS_INLINE_FUNCTION static constexpr bool empty() { return true ; } + + template< typename iType > + KOKKOS_INLINE_FUNCTION + value_type operator[]( const iType & ) + { + static_assert( std::is_integral<iType>::value , "Must be integer argument" ); + return value_type(); + } + + template< typename iType > + KOKKOS_INLINE_FUNCTION + value_type operator[]( const iType & ) const + { + static_assert( std::is_integral<iType>::value , "Must be integer argument" ); + return value_type(); + } + + KOKKOS_INLINE_FUNCTION pointer data() { return pointer(0) ; } + KOKKOS_INLINE_FUNCTION const_pointer data() const { return const_pointer(0); } + + ~Array() = default ; + Array() = default ; + Array( const Array & ) = default ; + Array & operator = ( const Array & ) = default ; + + // Some supported compilers are not sufficiently C++11 compliant + // for default move constructor and move assignment operator. 
+ // Array( Array && ) = default ; + // Array & operator = ( Array && ) = default ; +}; + + +template<> +struct Array<void,~size_t(0),void> +{ + struct contiguous {}; + struct strided {}; +}; + +template< class T > +struct Array< T , ~size_t(0) , Array<>::contiguous > +{ +private: + T * m_elem ; + size_t m_size ; +public: + + typedef T & reference ; + typedef typename std::add_const<T>::type & const_reference ; + typedef size_t size_type ; + typedef ptrdiff_t difference_type ; + typedef T value_type ; + typedef T * pointer ; + typedef typename std::add_const<T>::type * const_pointer ; + + KOKKOS_INLINE_FUNCTION constexpr size_type size() const { return m_size ; } + KOKKOS_INLINE_FUNCTION constexpr bool empty() const { return 0 == m_size ; } // true only when the view spans zero elements + + template< typename iType > + KOKKOS_INLINE_FUNCTION + reference operator[]( const iType & i ) + { + static_assert( std::is_integral<iType>::value , "Must be integral argument" ); + return m_elem[i]; + } + + template< typename iType > + KOKKOS_INLINE_FUNCTION + const_reference operator[]( const iType & i ) const + { + static_assert( std::is_integral<iType>::value , "Must be integral argument" ); + return m_elem[i]; + } + + KOKKOS_INLINE_FUNCTION pointer data() { return m_elem ; } + KOKKOS_INLINE_FUNCTION const_pointer data() const { return m_elem ; } + + ~Array() = default ; + Array() = delete ; + Array( const Array & rhs ) = delete ; + + // Some supported compilers are not sufficiently C++11 compliant + // for default move constructor and move assignment operator. 
+ // Array( Array && rhs ) = default ; + // Array & operator = ( Array && rhs ) = delete ; + + KOKKOS_INLINE_FUNCTION + Array & operator = ( const Array & rhs ) + { + const size_t n = std::min( m_size , rhs.size() ); + for ( size_t i = 0 ; i < n ; ++i ) m_elem[i] = rhs[i] ; + return *this ; + } + + template< size_t N , class P > + KOKKOS_INLINE_FUNCTION + Array & operator = ( const Array<T,N,P> & rhs ) + { + const size_t n = std::min( m_size , rhs.size() ); + for ( size_t i = 0 ; i < n ; ++i ) m_elem[i] = rhs[i] ; + return *this ; + } + + KOKKOS_INLINE_FUNCTION constexpr Array( pointer arg_ptr , size_type arg_size , size_type = 0 ) + : m_elem(arg_ptr), m_size(arg_size) {} +}; + +template< class T > +struct Array< T , ~size_t(0) , Array<>::strided > +{ +private: + T * m_elem ; + size_t m_size ; + size_t m_stride ; +public: + + typedef T & reference ; + typedef typename std::add_const<T>::type & const_reference ; + typedef size_t size_type ; + typedef ptrdiff_t difference_type ; + typedef T value_type ; + typedef T * pointer ; + typedef typename std::add_const<T>::type * const_pointer ; + + KOKKOS_INLINE_FUNCTION constexpr size_type size() const { return m_size ; } + KOKKOS_INLINE_FUNCTION constexpr bool empty() const { return 0 == m_size ; } // true only when the view spans zero elements + + template< typename iType > + KOKKOS_INLINE_FUNCTION + reference operator[]( const iType & i ) + { + static_assert( std::is_integral<iType>::value , "Must be integral argument" ); + return m_elem[i*m_stride]; + } + + template< typename iType > + KOKKOS_INLINE_FUNCTION + const_reference operator[]( const iType & i ) const + { + static_assert( std::is_integral<iType>::value , "Must be integral argument" ); + return m_elem[i*m_stride]; + } + + KOKKOS_INLINE_FUNCTION pointer data() { return m_elem ; } + KOKKOS_INLINE_FUNCTION const_pointer data() const { return m_elem ; } + + ~Array() = default ; + Array() = delete ; + Array( const Array & ) = delete ; + + + // Some supported compilers 
are not sufficiently C++11 compliant + // 
for default move constructor and move assignment operator. + // Array( Array && rhs ) = default ; + // Array & operator = ( Array && rhs ) = delete ; + + KOKKOS_INLINE_FUNCTION + Array & operator = ( const Array & rhs ) + { + const size_t n = std::min( m_size , rhs.size() ); + for ( size_t i = 0 ; i < n ; ++i ) m_elem[i*m_stride] = rhs[i] ; // write through the destination stride, matching operator[] + return *this ; + } + + template< size_t N , class P > + KOKKOS_INLINE_FUNCTION + Array & operator = ( const Array<T,N,P> & rhs ) + { + const size_t n = std::min( m_size , rhs.size() ); + for ( size_t i = 0 ; i < n ; ++i ) m_elem[i*m_stride] = rhs[i] ; // write through the destination stride, matching operator[] + return *this ; + } + + KOKKOS_INLINE_FUNCTION constexpr Array( pointer arg_ptr , size_type arg_size , size_type arg_stride ) + : m_elem(arg_ptr), m_size(arg_size), m_stride(arg_stride) {} +}; + +} // namespace Kokkos + +#endif /* #ifndef KOKKOS_ARRAY */ + diff --git a/lib/kokkos/core/src/Kokkos_Atomic.hpp b/lib/kokkos/core/src/Kokkos_Atomic.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6d37d69a63c8c837457fb2edba6a6d607103b6ad --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Atomic.hpp @@ -0,0 +1,305 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// \file Kokkos_Atomic.hpp +/// \brief Atomic functions +/// +/// This header file defines prototypes for the following atomic functions: +/// - exchange +/// - compare and exchange +/// - add +/// +/// Supported types include: +/// - signed and unsigned 4 and 8 byte integers +/// - float +/// - double +/// +/// They are implemented through GCC compatible intrinsics, OpenMP +/// directives and native CUDA intrinsics. 
+/// +/// Including this header file requires one of the following +/// compilers: +/// - NVCC (for CUDA device code only) +/// - GCC (for host code only) +/// - Intel (for host code only) +/// - A compiler that supports OpenMP 3.1 (for host code only) + +#ifndef KOKKOS_ATOMIC_HPP +#define KOKKOS_ATOMIC_HPP + +#include <Kokkos_Macros.hpp> +#include <Kokkos_HostSpace.hpp> +#include <impl/Kokkos_Traits.hpp> + +//---------------------------------------------------------------------------- +#if defined(_WIN32) +#define KOKKOS_ATOMICS_USE_WINDOWS +#else +#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA ) + +// Compiling NVIDIA device code, must use Cuda atomics: + +#define KOKKOS_ATOMICS_USE_CUDA + +#elif ! defined( KOKKOS_ATOMICS_USE_GCC ) && \ + ! defined( KOKKOS_ATOMICS_USE_INTEL ) && \ + ! defined( KOKKOS_ATOMICS_USE_OMP31 ) + +// Compiling for non-Cuda atomic implementation has not been pre-selected. +// Choose the best implementation for the detected compiler. +// Preference: GCC, INTEL, OMP31 + +#if defined( KOKKOS_COMPILER_GNU ) || \ + defined( KOKKOS_COMPILER_CLANG ) || \ + ( defined ( KOKKOS_COMPILER_NVCC ) && defined ( __GNUC__ ) ) + +#define KOKKOS_ATOMICS_USE_GCC + +#elif defined( KOKKOS_COMPILER_INTEL ) || \ + defined( KOKKOS_COMPILER_CRAYC ) + +#define KOKKOS_ATOMICS_USE_INTEL + +#elif defined( _OPENMP ) && ( 201107 <= _OPENMP ) + +#define KOKKOS_ATOMICS_USE_OMP31 + +#else + +#error "KOKKOS_ATOMICS_USE : Unsupported compiler" + +#endif + +#endif /* Not pre-selected atomic implementation */ +#endif + +//---------------------------------------------------------------------------- + +// Forward decalaration of functions supporting arbitrary sized atomics +// This is necessary since Kokkos_Atomic.hpp is internally included very early +// through Kokkos_HostSpace.hpp as well as the allocation tracker. 
+#ifdef KOKKOS_HAVE_CUDA +namespace Kokkos { +namespace Impl { +/// \brief Acquire a lock for the address +/// +/// This function tries to acquire the lock for the hash value derived +/// from the provided ptr. If the lock is successfully acquired the +/// function returns true. Otherwise it returns false. +__device__ inline +bool lock_address_cuda_space(void* ptr); + +/// \brief Release lock for the address +/// +/// This function releases the lock for the hash value derived +/// from the provided ptr. This function should only be called +/// after previously successfully acquiring a lock with +/// lock_address_cuda_space. +__device__ inline +void unlock_address_cuda_space(void* ptr); +} +} +#endif + + +namespace Kokkos { +template <typename T> +KOKKOS_INLINE_FUNCTION +void atomic_add(volatile T * const dest, const T src); + +// Atomic increment +template<typename T> +KOKKOS_INLINE_FUNCTION +void atomic_increment(volatile T* a); + +template<typename T> +KOKKOS_INLINE_FUNCTION +void atomic_decrement(volatile T* a); +} + +namespace Kokkos { + + +inline +const char * atomic_query_version() +{ +#if defined( KOKKOS_ATOMICS_USE_CUDA ) + return "KOKKOS_ATOMICS_USE_CUDA" ; +#elif defined( KOKKOS_ATOMICS_USE_GCC ) + return "KOKKOS_ATOMICS_USE_GCC" ; +#elif defined( KOKKOS_ATOMICS_USE_INTEL ) + return "KOKKOS_ATOMICS_USE_INTEL" ; +#elif defined( KOKKOS_ATOMICS_USE_OMP31 ) + return "KOKKOS_ATOMICS_USE_OMP31" ; +#elif defined( KOKKOS_ATOMICS_USE_WINDOWS ) + return "KOKKOS_ATOMICS_USE_WINDOWS"; +#endif +} + +} // namespace Kokkos + +#ifdef _WIN32 +#include "impl/Kokkos_Atomic_Windows.hpp" +#else + +//---------------------------------------------------------------------------- +// Atomic Assembly +// +// Implements CAS128-bit in assembly + +#include "impl/Kokkos_Atomic_Assembly.hpp" + +//---------------------------------------------------------------------------- +// Atomic exchange +// +// template< typename T > +// T atomic_exchange( volatile T* const dest , const T val ) +// { T tmp = 
*dest ; *dest = val ; return tmp ; } + +#include "impl/Kokkos_Atomic_Exchange.hpp" + +//---------------------------------------------------------------------------- +// Atomic compare-and-exchange +// +// template<class T> +// bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val) +// { bool equal = compare == *dest ; if ( equal ) { *dest = val ; } return equal ; } + +#include "impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp" + +//---------------------------------------------------------------------------- +// Atomic fetch and add +// +// template<class T> +// T atomic_fetch_add(volatile T* const dest, const T val) +// { T tmp = *dest ; *dest += val ; return tmp ; } + +#include "impl/Kokkos_Atomic_Fetch_Add.hpp" + +//---------------------------------------------------------------------------- +// Atomic increment +// +// template<class T> +// T atomic_increment(volatile T* const dest) +// { dest++; } + +#include "impl/Kokkos_Atomic_Increment.hpp" + +//---------------------------------------------------------------------------- +// Atomic Decrement +// +// template<class T> +// T atomic_decrement(volatile T* const dest) +// { dest--; } + +#include "impl/Kokkos_Atomic_Decrement.hpp" + +//---------------------------------------------------------------------------- +// Atomic fetch and sub +// +// template<class T> +// T atomic_fetch_sub(volatile T* const dest, const T val) +// { T tmp = *dest ; *dest -= val ; return tmp ; } + +#include "impl/Kokkos_Atomic_Fetch_Sub.hpp" + +//---------------------------------------------------------------------------- +// Atomic fetch and or +// +// template<class T> +// T atomic_fetch_or(volatile T* const dest, const T val) +// { T tmp = *dest ; *dest = tmp | val ; return tmp ; } + +#include "impl/Kokkos_Atomic_Fetch_Or.hpp" + +//---------------------------------------------------------------------------- +// Atomic fetch and and +// +// template<class T> +// T atomic_fetch_and(volatile T* const dest, 
const T val) +// { T tmp = *dest ; *dest = tmp & val ; return tmp ; } + +#include "impl/Kokkos_Atomic_Fetch_And.hpp" +#endif /*Not _WIN32*/ + +//---------------------------------------------------------------------------- +// Memory fence +// +// All loads and stores from this thread will be globally consistent before continuing +// +// void memory_fence() {...}; +#include "impl/Kokkos_Memory_Fence.hpp" + +//---------------------------------------------------------------------------- +// Provide volatile_load and safe_load +// +// T volatile_load(T const volatile * const ptr); +// +// T const& safe_load(T const * const ptr); +// XEON PHI +// T safe_load(T const * const ptr + +#include "impl/Kokkos_Volatile_Load.hpp" + +#ifndef _WIN32 +#include "impl/Kokkos_Atomic_Generic.hpp" +#endif +//---------------------------------------------------------------------------- +// This atomic-style macro should be an inlined function, not a macro + +#if defined( KOKKOS_COMPILER_GNU ) && !defined(__PGIC__) + + #define KOKKOS_NONTEMPORAL_PREFETCH_LOAD(addr) __builtin_prefetch(addr,0,0) + #define KOKKOS_NONTEMPORAL_PREFETCH_STORE(addr) __builtin_prefetch(addr,1,0) + +#else + + #define KOKKOS_NONTEMPORAL_PREFETCH_LOAD(addr) ((void)0) + #define KOKKOS_NONTEMPORAL_PREFETCH_STORE(addr) ((void)0) + +#endif + +//---------------------------------------------------------------------------- + +#endif /* KOKKOS_ATOMIC_HPP */ + diff --git a/lib/kokkos/core/src/Kokkos_Complex.hpp b/lib/kokkos/core/src/Kokkos_Complex.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cdfa4429f08f241d86bd32c3020f1b20c9a5a90b --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Complex.hpp @@ -0,0 +1,538 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef KOKKOS_COMPLEX_HPP +#define KOKKOS_COMPLEX_HPP + +#include <Kokkos_Atomic.hpp> +#include <complex> +#include <iostream> + +namespace Kokkos { + +/// \class complex +/// \brief Partial reimplementation of std::complex that works as the +/// result of a Kokkos::parallel_reduce. +/// \tparam RealType The type of the real and imaginary parts of the +/// complex number. As with std::complex, this is only defined for +/// \c float, \c double, and <tt>long double</tt>. The latter is +/// currently forbidden in CUDA device kernels. +template<class RealType> +class complex { +private: + RealType re_, im_; + +public: + //! The type of the real or imaginary parts of this complex number. + typedef RealType value_type; + + //! Default constructor (initializes both real and imaginary parts to zero). + KOKKOS_INLINE_FUNCTION complex () : + re_ (0.0), im_ (0.0) + {} + + //! Copy constructor. + KOKKOS_INLINE_FUNCTION complex (const complex<RealType>& src) : + re_ (src.re_), im_ (src.im_) + {} + + //! Copy constructor from volatile. + KOKKOS_INLINE_FUNCTION complex (const volatile complex<RealType>& src) : + re_ (src.re_), im_ (src.im_) + {} + + /// \brief Conversion constructor from std::complex. + /// + /// This constructor cannot be called in a CUDA device function, + /// because std::complex's methods and nonmember functions are not + /// marked as CUDA device functions. + template<class InputRealType> + complex (const std::complex<InputRealType>& src) : + re_ (std::real (src)), im_ (std::imag (src)) + {} + + /// \brief Conversion operator to std::complex. + /// + /// This operator cannot be called in a CUDA device function, + /// because std::complex's methods and nonmember functions are not + /// marked as CUDA device functions. 
+ operator std::complex<RealType> () const { + return std::complex<RealType> (re_, im_); + } + + /// \brief Constructor that takes just the real part, and sets the + /// imaginary part to zero. + template<class InputRealType> + KOKKOS_INLINE_FUNCTION complex (const InputRealType& val) : + re_ (val), im_ (0.0) + {} + + //! Constructor that takes the real and imaginary parts. + template<class RealType1, class RealType2> + KOKKOS_INLINE_FUNCTION complex (const RealType1& re, const RealType2& im) : + re_ (re), im_ (im) + {} + + //! Assignment operator (the source may use a different real type). + template<class InputRealType> + KOKKOS_INLINE_FUNCTION + complex<RealType>& operator= (const complex<InputRealType>& src) { + re_ = src.real (); // public accessor: complex<InputRealType> is a distinct class, its members are private here + im_ = src.imag (); + return *this; + } + + /// \brief Assignment operator, for volatile <tt>*this</tt> and + /// nonvolatile input. + /// + /// \param src [in] Input; right-hand side of the assignment. + /// + /// This operator returns \c void instead of <tt>volatile + /// complex<RealType>& </tt>. See Kokkos Issue #177 for the + /// explanation. In practice, this means that you should not chain + /// assignments with volatile lvalues. + template<class InputRealType> + KOKKOS_INLINE_FUNCTION + void operator= (const complex<InputRealType>& src) volatile { + re_ = src.real (); + im_ = src.imag (); + // We deliberately do not return anything here. See explanation + // in public documentation above. + } + + //! Assignment operator (volatile <tt>*this</tt>, volatile input). + template<class InputRealType> + KOKKOS_INLINE_FUNCTION + volatile complex<RealType>& operator= (const volatile complex<InputRealType>& src) volatile { + re_ = src.real (); // resolves to the const volatile accessor, which returns by value + im_ = src.imag (); + return *this; + } + + //! Assignment operator (nonvolatile <tt>*this</tt>, volatile input). 
+ template<class InputRealType> + KOKKOS_INLINE_FUNCTION + complex<RealType>& operator= (const volatile complex<InputRealType>& src) { + re_ = src.real (); + im_ = src.imag (); + return *this; + } + + //! Assignment operator (from a real number). 
+ template<class InputRealType> + KOKKOS_INLINE_FUNCTION + complex<RealType>& operator= (const InputRealType& val) { + re_ = val; + im_ = static_cast<RealType> (0.0); + return *this; + } + + //! Assignment operator (from a real number). + template<class InputRealType> + KOKKOS_INLINE_FUNCTION + void operator= (const InputRealType& val) volatile { + re_ = val; + im_ = static_cast<RealType> (0.0); + } + + /// \brief Assignment operator from std::complex. + /// + /// This constructor cannot be called in a CUDA device function, + /// because std::complex's methods and nonmember functions are not + /// marked as CUDA device functions. + template<class InputRealType> + complex<RealType>& operator= (const std::complex<InputRealType>& src) { + re_ = std::real (src); + im_ = std::imag (src); + return *this; + } + + //! The imaginary part of this complex number. + KOKKOS_INLINE_FUNCTION RealType& imag () { + return im_; + } + + //! The real part of this complex number. + KOKKOS_INLINE_FUNCTION RealType& real () { + return re_; + } + + //! The imaginary part of this complex number. + KOKKOS_INLINE_FUNCTION const RealType imag () const { + return im_; + } + + //! The real part of this complex number. + KOKKOS_INLINE_FUNCTION const RealType real () const { + return re_; + } + + //! The imaginary part of this complex number (volatile overload). + KOKKOS_INLINE_FUNCTION volatile RealType& imag () volatile { + return im_; + } + + //! The real part of this complex number (volatile overload). + KOKKOS_INLINE_FUNCTION volatile RealType& real () volatile { + return re_; + } + + //! The imaginary part of this complex number (volatile overload). + KOKKOS_INLINE_FUNCTION const RealType imag () const volatile { + return im_; + } + + //! The real part of this complex number (volatile overload). 
+ KOKKOS_INLINE_FUNCTION const RealType real () const volatile { + return re_; + } + + KOKKOS_INLINE_FUNCTION + complex<RealType>& operator += (const complex<RealType>& src) { + re_ += src.re_; + im_ += src.im_; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator += (const volatile complex<RealType>& src) volatile { + re_ += src.re_; + im_ += src.im_; + } + + KOKKOS_INLINE_FUNCTION + complex<RealType>& operator += (const RealType& src) { + re_ += src; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator += (const volatile RealType& src) volatile { + re_ += src; + } + + KOKKOS_INLINE_FUNCTION + complex<RealType>& operator -= (const complex<RealType>& src) { + re_ -= src.re_; + im_ -= src.im_; + return *this; + } + + KOKKOS_INLINE_FUNCTION + complex<RealType>& operator -= (const RealType& src) { + re_ -= src; + return *this; + } + + KOKKOS_INLINE_FUNCTION + complex<RealType>& operator *= (const complex<RealType>& src) { + const RealType realPart = re_ * src.re_ - im_ * src.im_; + const RealType imagPart = re_ * src.im_ + im_ * src.re_; + re_ = realPart; + im_ = imagPart; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator *= (const volatile complex<RealType>& src) volatile { + const RealType realPart = re_ * src.re_ - im_ * src.im_; + const RealType imagPart = re_ * src.im_ + im_ * src.re_; + re_ = realPart; + im_ = imagPart; + } + + KOKKOS_INLINE_FUNCTION + complex<RealType>& operator *= (const RealType& src) { + re_ *= src; + im_ *= src; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator *= (const volatile RealType& src) volatile { + re_ *= src; + im_ *= src; + } + + KOKKOS_INLINE_FUNCTION + complex<RealType>& operator /= (const complex<RealType>& y) { + // Scale (by the "1-norm" of y) to avoid unwarranted overflow. + // If the real part is +/-Inf and the imaginary part is -/+Inf, + // this won't change the result. 
+ const RealType s = ::fabs (y.real ()) + ::fabs (y.imag ()); + + // If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0. + // In that case, the relation x/y == (x/s) / (y/s) doesn't hold, + // because y/s is NaN. + if (s == 0.0) { + this->re_ /= s; + this->im_ /= s; + } + else { + const complex<RealType> x_scaled (this->re_ / s, this->im_ / s); + const complex<RealType> y_conj_scaled (y.re_ / s, -(y.im_) / s); + const RealType y_scaled_abs = y_conj_scaled.re_ * y_conj_scaled.re_ + + y_conj_scaled.im_ * y_conj_scaled.im_; // abs(y) == abs(conj(y)) + *this = x_scaled * y_conj_scaled; + *this /= y_scaled_abs; + } + return *this; + } + + KOKKOS_INLINE_FUNCTION + complex<RealType>& operator /= (const RealType& src) { + re_ /= src; + im_ /= src; + return *this; + } +}; + +//! Binary + operator for complex. +template<class RealType> +KOKKOS_INLINE_FUNCTION +complex<RealType> +operator + (const complex<RealType>& x, const complex<RealType>& y) { + return complex<RealType> (x.real () + y.real (), x.imag () + y.imag ()); +} + +//! Unary + operator for complex. +template<class RealType> +KOKKOS_INLINE_FUNCTION +complex<RealType> +operator + (const complex<RealType>& x) { + return x; +} + +//! Binary - operator for complex. +template<class RealType> +KOKKOS_INLINE_FUNCTION +complex<RealType> +operator - (const complex<RealType>& x, const complex<RealType>& y) { + return complex<RealType> (x.real () - y.real (), x.imag () - y.imag ()); +} + +//! Unary - operator for complex. +template<class RealType> +KOKKOS_INLINE_FUNCTION +complex<RealType> +operator - (const complex<RealType>& x) { + return complex<RealType> (-x.real (), -x.imag ()); +} + +//! Binary * operator for complex. 
+template<class RealType> +KOKKOS_INLINE_FUNCTION +complex<RealType> +operator * (const complex<RealType>& x, const complex<RealType>& y) { + return complex<RealType> (x.real () * y.real () - x.imag () * y.imag (), + x.real () * y.imag () + x.imag () * y.real ()); +} + +/// \brief Binary * operator for std::complex and complex. +/// +/// This function exists because GCC 4.7.2 (and perhaps other +/// compilers) are not able to deduce that they can multiply +/// std::complex by Kokkos::complex, by first converting std::complex +/// to Kokkos::complex. +/// +/// This function cannot be called in a CUDA device function, because +/// std::complex's methods and nonmember functions are not marked as +/// CUDA device functions. +template<class RealType> +complex<RealType> +operator * (const std::complex<RealType>& x, const complex<RealType>& y) { + return complex<RealType> (x.real () * y.real () - x.imag () * y.imag (), + x.real () * y.imag () + x.imag () * y.real ()); +} + +/// \brief Binary * operator for RealType times complex. +/// +/// This function exists because the compiler doesn't know that +/// RealType and complex<RealType> commute with respect to operator*. +template<class RealType> +KOKKOS_INLINE_FUNCTION +complex<RealType> +operator * (const RealType& x, const complex<RealType>& y) { + return complex<RealType> (x * y.real (), x * y.imag ()); +} + + +//! Imaginary part of a complex number. +template<class RealType> +KOKKOS_INLINE_FUNCTION +RealType imag (const complex<RealType>& x) { + return x.imag (); +} + +//! Real part of a complex number. +template<class RealType> +KOKKOS_INLINE_FUNCTION +RealType real (const complex<RealType>& x) { + return x.real (); +} + +//! Absolute value (magnitude) of a complex number. +template<class RealType> +KOKKOS_INLINE_FUNCTION +RealType abs (const complex<RealType>& x) { + // FIXME (mfh 31 Oct 2014) Scale to avoid unwarranted overflow. + return ::sqrt (real (x) * real (x) + imag (x) * imag (x)); +} + +//! 
Conjugate of a complex number. +template<class RealType> +KOKKOS_INLINE_FUNCTION +complex<RealType> conj (const complex<RealType>& x) { + return complex<RealType> (real (x), -imag (x)); +} + + +//! Binary operator / for complex and real numbers +template<class RealType1, class RealType2> +KOKKOS_INLINE_FUNCTION +complex<RealType1> +operator / (const complex<RealType1>& x, const RealType2& y) { + return complex<RealType1> (real (x) / y, imag (x) / y); +} + +//! Binary operator / for complex. +template<class RealType> +KOKKOS_INLINE_FUNCTION +complex<RealType> +operator / (const complex<RealType>& x, const complex<RealType>& y) { + // Scale (by the "1-norm" of y) to avoid unwarranted overflow. + // If the real part is +/-Inf and the imaginary part is -/+Inf, + // this won't change the result. + const RealType s = ::fabs (real (y)) + ::fabs (imag (y)); + + // If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0. + // In that case, the relation x/y == (x/s) / (y/s) doesn't hold, + // because y/s is NaN. + if (s == 0.0) { + return complex<RealType> (real (x) / s, imag (x) / s); + } + else { + const complex<RealType> x_scaled (real (x) / s, imag (x) / s); + const complex<RealType> y_conj_scaled (real (y) / s, -imag (y) / s); + const RealType y_scaled_abs = real (y_conj_scaled) * real (y_conj_scaled) + + imag (y_conj_scaled) * imag (y_conj_scaled); // abs(y) == abs(conj(y)) + complex<RealType> result = x_scaled * y_conj_scaled; + result /= y_scaled_abs; + return result; + } +} + +//! Equality operator for two complex numbers. +template<class RealType> +KOKKOS_INLINE_FUNCTION +bool operator == (const complex<RealType>& x, const complex<RealType>& y) { + return real (x) == real (y) && imag (x) == imag (y); +} + +//! Equality operator for std::complex and Kokkos::complex. 
+template<class RealType> +KOKKOS_INLINE_FUNCTION +bool operator == (const std::complex<RealType>& x, const complex<RealType>& y) { + return std::real (x) == real (y) && std::imag (x) == imag (y); +} + +//! Equality operator for complex and real number. +template<class RealType1, class RealType2> +KOKKOS_INLINE_FUNCTION +bool operator == (const complex<RealType1>& x, const RealType2& y) { + return real (x) == y && imag (x) == static_cast<RealType1> (0.0); +} + +//! Equality operator for real and complex number. +template<class RealType> +KOKKOS_INLINE_FUNCTION +bool operator == (const RealType& x, const complex<RealType>& y) { + return y == x; +} + +//! Inequality operator for two complex numbers. +template<class RealType> +KOKKOS_INLINE_FUNCTION +bool operator != (const complex<RealType>& x, const complex<RealType>& y) { + return real (x) != real (y) || imag (x) != imag (y); +} + +//! Inequality operator for std::complex and Kokkos::complex. +template<class RealType> +KOKKOS_INLINE_FUNCTION +bool operator != (const std::complex<RealType>& x, const complex<RealType>& y) { + return std::real (x) != real (y) || std::imag (x) != imag (y); +} + +//! Inequality operator for complex and real number. +template<class RealType1, class RealType2> +KOKKOS_INLINE_FUNCTION +bool operator != (const complex<RealType1>& x, const RealType2& y) { + return real (x) != y || imag (x) != static_cast<RealType1> (0.0); +} + +//! Inequality operator for real and complex number. 
+template<class RealType> +KOKKOS_INLINE_FUNCTION +bool operator != (const RealType& x, const complex<RealType>& y) { + return y != x; +} + +//! Stream insertion: prints x in std::complex's format. Host only (uses std::complex). +template<class RealType> +std::ostream& operator << (std::ostream& os, const complex<RealType>& x) { + const std::complex<RealType> x_std (Kokkos::real (x), Kokkos::imag (x)); + os << x_std; + return os; +} + +//! Stream extraction: reads x in std::complex's format from an input stream. Host only. +template<class RealType> +std::istream& operator >> (std::istream& is, complex<RealType>& x) { + std::complex<RealType> x_std; + is >> x_std; + if (is) x = x_std; // assign only when the extraction succeeded; x is left untouched on failure + return is; +} + + +} // namespace Kokkos + +#endif // KOKKOS_COMPLEX_HPP diff --git a/lib/kokkos/core/src/Kokkos_Concepts.hpp b/lib/kokkos/core/src/Kokkos_Concepts.hpp new file mode 100644 index 0000000000000000000000000000000000000000..82a342eec0bfba9e5420b86d41c586b22969712c --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Concepts.hpp @@ -0,0 +1,78 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CORE_CONCEPTS_HPP +#define KOKKOS_CORE_CONCEPTS_HPP + +#include <type_traits> + +namespace Kokkos { +//Schedules for Execution Policies +struct Static {}; +struct Dynamic {}; + +//Schedule Wrapper Type +template<class T> +struct Schedule +{ + static_assert( std::is_same<T,Static>::value + || std::is_same<T,Dynamic>::value + , "Kokkos: Invalid Schedule<> type." 
+ ); + using schedule_type = Schedule<T>; + using type = T; +}; + +//Specify Iteration Index Type +template<typename T> +struct IndexType +{ + static_assert(std::is_integral<T>::value,"Kokkos: Invalid IndexType<>."); + using index_type = IndexType<T>; + using type = T; +}; + +} // namespace Kokkos + +#endif // KOKKOS_CORE_CONCEPTS_HPP + diff --git a/lib/kokkos/core/src/Kokkos_Core.hpp b/lib/kokkos/core/src/Kokkos_Core.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7cde4610ee8957c2eea7a9a2e05c8f2cbb9463f4 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Core.hpp @@ -0,0 +1,174 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CORE_HPP +#define KOKKOS_CORE_HPP + +//---------------------------------------------------------------------------- +// Include the execution space header files for the enabled execution spaces. + +#include <Kokkos_Core_fwd.hpp> + +#if defined( KOKKOS_HAVE_SERIAL ) +#include <Kokkos_Serial.hpp> +#endif + +#if defined( KOKKOS_HAVE_OPENMP ) +#include <Kokkos_OpenMP.hpp> +#endif + +#if defined( KOKKOS_HAVE_PTHREAD ) +#include <Kokkos_Threads.hpp> +#endif + +#if defined( KOKKOS_HAVE_CUDA ) +#include <Kokkos_Cuda.hpp> +#endif + +#include <Kokkos_MemoryPool.hpp> +#include <Kokkos_Pair.hpp> +#include <Kokkos_Array.hpp> +#include <Kokkos_View.hpp> +#include <Kokkos_Vectorization.hpp> +#include <Kokkos_Atomic.hpp> +#include <Kokkos_hwloc.hpp> + +#ifdef KOKKOS_HAVE_CXX11 +#include <Kokkos_Complex.hpp> +#endif + + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +struct InitArguments { + int num_threads; + int num_numa; + int device_id; + + InitArguments() { + num_threads = -1; + num_numa = -1; + device_id = -1; + } +}; + +void initialize(int& narg, char* arg[]); + +void initialize(const InitArguments& args = InitArguments()); + +/** \brief Finalize the spaces that were initialized via Kokkos::initialize */ 
+void finalize(); + +/** \brief Finalize all known execution spaces */ +void finalize_all(); + +void fence(); + +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { + +/* Allocate memory from a memory space. + * The allocation is tracked in Kokkos memory tracking system, so + * leaked memory can be identified. + */ +template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space > +inline +void * kokkos_malloc( const std::string & arg_alloc_label + , const size_t arg_alloc_size ) +{ + typedef typename Space::memory_space MemorySpace ; + return Impl::SharedAllocationRecord< MemorySpace >:: + allocate_tracked( MemorySpace() , arg_alloc_label , arg_alloc_size ); +} + +template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space > +inline +void * kokkos_malloc( const size_t arg_alloc_size ) +{ + typedef typename Space::memory_space MemorySpace ; + return Impl::SharedAllocationRecord< MemorySpace >:: + allocate_tracked( MemorySpace() , "no-label" , arg_alloc_size ); +} + +template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space > +inline +void kokkos_free( void * arg_alloc ) +{ + typedef typename Space::memory_space MemorySpace ; + return Impl::SharedAllocationRecord< MemorySpace >:: + deallocate_tracked( arg_alloc ); +} + +template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space > +inline +void * kokkos_realloc( void * arg_alloc , const size_t arg_alloc_size ) +{ + typedef typename Space::memory_space MemorySpace ; + return Impl::SharedAllocationRecord< MemorySpace >:: + reallocate_tracked( arg_alloc , arg_alloc_size ); +} + +} // namespace Experimental +} // namespace Kokkos + + +namespace Kokkos { + +using Kokkos::Experimental::kokkos_malloc ; +using Kokkos::Experimental::kokkos_realloc ; +using 
Kokkos::Experimental::kokkos_free ; + +} + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif + diff --git a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e9648b59b8f62c5cb4ea46c00ec1498c361cbdb4 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp @@ -0,0 +1,247 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CORE_FWD_HPP +#define KOKKOS_CORE_FWD_HPP + +//---------------------------------------------------------------------------- +// Kokkos_Macros.hpp does introspection on configuration options +// and compiler environment then sets a collection of #define macros. + +#include <Kokkos_Macros.hpp> + +//---------------------------------------------------------------------------- +// Have assumed a 64bit build (8byte pointers) throughout the code base. 
+ +static_assert( sizeof(void*) == 8 + , "Kokkos assumes 64-bit build; i.e., 8-byte pointers" ); + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +struct AUTO_t { + KOKKOS_INLINE_FUNCTION + constexpr const AUTO_t & operator()() const { return *this ; } +}; + +namespace { +/**\brief Token to indicate that a parameter's value is to be automatically selected */ +constexpr AUTO_t AUTO = Kokkos::AUTO_t(); +} + +struct InvalidType {}; + +} + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +// Forward declarations for class inter-relationships + +namespace Kokkos { + +class HostSpace ; ///< Memory space for main process and CPU execution spaces + +#ifdef KOKKOS_HAVE_HBWSPACE +namespace Experimental { +class HBWSpace ; /// Memory space for hbw_malloc from memkind (e.g. for KNL processor) +} +#endif + +#if defined( KOKKOS_HAVE_SERIAL ) +class Serial ; ///< Execution space main process on CPU +#endif // defined( KOKKOS_HAVE_SERIAL ) + +#if defined( KOKKOS_HAVE_PTHREAD ) +class Threads ; ///< Execution space with pthreads back-end +#endif + +#if defined( KOKKOS_HAVE_OPENMP ) +class OpenMP ; ///< OpenMP execution space +#endif + +#if defined( KOKKOS_HAVE_CUDA ) +class CudaSpace ; ///< Memory space on Cuda GPU +class CudaUVMSpace ; ///< Memory space on Cuda GPU with UVM +class CudaHostPinnedSpace ; ///< Memory space on Host accessible to Cuda GPU +class Cuda ; ///< Execution space for Cuda GPU +#endif + +template<class ExecutionSpace, class MemorySpace> +struct Device; +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +// Set the default execution space. 
+ +/// Define Kokkos::DefaultExecutionSpace as per configuration option +/// or chosen from the enabled execution spaces in the following order: +/// Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Threads, Kokkos::Serial + +namespace Kokkos { + +#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA ) + typedef Cuda DefaultExecutionSpace ; +#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) + typedef OpenMP DefaultExecutionSpace ; +#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) + typedef Threads DefaultExecutionSpace ; +#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL ) + typedef Serial DefaultExecutionSpace ; +#else +# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads." +#endif + +#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) + typedef OpenMP DefaultHostExecutionSpace ; +#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) + typedef Threads DefaultHostExecutionSpace ; +#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL ) + typedef Serial DefaultHostExecutionSpace ; +#elif defined ( KOKKOS_HAVE_OPENMP ) + typedef OpenMP DefaultHostExecutionSpace ; +#elif defined ( KOKKOS_HAVE_PTHREAD ) + typedef Threads DefaultHostExecutionSpace ; +#elif defined ( KOKKOS_HAVE_SERIAL ) + typedef Serial DefaultHostExecutionSpace ; +#else +# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads." +#endif + +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +// Detect the active execution space and define its memory space. +// This is used to verify whether a running kernel can access +// a given memory space. 
+ +namespace Kokkos { +namespace Impl { + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_HAVE_CUDA) +typedef Kokkos::CudaSpace ActiveExecutionMemorySpace ; +#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) +typedef Kokkos::HostSpace ActiveExecutionMemorySpace ; +#else +typedef void ActiveExecutionMemorySpace ; +#endif + +template< class ActiveSpace , class MemorySpace > +struct VerifyExecutionCanAccessMemorySpace { + enum {value = 0}; +}; + +template< class Space > +struct VerifyExecutionCanAccessMemorySpace< Space , Space > +{ + enum {value = 1}; + KOKKOS_INLINE_FUNCTION static void verify(void) {} + KOKKOS_INLINE_FUNCTION static void verify(const void *) {} +}; + +} // namespace Impl +} // namespace Kokkos + +#define KOKKOS_RESTRICT_EXECUTION_TO_DATA( DATA_SPACE , DATA_PTR ) \ + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \ + Kokkos::Impl::ActiveExecutionMemorySpace , DATA_SPACE >::verify( DATA_PTR ) + +#define KOKKOS_RESTRICT_EXECUTION_TO_( DATA_SPACE ) \ + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \ + Kokkos::Impl::ActiveExecutionMemorySpace , DATA_SPACE >::verify() + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + void fence(); +} + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class Functor + , class Policy + , class EnableFunctor = void + , class EnablePolicy = void + > +struct FunctorPolicyExecutionSpace; + +//---------------------------------------------------------------------------- +/// \class ParallelFor +/// \brief Implementation of the ParallelFor operator that has a +/// partial specialization for the device. +/// +/// This is an implementation detail of parallel_for. 
Users should +/// skip this and go directly to the nonmember function parallel_for. +template< class FunctorType , class ExecPolicy , class ExecutionSpace = + typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space + > class ParallelFor ; + +/// \class ParallelReduce +/// \brief Implementation detail of parallel_reduce. +/// +/// This is an implementation detail of parallel_reduce. Users should +/// skip this and go directly to the nonmember function parallel_reduce. +template< class FunctorType , class ExecPolicy , class ReducerType = InvalidType, class ExecutionSpace = + typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space + > class ParallelReduce ; + +/// \class ParallelScan +/// \brief Implementation detail of parallel_scan. +/// +/// This is an implementation detail of parallel_scan. Users should +/// skip this and go directly to the documentation of the nonmember +/// template function Kokkos::parallel_scan. +template< class FunctorType , class ExecPolicy , class ExecutionSpace = + typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space + > class ParallelScan ; + +}} // namespace Impl , namespace Kokkos +#endif /* #ifndef KOKKOS_CORE_FWD_HPP */ + diff --git a/lib/kokkos/core/src/Kokkos_Cuda.hpp b/lib/kokkos/core/src/Kokkos_Cuda.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3130ee3198f35ec59dbeef7755cfffc11fda9346 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Cuda.hpp @@ -0,0 +1,274 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CUDA_HPP +#define KOKKOS_CUDA_HPP + +#include <Kokkos_Core_fwd.hpp> + +// If CUDA execution space is enabled then use this header file. 
+ +#if defined( KOKKOS_HAVE_CUDA ) + +#include <iosfwd> +#include <vector> + +#include <Kokkos_CudaSpace.hpp> + +#include <Kokkos_Parallel.hpp> +#include <Kokkos_TaskPolicy.hpp> +#include <Kokkos_Layout.hpp> +#include <Kokkos_ScratchSpace.hpp> +#include <Kokkos_MemoryTraits.hpp> +#include <impl/Kokkos_Tags.hpp> + +#include <KokkosExp_MDRangePolicy.hpp> + +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { +class CudaExec ; +} // namespace Impl +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { + +/// \class Cuda +/// \brief Kokkos Execution Space that uses CUDA to run on GPUs. +/// +/// An "execution space" represents a parallel execution model. It tells Kokkos +/// how to parallelize the execution of kernels in a parallel_for or +/// parallel_reduce. For example, the Threads execution space uses Pthreads or +/// C++11 threads on a CPU, the OpenMP execution space uses the OpenMP language +/// extensions, and the Serial execution space executes "parallel" kernels +/// sequentially. The Cuda execution space uses NVIDIA's CUDA programming +/// model to execute kernels in parallel on GPUs. +class Cuda { +public: + //! \name Type declarations that all Kokkos execution spaces must provide. + //@{ + + //! Tag this class as a kokkos execution space + typedef Cuda execution_space ; + +#if defined( KOKKOS_USE_CUDA_UVM ) + //! This execution space's preferred memory space. + typedef CudaUVMSpace memory_space ; +#else + //! This execution space's preferred memory space. + typedef CudaSpace memory_space ; +#endif + + //! This execution space preferred device_type + typedef Kokkos::Device<execution_space,memory_space> device_type; + + //! The size_type best suited for this execution space. + typedef memory_space::size_type size_type ; + + //! This execution space's preferred array layout. + typedef LayoutLeft array_layout ; + + //! 
+ typedef ScratchMemorySpace< Cuda > scratch_memory_space ; + + //@} + //-------------------------------------------------- + //! \name Functions that all Kokkos devices must implement. + //@{ + + /// \brief True if and only if this method is being called in a + /// thread-parallel function. + KOKKOS_INLINE_FUNCTION static int in_parallel() { +#if defined( __CUDA_ARCH__ ) + return true; +#else + return false; +#endif + } + + /** \brief Set the device in a "sleep" state. + * + * This function sets the device in a "sleep" state in which it is + * not ready for work. This may consume less resources than if the + * device were in an "awake" state, but it may also take time to + * bring the device from a sleep state to be ready for work. + * + * \return True if the device is in the "sleep" state, else false if + * the device is actively working and could not enter the "sleep" + * state. + */ + static bool sleep(); + + /// \brief Wake the device from the 'sleep' state so it is ready for work. + /// + /// \return True if the device is in the "ready" state, else "false" + /// if the device is actively working (which also means that it's + /// awake). + static bool wake(); + + /// \brief Wait until all dispatched functors complete. + /// + /// The parallel_for or parallel_reduce dispatch of a functor may + /// return asynchronously, before the functor completes. This + /// method does not return until all dispatched functors on this + /// device have completed. + static void fence(); + + //! Free any resources being consumed by the device. + static void finalize(); + + //! Has been initialized + static int is_initialized(); + + /** \brief Return the maximum amount of concurrency. */ + static int concurrency(); + + //! Print configuration information to the given output stream. + static void print_configuration( std::ostream & , const bool detail = false ); + + //@} + //-------------------------------------------------- + //! 
\name Cuda space instances + + ~Cuda() {} + Cuda(); + explicit Cuda( const int instance_id ); + + Cuda( Cuda && ) = default ; + Cuda( const Cuda & ) = default ; + Cuda & operator = ( Cuda && ) = default ; + Cuda & operator = ( const Cuda & ) = default ; + + //-------------------------------------------------------------------------- + //! \name Device-specific functions + //@{ + + struct SelectDevice { + int cuda_device_id ; + SelectDevice() : cuda_device_id(0) {} + explicit SelectDevice( int id ) : cuda_device_id( id ) {} + }; + + //! Initialize, telling the CUDA run-time library which device to use. + static void initialize( const SelectDevice = SelectDevice() + , const size_t num_instances = 1 ); + + /// \brief Cuda device architecture of the selected device. + /// + /// This matches the __CUDA_ARCH__ specification. + static size_type device_arch(); + + //! Query device count. + static size_type detect_device_count(); + + /** \brief Detect the available devices and their architecture + * as defined by the __CUDA_ARCH__ specification. 
+ */ + static std::vector<unsigned> detect_device_arch(); + + cudaStream_t cuda_stream() const { return m_stream ; } + int cuda_device() const { return m_device ; } + + //@} + //-------------------------------------------------------------------------- + +private: + + cudaStream_t m_stream ; + int m_device ; +}; + +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { + +template<> +struct VerifyExecutionCanAccessMemorySpace + < Kokkos::CudaSpace + , Kokkos::Cuda::scratch_memory_space + > +{ + enum { value = true }; + KOKKOS_INLINE_FUNCTION static void verify( void ) { } + KOKKOS_INLINE_FUNCTION static void verify( const void * ) { } +}; + +template<> +struct VerifyExecutionCanAccessMemorySpace + < Kokkos::HostSpace + , Kokkos::Cuda::scratch_memory_space + > +{ + enum { value = false }; + inline static void verify( void ) { CudaSpace::access_error(); } + inline static void verify( const void * p ) { CudaSpace::access_error(p); } +}; + +} // namespace Impl +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +#include <Cuda/Kokkos_CudaExec.hpp> +#include <Cuda/Kokkos_Cuda_View.hpp> + +#include <Cuda/KokkosExp_Cuda_View.hpp> + +#include <Cuda/Kokkos_Cuda_Parallel.hpp> +#include <Cuda/Kokkos_Cuda_Task.hpp> + +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_HAVE_CUDA ) */ +#endif /* #ifndef KOKKOS_CUDA_HPP */ + + + diff --git a/lib/kokkos/core/src/Kokkos_CudaSpace.hpp b/lib/kokkos/core/src/Kokkos_CudaSpace.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cd728895d0f02419d702ccb37ec9b048b08a6df8 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_CudaSpace.hpp @@ -0,0 +1,802 @@ +/* 
+//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CUDASPACE_HPP +#define KOKKOS_CUDASPACE_HPP + +#include <Kokkos_Core_fwd.hpp> + +#if defined( KOKKOS_HAVE_CUDA ) + +#include <iosfwd> +#include <typeinfo> +#include <string> + +#include <Kokkos_HostSpace.hpp> + +#include <Cuda/Kokkos_Cuda_abort.hpp> + +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { + +/** \brief Cuda on-device memory management */ + +class CudaSpace { +public: + + //! Tag this class as a kokkos memory space + typedef CudaSpace memory_space ; + typedef Kokkos::Cuda execution_space ; + typedef Kokkos::Device<execution_space,memory_space> device_type; + + typedef unsigned int size_type ; + + /*--------------------------------*/ + + CudaSpace(); + CudaSpace( CudaSpace && rhs ) = default ; + CudaSpace( const CudaSpace & rhs ) = default ; + CudaSpace & operator = ( CudaSpace && rhs ) = default ; + CudaSpace & operator = ( const CudaSpace & rhs ) = default ; + ~CudaSpace() = default ; + + /**\brief Allocate untracked memory in the cuda space */ + void * allocate( const size_t arg_alloc_size ) const ; + + /**\brief Deallocate untracked memory in the cuda space */ + void deallocate( void * const arg_alloc_ptr + , const size_t arg_alloc_size ) const ; + + /*--------------------------------*/ + /** \brief Error reporting for HostSpace attempt to access CudaSpace */ + static void access_error(); + static void access_error( const void * const ); + +private: + + int m_device ; ///< Which Cuda device + + // friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > ; +}; + +namespace Impl { +/// \brief Initialize lock array for arbitrary size atomics. 
+/// +/// Arbitrary atomics are implemented using a hash table of locks +/// where the hash value is derived from the address of the +/// object for which an atomic operation is performed. +/// This function initializes the locks to zero (unset). +void init_lock_arrays_cuda_space(); + +/// \brief Retrieve the pointer to the lock array for arbitrary size atomics. +/// +/// Arbitrary atomics are implemented using a hash table of locks +/// where the hash value is derived from the address of the +/// object for which an atomic operation is performed. +/// This function retrieves the lock array pointer. +/// If the array is not yet allocated it will do so. +/// NOTE(review): the effect of the deallocate argument is not described here; +/// presumably passing true releases the array -- confirm against the implementation. +int* atomic_lock_array_cuda_space_ptr(bool deallocate = false); + +/// \brief Retrieve the pointer to the scratch array for team and thread private global memory. +/// +/// Team and Thread private scratch allocations in +/// global memory are acquired via locks. +/// This function retrieves the lock array pointer. +/// If the array is not yet allocated it will do so. +/// NOTE(review): the effect of the deallocate argument is not described here; +/// presumably passing true releases the array -- confirm against the implementation. +int* scratch_lock_array_cuda_space_ptr(bool deallocate = false); + +/// \brief Retrieve the pointer to the scratch array for unique identifiers. +/// +/// Unique identifiers in the range 0-Cuda::concurrency +/// are provided via locks. +/// This function retrieves the lock array pointer. +/// If the array is not yet allocated it will do so. +/// NOTE(review): the effect of the deallocate argument is not described here; +/// presumably passing true releases the array -- confirm against the implementation. +int* threadid_lock_array_cuda_space_ptr(bool deallocate = false); +} +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { + +/** \brief Cuda memory that is accessible to Host execution space + * through Cuda's unified virtual memory (UVM) runtime. + */ +class CudaUVMSpace { +public: + + //! 
Tag this class as a kokkos memory space + typedef CudaUVMSpace memory_space ; + typedef Cuda execution_space ; + typedef Kokkos::Device<execution_space,memory_space> device_type; + typedef unsigned int size_type ; + + /** \brief If UVM capability is available */ + static bool available(); + + /*--------------------------------*/ + + CudaUVMSpace(); + CudaUVMSpace( CudaUVMSpace && rhs ) = default ; + CudaUVMSpace( const CudaUVMSpace & rhs ) = default ; + CudaUVMSpace & operator = ( CudaUVMSpace && rhs ) = default ; + CudaUVMSpace & operator = ( const CudaUVMSpace & rhs ) = default ; + ~CudaUVMSpace() = default ; + + /**\brief Allocate untracked memory in the cuda space */ + void * allocate( const size_t arg_alloc_size ) const ; + + /**\brief Deallocate untracked memory in the cuda space */ + void deallocate( void * const arg_alloc_ptr + , const size_t arg_alloc_size ) const ; + + /*--------------------------------*/ + +private: + + int m_device ; ///< Which Cuda device +}; + +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { + +/** \brief Host memory that is accessible to Cuda execution space + * through Cuda's host-pinned memory allocation. + */ +class CudaHostPinnedSpace { +public: + + //! 
Tag this class as a kokkos memory space + /** \brief Memory is in HostSpace so use the HostSpace::execution_space */ + typedef HostSpace::execution_space execution_space ; + typedef CudaHostPinnedSpace memory_space ; + typedef Kokkos::Device<execution_space,memory_space> device_type; + typedef unsigned int size_type ; + + /*--------------------------------*/ + + CudaHostPinnedSpace(); + CudaHostPinnedSpace( CudaHostPinnedSpace && rhs ) = default ; + CudaHostPinnedSpace( const CudaHostPinnedSpace & rhs ) = default ; + CudaHostPinnedSpace & operator = ( CudaHostPinnedSpace && rhs ) = default ; + CudaHostPinnedSpace & operator = ( const CudaHostPinnedSpace & rhs ) = default ; + ~CudaHostPinnedSpace() = default ; + + /**\brief Allocate untracked memory in the space */ + void * allocate( const size_t arg_alloc_size ) const ; + + /**\brief Deallocate untracked memory in the space */ + void deallocate( void * const arg_alloc_ptr + , const size_t arg_alloc_size ) const ; + + /*--------------------------------*/ +}; + +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { + +void DeepCopyAsyncCuda( void * dst , const void * src , size_t n); + +template<> struct DeepCopy< CudaSpace , CudaSpace , Cuda> +{ + DeepCopy( void * dst , const void * src , size_t ); + DeepCopy( const Cuda & , void * dst , const void * src , size_t ); +}; + +template<> struct DeepCopy< CudaSpace , HostSpace , Cuda > +{ + DeepCopy( void * dst , const void * src , size_t ); + DeepCopy( const Cuda & , void * dst , const void * src , size_t ); +}; + +template<> struct DeepCopy< HostSpace , CudaSpace , Cuda > +{ + DeepCopy( void * dst , const void * src , size_t ); + DeepCopy( const Cuda & , void * dst , const void * src , size_t ); +}; + +template<class ExecutionSpace> struct DeepCopy< CudaSpace , CudaSpace , ExecutionSpace > +{ + inline 
+ DeepCopy( void * dst , const void * src , size_t n ) + { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); } + + inline + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) + { + exec.fence(); + DeepCopyAsyncCuda (dst,src,n); + } +}; + +template<class ExecutionSpace> struct DeepCopy< CudaSpace , HostSpace , ExecutionSpace > +{ + inline + DeepCopy( void * dst , const void * src , size_t n ) + { (void) DeepCopy< CudaSpace , HostSpace , Cuda>( dst , src , n ); } + + inline + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) + { + exec.fence(); + DeepCopyAsyncCuda (dst,src,n); + } +}; + +template<class ExecutionSpace> +struct DeepCopy< HostSpace , CudaSpace , ExecutionSpace > +{ + inline + DeepCopy( void * dst , const void * src , size_t n ) + { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); } + + inline + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) + { + exec.fence(); + DeepCopyAsyncCuda (dst,src,n); + } +}; + +template<class ExecutionSpace> +struct DeepCopy< CudaSpace , CudaUVMSpace , ExecutionSpace > +{ + inline + DeepCopy( void * dst , const void * src , size_t n ) + { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); } + + inline + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) + { + exec.fence(); + DeepCopyAsyncCuda (dst,src,n); + } +}; + +template<class ExecutionSpace> +struct DeepCopy< CudaSpace , CudaHostPinnedSpace , ExecutionSpace> +{ + inline + DeepCopy( void * dst , const void * src , size_t n ) + { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); } + + inline + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) + { + exec.fence(); + DeepCopyAsyncCuda (dst,src,n); + } +}; + + +template<class ExecutionSpace> +struct DeepCopy< CudaUVMSpace , CudaSpace , ExecutionSpace> +{ + inline + DeepCopy( void * dst , const void * src , size_t 
n ) + { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); } + + inline + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) + { + exec.fence(); + DeepCopyAsyncCuda (dst,src,n); + } +}; + +template<class ExecutionSpace> +struct DeepCopy< CudaUVMSpace , CudaUVMSpace , ExecutionSpace> +{ + inline + DeepCopy( void * dst , const void * src , size_t n ) + { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); } + + inline + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) + { + exec.fence(); + DeepCopyAsyncCuda (dst,src,n); + } +}; + +template<class ExecutionSpace> +struct DeepCopy< CudaUVMSpace , CudaHostPinnedSpace , ExecutionSpace> +{ + inline + DeepCopy( void * dst , const void * src , size_t n ) + { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); } + + inline + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) + { + exec.fence(); + DeepCopyAsyncCuda (dst,src,n); + } +}; + +template<class ExecutionSpace> struct DeepCopy< CudaUVMSpace , HostSpace , ExecutionSpace > +{ + inline + DeepCopy( void * dst , const void * src , size_t n ) + { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); } + + inline + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) + { + exec.fence(); + DeepCopyAsyncCuda (dst,src,n); + } +}; + + +template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , CudaSpace , ExecutionSpace > +{ + inline + DeepCopy( void * dst , const void * src , size_t n ) + { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); } + + inline + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) + { + exec.fence(); + DeepCopyAsyncCuda (dst,src,n); + } +}; + +template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , CudaUVMSpace , ExecutionSpace > +{ + inline + DeepCopy( void * dst , const void * src , size_t n ) + { (void) 
DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); } + + inline + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) + { + exec.fence(); + DeepCopyAsyncCuda (dst,src,n); + } +}; + +template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , CudaHostPinnedSpace , ExecutionSpace > +{ + inline + DeepCopy( void * dst , const void * src , size_t n ) + { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); } + + inline + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) + { + exec.fence(); + DeepCopyAsyncCuda (dst,src,n); + } +}; + +template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , HostSpace , ExecutionSpace > +{ + inline + DeepCopy( void * dst , const void * src , size_t n ) + { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); } + + inline + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) + { + exec.fence(); + DeepCopyAsyncCuda (dst,src,n); + } +}; + + +template<class ExecutionSpace> struct DeepCopy< HostSpace , CudaUVMSpace , ExecutionSpace > +{ + inline + DeepCopy( void * dst , const void * src , size_t n ) + { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); } + + inline + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) + { + exec.fence(); + DeepCopyAsyncCuda (dst,src,n); + } +}; + +template<class ExecutionSpace> struct DeepCopy< HostSpace , CudaHostPinnedSpace , ExecutionSpace > +{ + inline + DeepCopy( void * dst , const void * src , size_t n ) + { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); } + + inline + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) + { + exec.fence(); + DeepCopyAsyncCuda (dst,src,n); + } +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- 
+//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +/** Running in CudaSpace attempting to access HostSpace: error */ +template<> +struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::HostSpace > +{ + enum { value = false }; + KOKKOS_INLINE_FUNCTION static void verify( void ) + { Kokkos::abort("Cuda code attempted to access HostSpace memory"); } + + KOKKOS_INLINE_FUNCTION static void verify( const void * ) + { Kokkos::abort("Cuda code attempted to access HostSpace memory"); } +}; + +/** Running in CudaSpace accessing CudaUVMSpace: ok */ +template<> +struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaUVMSpace > +{ + enum { value = true }; + KOKKOS_INLINE_FUNCTION static void verify( void ) { } + KOKKOS_INLINE_FUNCTION static void verify( const void * ) { } +}; + +/** Running in CudaSpace accessing CudaHostPinnedSpace: ok */ +template<> +struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace > +{ + enum { value = true }; + KOKKOS_INLINE_FUNCTION static void verify( void ) { } + KOKKOS_INLINE_FUNCTION static void verify( const void * ) { } +}; + +/** Running in CudaSpace attempting to access an unknown space: error */ +template< class OtherSpace > +struct VerifyExecutionCanAccessMemorySpace< + typename enable_if< ! 
is_same<Kokkos::CudaSpace,OtherSpace>::value , Kokkos::CudaSpace >::type , + OtherSpace > +{ + enum { value = false }; + KOKKOS_INLINE_FUNCTION static void verify( void ) + { Kokkos::abort("Cuda code attempted to access unknown Space memory"); } + + KOKKOS_INLINE_FUNCTION static void verify( const void * ) + { Kokkos::abort("Cuda code attempted to access unknown Space memory"); } +}; + +//---------------------------------------------------------------------------- +/** Running in HostSpace attempting to access CudaSpace */ +template<> +struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaSpace > +{ + enum { value = false }; + inline static void verify( void ) { CudaSpace::access_error(); } + inline static void verify( const void * p ) { CudaSpace::access_error(p); } +}; + +/** Running in HostSpace accessing CudaUVMSpace is OK */ +template<> +struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaUVMSpace > +{ + enum { value = true }; + inline static void verify( void ) { } + inline static void verify( const void * ) { } +}; + +/** Running in HostSpace accessing CudaHostPinnedSpace is OK */ +template<> +struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace > +{ + enum { value = true }; + KOKKOS_INLINE_FUNCTION static void verify( void ) {} + KOKKOS_INLINE_FUNCTION static void verify( const void * ) {} +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template<> +class SharedAllocationRecord< Kokkos::CudaSpace , void > + : public SharedAllocationRecord< void , void > +{ +private: + + friend class SharedAllocationRecord< Kokkos::CudaUVMSpace , void > ; + + typedef SharedAllocationRecord< void , void > RecordBase ; + + SharedAllocationRecord( const 
SharedAllocationRecord & ) = delete ; + SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ; + + static void deallocate( RecordBase * ); + + static ::cudaTextureObject_t + attach_texture_object( const unsigned sizeof_alias + , void * const alloc_ptr + , const size_t alloc_size ); + + static RecordBase s_root_record ; + + ::cudaTextureObject_t m_tex_obj ; + const Kokkos::CudaSpace m_space ; + +protected: + + ~SharedAllocationRecord(); + SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {} + + SharedAllocationRecord( const Kokkos::CudaSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size + , const RecordBase::function_type arg_dealloc = & deallocate + ); + +public: + + std::string get_label() const ; + + static SharedAllocationRecord * allocate( const Kokkos::CudaSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size ); + + /**\brief Allocate tracked memory in the space */ + static + void * allocate_tracked( const Kokkos::CudaSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size ); + + /**\brief Reallocate tracked memory in the space */ + static + void * reallocate_tracked( void * const arg_alloc_ptr + , const size_t arg_alloc_size ); + + /**\brief Deallocate tracked memory in the space */ + static + void deallocate_tracked( void * const arg_alloc_ptr ); + + static SharedAllocationRecord * get_record( void * arg_alloc_ptr ); + + template< typename AliasType > + inline + ::cudaTextureObject_t attach_texture_object() + { + static_assert( ( std::is_same< AliasType , int >::value || + std::is_same< AliasType , ::int2 >::value || + std::is_same< AliasType , ::int4 >::value ) + , "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" ); + + if ( m_tex_obj == 0 ) { + m_tex_obj = attach_texture_object( sizeof(AliasType) + , (void*) RecordBase::m_alloc_ptr + , RecordBase::m_alloc_size ); + } + + return m_tex_obj ; + } + + 
template< typename AliasType > + inline + int attach_texture_object_offset( const AliasType * const ptr ) + { + // Texture object is attached to the entire allocation range + return ptr - reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr ); + } + + static void print_records( std::ostream & , const Kokkos::CudaSpace & , bool detail = false ); +}; + + +template<> +class SharedAllocationRecord< Kokkos::CudaUVMSpace , void > + : public SharedAllocationRecord< void , void > +{ +private: + + typedef SharedAllocationRecord< void , void > RecordBase ; + + SharedAllocationRecord( const SharedAllocationRecord & ) = delete ; + SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ; + + static void deallocate( RecordBase * ); + + static RecordBase s_root_record ; + + ::cudaTextureObject_t m_tex_obj ; + const Kokkos::CudaUVMSpace m_space ; + +protected: + + ~SharedAllocationRecord(); + SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {} + + SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size + , const RecordBase::function_type arg_dealloc = & deallocate + ); + +public: + + std::string get_label() const ; + + static SharedAllocationRecord * allocate( const Kokkos::CudaUVMSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size + ); + + /**\brief Allocate tracked memory in the space */ + static + void * allocate_tracked( const Kokkos::CudaUVMSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size ); + + /**\brief Reallocate tracked memory in the space */ + static + void * reallocate_tracked( void * const arg_alloc_ptr + , const size_t arg_alloc_size ); + + /**\brief Deallocate tracked memory in the space */ + static + void deallocate_tracked( void * const arg_alloc_ptr ); + + static SharedAllocationRecord * get_record( void * arg_alloc_ptr ); + + + template< typename AliasType > + inline + 
::cudaTextureObject_t attach_texture_object() + { + static_assert( ( std::is_same< AliasType , int >::value || + std::is_same< AliasType , ::int2 >::value || + std::is_same< AliasType , ::int4 >::value ) + , "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" ); + + if ( m_tex_obj == 0 ) { + m_tex_obj = SharedAllocationRecord< Kokkos::CudaSpace , void >:: + attach_texture_object( sizeof(AliasType) + , (void*) RecordBase::m_alloc_ptr + , RecordBase::m_alloc_size ); + } + + return m_tex_obj ; + } + + template< typename AliasType > + inline + int attach_texture_object_offset( const AliasType * const ptr ) + { + // Texture object is attached to the entire allocation range + return ptr - reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr ); + } + + static void print_records( std::ostream & , const Kokkos::CudaUVMSpace & , bool detail = false ); +}; + +template<> +class SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > + : public SharedAllocationRecord< void , void > +{ +private: + + typedef SharedAllocationRecord< void , void > RecordBase ; + + SharedAllocationRecord( const SharedAllocationRecord & ) = delete ; + SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ; + + static void deallocate( RecordBase * ); + + static RecordBase s_root_record ; + + const Kokkos::CudaHostPinnedSpace m_space ; + +protected: + + ~SharedAllocationRecord(); + SharedAllocationRecord() : RecordBase(), m_space() {} + + SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size + , const RecordBase::function_type arg_dealloc = & deallocate + ); + +public: + + std::string get_label() const ; + + static SharedAllocationRecord * allocate( const Kokkos::CudaHostPinnedSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size + ); + /**\brief Allocate tracked memory in the space */ + static + void * allocate_tracked( const 
Kokkos::CudaHostPinnedSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size ); + + /**\brief Reallocate tracked memory in the space */ + static + void * reallocate_tracked( void * const arg_alloc_ptr + , const size_t arg_alloc_size ); + + /**\brief Deallocate tracked memory in the space */ + static + void deallocate_tracked( void * const arg_alloc_ptr ); + + + static SharedAllocationRecord * get_record( void * arg_alloc_ptr ); + + static void print_records( std::ostream & , const Kokkos::CudaHostPinnedSpace & , bool detail = false ); +}; + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_HAVE_CUDA ) */ +#endif /* #define KOKKOS_CUDASPACE_HPP */ + diff --git a/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5834fc04dbe43c78bd53b032db1e97ade5e34655 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp @@ -0,0 +1,570 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXECPOLICY_HPP +#define KOKKOS_EXECPOLICY_HPP + +#include <Kokkos_Core_fwd.hpp> +#include <impl/Kokkos_Traits.hpp> +#include <impl/Kokkos_StaticAssert.hpp> +#include <impl/Kokkos_Error.hpp> +#include <impl/Kokkos_Tags.hpp> +#include <impl/Kokkos_AnalyzePolicy.hpp> +#include <Kokkos_Concepts.hpp> +#include <iostream> +//---------------------------------------------------------------------------- + +namespace Kokkos { + +/** \brief Execution policy for work over a range of an integral type. 
+ * + * Valid template argument options: + * + * With a specified execution space: + * < ExecSpace , WorkTag , { IntConst | IntType } > + * < ExecSpace , WorkTag , void > + * < ExecSpace , { IntConst | IntType } , void > + * < ExecSpace , void , void > + * + * With the default execution space: + * < WorkTag , { IntConst | IntType } , void > + * < WorkTag , void , void > + * < { IntConst | IntType } , void , void > + * < void , void , void > + * + * IntType is a fundamental integral type + * IntConst is an Impl::integral_constant< IntType , Blocking > + * + * Blocking is the granularity of partitioning the range among threads. + */ +template<class ... Properties> +class RangePolicy + : public Impl::PolicyTraits<Properties ... > +{ +private: + + typedef Impl::PolicyTraits<Properties ... > traits; + + typename traits::execution_space m_space ; + typename traits::index_type m_begin ; + typename traits::index_type m_end ; + typename traits::index_type m_granularity ; + typename traits::index_type m_granularity_mask ; +public: + + //! Tag this class as an execution policy + typedef RangePolicy execution_policy; + typedef typename traits::index_type member_type ; + + KOKKOS_INLINE_FUNCTION const typename traits::execution_space & space() const { return m_space ; } + KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin ; } + KOKKOS_INLINE_FUNCTION member_type end() const { return m_end ; } + + + //TODO: find a better workaround for Clangs weird instantiation order + // This thing is here because of an instantiation error, where the RangePolicy is inserted into FunctorValue Traits, which + // tries decltype on the operator. It tries to do this even though the first argument of parallel for clearly doesn't match. 
+ void operator()(const int&) const {} + + RangePolicy(const RangePolicy&) = default; + RangePolicy(RangePolicy&&) = default; + + inline RangePolicy() : m_space(), m_begin(0), m_end(0) {} + + /** \brief Total range */ + inline + RangePolicy( const typename traits::execution_space & work_space + , const member_type work_begin + , const member_type work_end + ) + : m_space( work_space ) + , m_begin( work_begin < work_end ? work_begin : 0 ) + , m_end( work_begin < work_end ? work_end : 0 ) + , m_granularity(0) + , m_granularity_mask(0) + { + set_auto_chunk_size(); + } + + /** \brief Total range */ + inline + RangePolicy( const member_type work_begin + , const member_type work_end + ) + : RangePolicy( typename traits::execution_space() + , work_begin , work_end ) + {} + + public: + + /** \brief return chunk_size */ + inline member_type chunk_size() const { + return m_granularity; + } + + /** \brief set chunk_size to a discrete value*/ + inline RangePolicy set_chunk_size(int chunk_size_) const { + RangePolicy p = *this; + p.m_granularity = chunk_size_; + p.m_granularity_mask = p.m_granularity - 1; + return p; + } + + private: + /** \brief finalize chunk_size if it was set to AUTO*/ + inline void set_auto_chunk_size() { + + typename traits::index_type concurrency = traits::execution_space::concurrency(); + if( concurrency==0 ) concurrency=1; + + if(m_granularity > 0) { + if(!Impl::is_integral_power_of_two( m_granularity )) + Kokkos::abort("RangePolicy blocking granularity must be power of two" ); + } + + + member_type new_chunk_size = 1; + while(new_chunk_size*100*concurrency < m_end-m_begin) + new_chunk_size *= 2; + if(new_chunk_size < 128) { + new_chunk_size = 1; + while( (new_chunk_size*40*concurrency < m_end-m_begin ) && (new_chunk_size<128) ) + new_chunk_size*=2; + } + m_granularity = new_chunk_size; + m_granularity_mask = m_granularity - 1; + } + + public: + /** \brief Subrange for a partition's rank and size. 
+ * + * Typically used to partition a range over a group of threads. + */ + struct WorkRange { + typedef typename RangePolicy::work_tag work_tag ; + typedef typename RangePolicy::member_type member_type ; + + KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin ; } + KOKKOS_INLINE_FUNCTION member_type end() const { return m_end ; } + + /** \brief Subrange for a partition's rank and size. + * + * Typically used to partition a range over a group of threads. + */ + KOKKOS_INLINE_FUNCTION + WorkRange( const RangePolicy & range + , const int part_rank + , const int part_size + ) + : m_begin(0), m_end(0) + { + if ( part_size ) { + + // Split evenly among partitions, then round up to the granularity. + const member_type work_part = + ( ( ( ( range.end() - range.begin() ) + ( part_size - 1 ) ) / part_size ) + + range.m_granularity_mask ) & ~member_type(range.m_granularity_mask); + + m_begin = range.begin() + work_part * part_rank ; + m_end = m_begin + work_part ; + + if ( range.end() < m_begin ) m_begin = range.end() ; + if ( range.end() < m_end ) m_end = range.end() ; + } + } + private: + member_type m_begin ; + member_type m_end ; + WorkRange(); + WorkRange & operator = ( const WorkRange & ); + + }; +}; + + +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +namespace Impl { + + +template< class ExecSpace, class ... Properties> +class TeamPolicyInternal: public Impl::PolicyTraits<Properties ... > { +private: + typedef Impl::PolicyTraits<Properties ... > traits; + +public: + + //---------------------------------------- + /** \brief Query maximum team size for a given functor. + * + * This size takes into account execution space concurrency limitations and + * scratch memory space limitations for reductions, team reduce/scan, and + * team shared memory. 
*/
  template< class FunctorType >
  static int team_size_max( const FunctorType & );

  /** \brief  Query recommended team size for a given functor.
   *
   *  This size takes into account execution space concurrency limitations and
   *  scratch memory space limitations for reductions, team reduce/scan, and
   *  team shared memory.
   */
  template< class FunctorType >
  static int team_size_recommended( const FunctorType & );

  /** \brief  Overload of team_size_recommended taking an extra integer.
   *
   *  NOTE(review): the meaning of the second argument is not stated in this
   *  declaration -- presumably the vector length; confirm against the
   *  execution-space specializations.
   */
  template< class FunctorType >
  static int team_size_recommended( const FunctorType & , const int&);
  //----------------------------------------
  /** \brief  Construct policy with the given instance of the execution space */
  TeamPolicyInternal( const typename traits::execution_space & , int league_size_request , int team_size_request , int vector_length_request = 1 );

  TeamPolicyInternal( const typename traits::execution_space & , int league_size_request , const Kokkos::AUTO_t & , int vector_length_request = 1 );

  /** \brief  Construct policy with the default instance of the execution space */
  TeamPolicyInternal( int league_size_request , int team_size_request , int vector_length_request = 1 );

  TeamPolicyInternal( int league_size_request , const Kokkos::AUTO_t & , int vector_length_request = 1 );

/*  TeamPolicyInternal( int league_size_request , int team_size_request );

  TeamPolicyInternal( int league_size_request , const Kokkos::AUTO_t & );*/

  /** \brief  The actual league size (number of teams) of the policy.
   *
   *  This may be smaller than the requested league size due to limitations
   *  of the execution space.
   */
  KOKKOS_INLINE_FUNCTION int league_size() const ;

  /** \brief  The actual team size (number of threads per team) of the policy.
   *
   *  This may be smaller than the requested team size due to limitations
   *  of the execution space.
   */
  KOKKOS_INLINE_FUNCTION int team_size() const ;

  /** \brief  Chunk size of the policy, expressed in the policy's index type */
  inline typename traits::index_type chunk_size() const ;

  /** \brief  Returns a policy by value for the requested chunk size.
   *
   *  NOTE(review): presumably a modified copy of this policy, as the
   *  function is const -- confirm against the specializations.
   */
  inline TeamPolicyInternal set_chunk_size(int chunk_size) const ;

  /** \brief  Parallel execution of a functor calls the functor once with
   *          each member of the execution policy.
   */
  struct member_type {

    /** \brief  Handle to the currently executing team shared scratch memory */
    KOKKOS_INLINE_FUNCTION
    typename traits::execution_space::scratch_memory_space team_shmem() const ;

    /** \brief  Rank of this team within the league of teams */
    KOKKOS_INLINE_FUNCTION int league_rank() const ;

    /** \brief  Number of teams in the league */
    KOKKOS_INLINE_FUNCTION int league_size() const ;

    /** \brief  Rank of this thread within this team */
    KOKKOS_INLINE_FUNCTION int team_rank() const ;

    /** \brief  Number of threads in this team */
    KOKKOS_INLINE_FUNCTION int team_size() const ;

    /** \brief  Barrier among the threads of this team */
    KOKKOS_INLINE_FUNCTION void team_barrier() const ;

    /** \brief  Intra-team reduction. Returns join of all values of the team members. */
    template< class JoinOp >
    KOKKOS_INLINE_FUNCTION
    typename JoinOp::value_type team_reduce( const typename JoinOp::value_type
                                           , const JoinOp & ) const ;

    /** \brief  Intra-team exclusive prefix sum with team_rank() ordering.
     *
     *  The highest rank thread can compute the reduction total as
     *    reduction_total = dev.team_scan( value ) + value ;
     */
    template< typename Type >
    KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const ;

    /** \brief  Intra-team exclusive prefix sum with team_rank() ordering
     *          with intra-team non-deterministic ordering accumulation.
     *
     *  The global inter-team accumulation value will, at the end of the
     *  league's parallel execution, be the scan's total.
     *  Parallel execution ordering of the league's teams is non-deterministic.
     *  As such the base value for each team's scan operation is similarly
     *  non-deterministic.
     */
    template< typename Type >
    KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const ;
  };
};
}

namespace Impl {
  /** \brief  Integer wrapper whose type marks a value as "per team".
   *
   *  Accepted by the TeamPolicy::set_scratch_size overloads.
   */
  struct PerTeamValue {
    int value;
    PerTeamValue(int arg);
  };

  /** \brief  Integer wrapper whose type marks a value as "per thread".
   *
   *  Accepted by the TeamPolicy::set_scratch_size overloads.
   */
  struct PerThreadValue {
    int value;
    PerThreadValue(int arg);
  };
}

/** \brief  Factory functions returning the tagged integer wrappers declared above */
Impl::PerTeamValue PerTeam(const int& arg);
Impl::PerThreadValue PerThread(const int& arg);


/** \brief  Execution policy for parallel work over a league of teams of threads.
 *
 *  The work functor is called for each thread of each team such that
 *  the team's member threads are guaranteed to be concurrent.
 *
 *  The team's threads have access to team shared scratch memory and
 *  team collective operations.
 *
 *  If the WorkTag is non-void then the first calling argument of the
 *  work functor's parentheses operator is 'const WorkTag &'.
 *  This allows a functor to have multiple work member functions.
 *
 *  Order of template arguments does not matter, since the implementation
 *  uses variadic templates. Each and any of the template arguments can
 *  be omitted.
 *
 *  Possible template arguments and their default values:
 *    ExecutionSpace (DefaultExecutionSpace): where to execute code. Must be enabled.
 *    WorkTag (none): Tag which is used as the first argument for the functor operator.
 *    Schedule<Type> (Schedule<Static>): Scheduling Policy (Dynamic, or Static).
 *    IndexType<Type> (IndexType<ExecutionSpace::size_type>): Integer Index type used to iterate over the Index space.
 */
template< class ... Properties>
class TeamPolicy: public
  Impl::TeamPolicyInternal<
     typename Impl::PolicyTraits<Properties ... >::execution_space,
     Properties ...> {
  // The execution-space-specific implementation this class forwards to.
  typedef Impl::TeamPolicyInternal<
       typename Impl::PolicyTraits<Properties ... >::execution_space,
       Properties ...> internal_policy;

  typedef Impl::PolicyTraits<Properties ...
> traits; + +public: + typedef TeamPolicy execution_policy; + + TeamPolicy& operator = (const TeamPolicy&) = default; + + /** \brief Construct policy with the given instance of the execution space */ + TeamPolicy( const typename traits::execution_space & , int league_size_request , int team_size_request , int vector_length_request = 1 ) + : internal_policy(typename traits::execution_space(),league_size_request,team_size_request, vector_length_request) {} + + TeamPolicy( const typename traits::execution_space & , int league_size_request , const Kokkos::AUTO_t & , int vector_length_request = 1 ) + : internal_policy(typename traits::execution_space(),league_size_request,Kokkos::AUTO(), vector_length_request) {} + + /** \brief Construct policy with the default instance of the execution space */ + TeamPolicy( int league_size_request , int team_size_request , int vector_length_request = 1 ) + : internal_policy(league_size_request,team_size_request, vector_length_request) {} + + TeamPolicy( int league_size_request , const Kokkos::AUTO_t & , int vector_length_request = 1 ) + : internal_policy(league_size_request,Kokkos::AUTO(), vector_length_request) {} + +/* TeamPolicy( int league_size_request , int team_size_request ) + : internal_policy(league_size_request,team_size_request) {} + + TeamPolicy( int league_size_request , const Kokkos::AUTO_t & ) + : internal_policy(league_size_request,Kokkos::AUTO()) {}*/ + +private: + TeamPolicy(const internal_policy& p):internal_policy(p) {} +public: + + inline TeamPolicy set_chunk_size(int chunk) const { + return TeamPolicy(internal_policy::set_chunk_size(chunk)); + }; + + inline TeamPolicy set_scratch_size(const int& level, const Impl::PerTeamValue& per_team) const { + return TeamPolicy(internal_policy::set_scratch_size(level,per_team)); + }; + inline TeamPolicy set_scratch_size(const int& level, const Impl::PerThreadValue& per_thread) const { + return TeamPolicy(internal_policy::set_scratch_size(level,per_thread)); + }; + inline 
TeamPolicy set_scratch_size(const int& level, const Impl::PerTeamValue& per_team, const Impl::PerThreadValue& per_thread) const { + return TeamPolicy(internal_policy::set_scratch_size(level, per_team, per_thread)); + }; + inline TeamPolicy set_scratch_size(const int& level, const Impl::PerThreadValue& per_thread, const Impl::PerTeamValue& per_team) const { + return TeamPolicy(internal_policy::set_scratch_size(level, per_team, per_thread)); + }; + +}; + +} // namespace Kokkos + +namespace Kokkos { + +namespace Impl { + +template<typename iType, class TeamMemberType> +struct TeamThreadRangeBoundariesStruct { +private: + + KOKKOS_INLINE_FUNCTION static + iType ibegin( const iType & arg_begin + , const iType & arg_end + , const iType & arg_rank + , const iType & arg_size + ) + { + return arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * arg_rank ; + } + + KOKKOS_INLINE_FUNCTION static + iType iend( const iType & arg_begin + , const iType & arg_end + , const iType & arg_rank + , const iType & arg_size + ) + { + const iType end_ = arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * ( arg_rank + 1 ); + return end_ < arg_end ? 
end_ : arg_end ;
    }

public:

  typedef iType index_type;
  const iType start;             // first index owned by this thread (from ibegin)
  const iType end;               // one-past-last index owned by this thread (from iend)
  enum {increment = 1};
  const TeamMemberType& thread;  // reference to the team member; must outlive this object

  /** \brief  Bounds for this thread's share of the range starting at 0 and ending at arg_end */
  KOKKOS_INLINE_FUNCTION
  TeamThreadRangeBoundariesStruct( const TeamMemberType& arg_thread
                                , const iType& arg_end
                                )
    : start( ibegin( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
    , end(   iend(   0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
    , thread( arg_thread )
    {}

  /** \brief  Bounds for this thread's share of the range starting at arg_begin and ending at arg_end */
  KOKKOS_INLINE_FUNCTION
  TeamThreadRangeBoundariesStruct( const TeamMemberType& arg_thread
                                , const iType& arg_begin
                                , const iType& arg_end
                                )
    : start( ibegin( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
    , end(   iend(   arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
    , thread( arg_thread )
    {}
};

  /** \brief  Bounds of a vector-level loop: start is fixed at 0, end is the given count.
   *
   *  The team member passed to the constructor is not used or stored here.
   */
  template<typename iType, class TeamMemberType>
  struct ThreadVectorRangeBoundariesStruct {
    typedef iType index_type;
    enum {start = 0};
    const iType end;
    enum {increment = 1};

    KOKKOS_INLINE_FUNCTION
    ThreadVectorRangeBoundariesStruct (const TeamMemberType& thread, const iType& count):
      end( count )
    {}
  };

  /** \brief  Holds a reference to a team member; the referenced object must outlive this struct. */
  template<class TeamMemberType>
  struct ThreadSingleStruct {
    const TeamMemberType& team_member;
    KOKKOS_INLINE_FUNCTION
    ThreadSingleStruct(const TeamMemberType& team_member_):team_member(team_member_){}
  };

  /** \brief  Holds a reference to a team member; the referenced object must outlive this struct. */
  template<class TeamMemberType>
  struct VectorSingleStruct {
    const TeamMemberType& team_member;
    KOKKOS_INLINE_FUNCTION
    VectorSingleStruct(const TeamMemberType& team_member_):team_member(team_member_){}
  };
} // namespace Impl

/** \brief  Execution policy for parallel work over the threads within a team.
 *
 *  The range is split over all threads in a team. The Mapping scheme depends on the architecture.
 *  This policy is used together with a parallel pattern as a nested layer within a kernel launched
 *  with the TeamPolicy. This variant expects a single count. So the range is [0,count).
+ */ +template<typename iType, class TeamMemberType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType> TeamThreadRange(const TeamMemberType&, const iType& count); + +/** \brief Execution policy for parallel work over the threads within a team. + * + * The range is split over all threads in a team. The Mapping scheme depends on the architecture. + * This policy is used together with a parallel pattern as a nested layer within a kernel launched + * with the TeamPolicy. This variant expects a begin and end. So the range is [begin,end). + */ +template<typename iType, class TeamMemberType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType> TeamThreadRange(const TeamMemberType&, const iType& begin, const iType& end); + +/** \brief Execution policy for a vector parallel loop. + * + * The range is split over all vector lanes in a thread. The Mapping scheme depends on the architecture. + * This policy is used together with a parallel pattern as a nested layer within a kernel launched + * with the TeamPolicy. This variant expects a single count. So the range is [0,count). + */ +template<typename iType, class TeamMemberType> +KOKKOS_INLINE_FUNCTION +Impl::ThreadVectorRangeBoundariesStruct<iType,TeamMemberType> ThreadVectorRange(const TeamMemberType&, const iType& count); + +} // namespace Kokkos + + +#endif /* #define KOKKOS_EXECPOLICY_HPP */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + diff --git a/lib/kokkos/core/src/Kokkos_HBWSpace.hpp b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e02689b0f96f370448061cb90bd80a3492d32c35 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp @@ -0,0 +1,312 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/

#ifndef KOKKOS_HBWSPACE_HPP
#define KOKKOS_HBWSPACE_HPP


#include <Kokkos_HostSpace.hpp>
#include <impl/Kokkos_HBWAllocators.hpp>

/*--------------------------------------------------------------------------*/
#ifdef KOKKOS_HAVE_HBWSPACE

namespace Kokkos {
namespace Experimental {
namespace Impl {

/// \brief Initialize lock array for arbitrary size atomics.
///
/// Arbitrary atomics are implemented using a hash table of locks
/// where the hash value is derived from the address of the
/// object for which an atomic operation is performed.
/// This function initializes the locks to zero (unset).
void init_lock_array_hbw_space();

/// \brief Acquire a lock for the address
///
/// This function tries to acquire the lock for the hash value derived
/// from the provided ptr. If the lock is successfully acquired the
/// function returns true. Otherwise it returns false.
bool lock_address_hbw_space(void* ptr);

/// \brief Release lock for the address
///
/// This function releases the lock for the hash value derived
/// from the provided ptr. This function should only be called
/// after previously successfully acquiring a lock with
/// lock_address_hbw_space.
void unlock_address_hbw_space(void* ptr);

} // namespace Impl
} // namespace Experimental
} // namespace Kokkos

namespace Kokkos {
namespace Experimental {

/// \class HBWSpace
/// \brief Memory management for host memory.
///
/// HBWSpace is a memory space that governs host memory.  "Host"
/// memory means the usual CPU-accessible memory.
///
/// NOTE(review): this description reads as if copied from HostSpace;
/// HBW presumably stands for high-bandwidth memory -- confirm and reword.
class HBWSpace {
public:

  //! Tag this class as a kokkos memory space
  typedef HBWSpace  memory_space ;
  typedef size_t     size_type ;

  /// \typedef execution_space
  /// \brief Default execution space for this memory space.
  ///
  /// Every memory space has a default execution space.  This is
  /// useful for things like initializing a View (which happens in
  /// parallel using the View's default execution space).
  ///
  /// Selection order below: the configured default device type
  /// (OpenMP, then Threads) first, otherwise the first enabled of
  /// OpenMP, Threads, Serial.
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
  typedef Kokkos::OpenMP   execution_space ;
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
  typedef Kokkos::Threads  execution_space ;
#elif defined( KOKKOS_HAVE_OPENMP )
  typedef Kokkos::OpenMP   execution_space ;
#elif defined( KOKKOS_HAVE_PTHREAD )
  typedef Kokkos::Threads  execution_space ;
#elif defined( KOKKOS_HAVE_SERIAL )
  typedef Kokkos::Serial   execution_space ;
#else
#  error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."
#endif

  //! This memory space preferred device_type
  typedef Kokkos::Device<execution_space,memory_space> device_type;

  /*--------------------------------*/
  /* Functions unique to the HBWSpace */
  static int in_parallel();

  static void register_in_parallel( int (*)() );

  /*--------------------------------*/

  /**\brief  Default memory space instance */
  HBWSpace();
  HBWSpace( const HBWSpace & rhs ) = default ;
  HBWSpace & operator = ( const HBWSpace & ) = default ;
  ~HBWSpace() = default ;

  /**\brief  Non-default memory space instance to choose allocation mechanism, if available */

  enum AllocationMechanism { STD_MALLOC , POSIX_MEMALIGN , POSIX_MMAP , INTEL_MM_ALLOC };

  explicit
  HBWSpace( const AllocationMechanism & );

  /**\brief  Allocate untracked memory in the space */
  void * allocate( const size_t arg_alloc_size ) const ;

  /**\brief  Deallocate untracked memory in the space */
  void deallocate( void * const arg_alloc_ptr
                 , const size_t arg_alloc_size ) const ;

private:

  // Allocation mechanism associated with this instance.
  AllocationMechanism  m_alloc_mech ;

  friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > ;
};

} // namespace Experimental
} // namespace Kokkos

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Experimental {
namespace Impl {

/**\brief  Allocation record specialization for memory allocated from an HBWSpace.
 *
 *  Not copyable.  Construction and destruction are protected; records
 *  are created through the static allocate() function.
 */
template<>
class SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >
  : public SharedAllocationRecord< void , void >
{
private:

  friend Kokkos::Experimental::HBWSpace ;

  typedef SharedAllocationRecord< void , void >  RecordBase ;

  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;

  // Default deallocation callback passed to the record constructor.
  static void deallocate( RecordBase * );

  /**\brief  Root record for tracked allocations from this HBWSpace instance */
  static RecordBase s_root_record ;

  const Kokkos::Experimental::HBWSpace m_space ;

protected:

  ~SharedAllocationRecord();
  SharedAllocationRecord() = default ;

  SharedAllocationRecord( const Kokkos::Experimental::HBWSpace        & arg_space
                        , const std::string              & arg_label
                        , const size_t                     arg_alloc_size
                        , const RecordBase::function_type  arg_dealloc = & deallocate
                        );

public:

  /**\brief  Label of this record, copied out of the record header */
  inline
  std::string get_label() const
    {
      return std::string( RecordBase::head()->m_label );
    }

  /**\brief  Create a new record for an allocation of arg_alloc_size in arg_space.
   *
   *  Returns a null pointer unless KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
   *  is defined where this function is compiled.
   */
  KOKKOS_INLINE_FUNCTION static
  SharedAllocationRecord * allocate( const Kokkos::Experimental::HBWSpace &  arg_space
                                   , const std::string       &  arg_label
                                   , const size_t               arg_alloc_size
                                   )
    {
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
      return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
#else
      return (SharedAllocationRecord *) 0 ;
#endif
    }

  /**\brief  Allocate tracked memory in the space */
  static
  void * allocate_tracked( const Kokkos::Experimental::HBWSpace & arg_space
                         , const std::string & arg_label
                         , const size_t arg_alloc_size );

  /**\brief  Reallocate tracked memory in the space */
  static
  void * reallocate_tracked( void * const arg_alloc_ptr
                           , const size_t arg_alloc_size );

  /**\brief  Deallocate tracked memory in the space */
  static
  void deallocate_tracked( void * const arg_alloc_ptr );


  /**\brief  Look up the record belonging to an allocation pointer */
  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );

  /**\brief  Print the records of the given space to a stream, optionally with detail */
  static void print_records( std::ostream & , const Kokkos::Experimental::HBWSpace & , bool detail = false );
};

} // namespace Impl
} // namespace Experimental
} // namespace Kokkos

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Impl {


/**\brief  Copy n bytes between two HBWSpace allocations with memcpy.
 *
 *  The overload taking an execution space instance calls exec.fence()
 *  before copying.
 */
template<class ExecutionSpace>
struct DeepCopy<Experimental::HBWSpace,Experimental::HBWSpace,ExecutionSpace> {
  DeepCopy( void * dst , const void * src , size_t n ) {
    memcpy( dst , src , n );
  }
  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) {
    exec.fence();
    memcpy( dst , src , n );
  }
};

/**\brief  DeepCopy specialization for the space pair < HostSpace , HBWSpace >; a plain memcpy of n bytes. */
template<class ExecutionSpace>
struct DeepCopy<HostSpace,Experimental::HBWSpace,ExecutionSpace> {
  DeepCopy( void * dst , const void * src , size_t n ) {
    memcpy( dst , src , n );
  }
  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) {
    exec.fence();
    memcpy( dst , src , n );
  }
};

/**\brief  DeepCopy specialization for the space pair < HBWSpace , HostSpace >; a plain memcpy of n bytes. */
template<class ExecutionSpace>
struct DeepCopy<Experimental::HBWSpace,HostSpace,ExecutionSpace> {
  DeepCopy( void * dst , const void * src , size_t n ) {
    memcpy( dst , src , n );
  }
  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) {
    exec.fence();
    memcpy( dst , src , n );
  }
};

} // namespace Impl
} // namespace Kokkos

namespace Kokkos {
namespace Impl {

/**\brief  Access check for the pair < HostSpace , HBWSpace >: always allowed, verify() is a no-op. */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::Experimental::HBWSpace >
{
  enum { value = true };
  inline
static void verify( void ) { } + inline static void verify( const void * ) { } +}; + +template<> +struct VerifyExecutionCanAccessMemorySpace< Kokkos::Experimental::HBWSpace , Kokkos::HostSpace > +{ + enum { value = true }; + inline static void verify( void ) { } + inline static void verify( const void * ) { } +}; + +} // namespace Impl +} // namespace Kokkos + +#endif +#endif /* #define KOKKOS_HBWSPACE_HPP */ + diff --git a/lib/kokkos/core/src/Kokkos_HostSpace.hpp b/lib/kokkos/core/src/Kokkos_HostSpace.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5fe686559a07d63cb4a07bf821203672c1336699 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_HostSpace.hpp @@ -0,0 +1,275 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_HOSTSPACE_HPP
+#define KOKKOS_HOSTSPACE_HPP
+
+#include <cstring>
+#include <string>
+#include <iosfwd>
+#include <typeinfo>
+
+#include <Kokkos_Core_fwd.hpp>
+#include <Kokkos_MemoryTraits.hpp>
+
+#include <impl/Kokkos_Traits.hpp>
+#include <impl/Kokkos_Error.hpp>
+
+#include <impl/KokkosExp_SharedAlloc.hpp>
+
+/*--------------------------------------------------------------------------*/
+
+namespace Kokkos {
+namespace Impl {
+
+/// \brief Initialize lock array for arbitrary size atomics.
+///
+/// Arbitrary atomics are implemented using a hash table of locks
+/// where the hash value is derived from the address of the
+/// object for which an atomic operation is performed.
+/// This function initializes the locks to zero (unset).
+void init_lock_array_host_space();
+
+/// \brief Acquire a lock for the address
+///
+/// This function tries to acquire the lock for the hash value derived
+/// from the provided ptr. If the lock is successfully acquired the
+/// function returns true. Otherwise it returns false.
+bool lock_address_host_space(void* ptr);
+
+/// \brief Release lock for the address
+///
+/// This function releases the lock for the hash value derived
+/// from the provided ptr. This function should only be called
+/// after previously successfully acquiring a lock with
+/// lock_address_host_space.
+void unlock_address_host_space(void* ptr);
+
+} // namespace Impl
+} // namespace Kokkos
+
+namespace Kokkos {
+
+/// \class HostSpace
+/// \brief Memory management for host memory.
+///
+/// HostSpace is a memory space that governs host memory. "Host"
+/// memory means the usual CPU-accessible memory.
+class HostSpace {
+public:
+
+  //! Tag this class as a kokkos memory space
+  typedef HostSpace memory_space ;
+  typedef size_t size_type ;
+
+  /// \typedef execution_space
+  /// \brief Default execution space for this memory space.
+  ///
+  /// Every memory space has a default execution space. This is
+  /// useful for things like initializing a View (which happens in
+  /// parallel using the View's default execution space).
+  ///
+  /// Selection order below: an explicitly requested default device type
+  /// (OpenMP, then Threads) wins; otherwise the first enabled of
+  /// OpenMP, Threads (KOKKOS_HAVE_PTHREAD), Serial is used.
+#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
+  typedef Kokkos::OpenMP execution_space ;
+#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
+  typedef Kokkos::Threads execution_space ;
+#elif defined( KOKKOS_HAVE_OPENMP )
+  typedef Kokkos::OpenMP execution_space ;
+#elif defined( KOKKOS_HAVE_PTHREAD )
+  typedef Kokkos::Threads execution_space ;
+#elif defined( KOKKOS_HAVE_SERIAL )
+  typedef Kokkos::Serial execution_space ;
+#else
+#  error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."
+#endif
+
+  //! This memory space preferred device_type
+  typedef Kokkos::Device<execution_space,memory_space> device_type;
+
+  /*--------------------------------*/
+  /* Functions unique to the HostSpace */
+  static int in_parallel();
+
+  static void register_in_parallel( int (*)() );
+
+  /*--------------------------------*/
+
+  /**\brief Default memory space instance */
+  HostSpace();
+  HostSpace( HostSpace && rhs ) = default ;
+  HostSpace( const HostSpace & rhs ) = default ;
+  HostSpace & operator = ( HostSpace && ) = default ;
+  HostSpace & operator = ( const HostSpace & ) = default ;
+  ~HostSpace() = default ;
+
+  /**\brief Non-default memory space instance to choose allocation mechanism, if available */
+
+  enum AllocationMechanism { STD_MALLOC , POSIX_MEMALIGN , POSIX_MMAP , INTEL_MM_ALLOC };
+
+  explicit
+  HostSpace( const AllocationMechanism & );
+
+  /**\brief Allocate untracked memory in the space */
+  void * allocate( const size_t arg_alloc_size ) const ;
+
+  /**\brief Deallocate untracked memory in the space */
+  void deallocate( void * const arg_alloc_ptr
+                 , const size_t arg_alloc_size ) const ;
+
+private:
+
+  // NOTE(review): presumably the mechanism chosen at construction;
+  // the constructors are defined outside this header - confirm there.
+  AllocationMechanism m_alloc_mech ;
+
+  friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > ;
+};
+
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+/// \brief Specialization of SharedAllocationRecord for allocations made
+///        through Kokkos::HostSpace.
+///
+/// Derives from the untyped SharedAllocationRecord< void , void > and is
+/// non-copyable: copy construction and copy assignment are deleted.
+/// Construction and destruction are protected; records are created
+/// through the static allocate() function.
+template<>
+class SharedAllocationRecord< Kokkos::HostSpace , void >
+  : public SharedAllocationRecord< void , void >
+{
+private:
+
+  friend Kokkos::HostSpace ;
+
+  typedef SharedAllocationRecord< void , void > RecordBase ;
+
+  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
+  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
+
+  /**\brief Default deallocation callback (see arg_dealloc in the constructor) */
+  static void deallocate( RecordBase * );
+
+  /**\brief Root record for tracked allocations from this HostSpace instance */
+  static RecordBase s_root_record ;
+
+  /**\brief HostSpace instance stored with this record */
+  const Kokkos::HostSpace m_space ;
+
+protected:
+
+  ~SharedAllocationRecord();
+  SharedAllocationRecord() = default ;
+
+  /**\brief Construct a record for arg_alloc_size bytes labeled arg_label.
+   *
+   *  arg_dealloc defaults to this class' deallocate() function.
+   */
+  SharedAllocationRecord( const Kokkos::HostSpace & arg_space
+                        , const std::string & arg_label
+                        , const size_t arg_alloc_size
+                        , const RecordBase::function_type arg_dealloc = & deallocate
+                        );
+
+public:
+
+  /**\brief Return a copy of the label stored in the record header */
+  inline
+  std::string get_label() const
+  {
+    return std::string( RecordBase::head()->m_label );
+  }
+
+  /**\brief Create a new record with operator new.
+   *
+   *  Only available when KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST is
+   *  defined; otherwise a null pointer is returned.
+   */
+  KOKKOS_INLINE_FUNCTION static
+  SharedAllocationRecord * allocate( const Kokkos::HostSpace & arg_space
+                                   , const std::string & arg_label
+                                   , const size_t arg_alloc_size
+                                   )
+  {
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+    return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
+#else
+    return (SharedAllocationRecord *) 0 ;
+#endif
+  }
+
+  /**\brief Allocate tracked memory in the space */
+  static
+  void * allocate_tracked( const Kokkos::HostSpace & arg_space
+                         , const std::string & arg_label
+                         , const size_t arg_alloc_size );
+
+  /**\brief Reallocate tracked memory in the space */
+  static
+  void * reallocate_tracked( void * const arg_alloc_ptr
+                           , const size_t arg_alloc_size );
+
+  /**\brief Deallocate tracked memory in the space */
+  static
+  void deallocate_tracked( void * const arg_alloc_ptr );
+
+
+  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
+
+  static void print_records( std::ostream & , const Kokkos::HostSpace & , bool detail = false );
+};
+
+} // namespace Impl
+} // namespace Experimental
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+/// \brief Primary DeepCopy template (declaration only); ExecutionSpace
+///        defaults to the destination space's execution_space.
+template< class DstSpace, class SrcSpace, class ExecutionSpace = typename DstSpace::execution_space> struct DeepCopy ;
+
+/// \brief Copy n bytes from src to dst with memcpy (HostSpace to HostSpace).
+///
+/// The overload taking an execution space instance calls exec.fence()
+/// before the copy.
+template<class ExecutionSpace>
+struct DeepCopy<HostSpace,HostSpace,ExecutionSpace> {
+  DeepCopy( void * dst , const void * src , size_t n ) {
+    memcpy( dst , src , n );
+  }
+  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) {
+    exec.fence();
+    memcpy( dst , src , n );
+  }
+};
+
+} // namespace Impl
+} // namespace Kokkos
+
+
+#endif /* #define KOKKOS_HOSTSPACE_HPP */
+
diff --git a/lib/kokkos/core/src/Kokkos_Layout.hpp b/lib/kokkos/core/src/Kokkos_Layout.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c77c33703bdd76161b20c2e5ae59b96c03c4550e
--- /dev/null
+++ b/lib/kokkos/core/src/Kokkos_Layout.hpp
@@ -0,0 +1,233 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// \file Kokkos_Layout.hpp +/// \brief Declaration of various \c MemoryLayout options. + +#ifndef KOKKOS_LAYOUT_HPP +#define KOKKOS_LAYOUT_HPP + +#include <stddef.h> +#include <impl/Kokkos_Traits.hpp> +#include <impl/Kokkos_Tags.hpp> + +namespace Kokkos { + +enum { ARRAY_LAYOUT_MAX_RANK = 8 }; + +//---------------------------------------------------------------------------- +/// \struct LayoutLeft +/// \brief Memory layout tag indicating left-to-right (Fortran scheme) +/// striding of multi-indices. +/// +/// This is an example of a \c MemoryLayout template parameter of +/// View. The memory layout describes how View maps from a +/// multi-index (i0, i1, ..., ik) to a memory location. +/// +/// "Layout left" indicates a mapping where the leftmost index i0 +/// refers to contiguous access, and strides increase for dimensions +/// going right from there (i1, i2, ...). This layout imitates how +/// Fortran stores multi-dimensional arrays. For the special case of +/// a two-dimensional array, "layout left" is also called "column +/// major." +struct LayoutLeft { + //! 
Tag this class as a kokkos array layout + typedef LayoutLeft array_layout ; + + size_t dimension[ ARRAY_LAYOUT_MAX_RANK ]; + + LayoutLeft( LayoutLeft const & ) = default ; + LayoutLeft( LayoutLeft && ) = default ; + LayoutLeft & operator = ( LayoutLeft const & ) = default ; + LayoutLeft & operator = ( LayoutLeft && ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr + LayoutLeft( size_t N0 = 0 , size_t N1 = 0 , size_t N2 = 0 , size_t N3 = 0 + , size_t N4 = 0 , size_t N5 = 0 , size_t N6 = 0 , size_t N7 = 0 ) + : dimension { N0 , N1 , N2 , N3 , N4 , N5 , N6 , N7 } {} +}; + +//---------------------------------------------------------------------------- +/// \struct LayoutRight +/// \brief Memory layout tag indicating right-to-left (C or +/// lexigraphical scheme) striding of multi-indices. +/// +/// This is an example of a \c MemoryLayout template parameter of +/// View. The memory layout describes how View maps from a +/// multi-index (i0, i1, ..., ik) to a memory location. +/// +/// "Right layout" indicates a mapping where the rightmost index ik +/// refers to contiguous access, and strides increase for dimensions +/// going left from there. This layout imitates how C stores +/// multi-dimensional arrays. For the special case of a +/// two-dimensional array, "layout right" is also called "row major." +struct LayoutRight { + //! 
Tag this class as a kokkos array layout + typedef LayoutRight array_layout ; + + size_t dimension[ ARRAY_LAYOUT_MAX_RANK ]; + + LayoutRight( LayoutRight const & ) = default ; + LayoutRight( LayoutRight && ) = default ; + LayoutRight & operator = ( LayoutRight const & ) = default ; + LayoutRight & operator = ( LayoutRight && ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr + LayoutRight( size_t N0 = 0 , size_t N1 = 0 , size_t N2 = 0 , size_t N3 = 0 + , size_t N4 = 0 , size_t N5 = 0 , size_t N6 = 0 , size_t N7 = 0 ) + : dimension { N0 , N1 , N2 , N3 , N4 , N5 , N6 , N7 } {} +}; + +//---------------------------------------------------------------------------- +/// \struct LayoutStride +/// \brief Memory layout tag indicated arbitrarily strided +/// multi-index mapping into contiguous memory. +struct LayoutStride { + + //! Tag this class as a kokkos array layout + typedef LayoutStride array_layout ; + + size_t dimension[ ARRAY_LAYOUT_MAX_RANK ] ; + size_t stride[ ARRAY_LAYOUT_MAX_RANK ] ; + + /** \brief Compute strides from ordered dimensions. + * + * Values of order uniquely form the set [0..rank) + * and specify ordering of the dimensions. + * Order = {0,1,2,...} is LayoutLeft + * Order = {...,2,1,0} is LayoutRight + */ + template< typename iTypeOrder , typename iTypeDimen > + KOKKOS_INLINE_FUNCTION static + LayoutStride order_dimensions( int const rank + , iTypeOrder const * const order + , iTypeDimen const * const dimen ) + { + LayoutStride tmp ; + // Verify valid rank order: + int check_input = ARRAY_LAYOUT_MAX_RANK < rank ? 
0 : int( 1 << rank ) - 1 ; + for ( int r = 0 ; r < ARRAY_LAYOUT_MAX_RANK ; ++r ) { + tmp.dimension[r] = 0 ; + tmp.stride[r] = 0 ; + check_input &= ~int( 1 << order[r] ); + } + if ( 0 == check_input ) { + size_t n = 1 ; + for ( int r = 0 ; r < rank ; ++r ) { + tmp.stride[ order[r] ] = n ; + n *= ( dimen[order[r]] ); + tmp.dimension[r] = dimen[r]; + } + } + return tmp ; + } + + KOKKOS_INLINE_FUNCTION constexpr + LayoutStride( size_t N0 = 0 , size_t S0 = 0 + , size_t N1 = 0 , size_t S1 = 0 + , size_t N2 = 0 , size_t S2 = 0 + , size_t N3 = 0 , size_t S3 = 0 + , size_t N4 = 0 , size_t S4 = 0 + , size_t N5 = 0 , size_t S5 = 0 + , size_t N6 = 0 , size_t S6 = 0 + , size_t N7 = 0 , size_t S7 = 0 + ) + : dimension { N0 , N1 , N2 , N3 , N4 , N5 , N6 , N7 } + , stride { S0 , S1 , S2 , S3 , S4 , S5 , S6 , S7 } + {} +}; + +//---------------------------------------------------------------------------- +/// \struct LayoutTileLeft +/// \brief Memory layout tag indicating left-to-right (Fortran scheme) +/// striding of multi-indices by tiles. +/// +/// This is an example of a \c MemoryLayout template parameter of +/// View. The memory layout describes how View maps from a +/// multi-index (i0, i1, ..., ik) to a memory location. +/// +/// "Tiled layout" indicates a mapping to contiguously stored +/// <tt>ArgN0</tt> by <tt>ArgN1</tt> tiles for the rightmost two +/// dimensions. Indices are LayoutLeft within each tile, and the +/// tiles themselves are arranged using LayoutLeft. Note that the +/// dimensions <tt>ArgN0</tt> and <tt>ArgN1</tt> of the tiles must be +/// compile-time constants. This speeds up index calculations. If +/// both tile dimensions are powers of two, Kokkos can optimize +/// further. 
+///
+/// The static_assert in the struct body rejects tile dimensions that are
+/// not powers of two. The enums N0 and N1 expose the tile dimensions, and
+/// the constructor stores up to ARRAY_LAYOUT_MAX_RANK extents in dimension.
+template < unsigned ArgN0 , unsigned ArgN1 ,
+           bool IsPowerOfTwo = ( Impl::is_integral_power_of_two(ArgN0) &&
+                                 Impl::is_integral_power_of_two(ArgN1) )
+         >
+struct LayoutTileLeft {
+
+  static_assert( Impl::is_integral_power_of_two(ArgN0) &&
+                 Impl::is_integral_power_of_two(ArgN1)
+               , "LayoutTileLeft must be given power-of-two tile dimensions" );
+
+  //! Tag this class as a kokkos array layout
+  typedef LayoutTileLeft<ArgN0,ArgN1,IsPowerOfTwo> array_layout ;
+
+  enum { N0 = ArgN0 };
+  enum { N1 = ArgN1 };
+
+  size_t dimension[ ARRAY_LAYOUT_MAX_RANK ] ;
+
+  LayoutTileLeft( LayoutTileLeft const & ) = default ;
+  LayoutTileLeft( LayoutTileLeft && ) = default ;
+  LayoutTileLeft & operator = ( LayoutTileLeft const & ) = default ;
+  LayoutTileLeft & operator = ( LayoutTileLeft && ) = default ;
+
+  KOKKOS_INLINE_FUNCTION
+  constexpr
+  LayoutTileLeft( size_t argN0 = 0 , size_t argN1 = 0 , size_t argN2 = 0 , size_t argN3 = 0
+                , size_t argN4 = 0 , size_t argN5 = 0 , size_t argN6 = 0 , size_t argN7 = 0
+                )
+    : dimension { argN0 , argN1 , argN2 , argN3 , argN4 , argN5 , argN6 , argN7 } {}
+};
+
+} // namespace Kokkos
+
+#endif // #ifndef KOKKOS_LAYOUT_HPP
+
diff --git a/lib/kokkos/core/src/Kokkos_Macros.hpp b/lib/kokkos/core/src/Kokkos_Macros.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..7d1e59af5e473db94a5ed6361bb3d6ee7b9b47e6
--- /dev/null
+++ b/lib/kokkos/core/src/Kokkos_Macros.hpp
@@ -0,0 +1,470 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1.
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_MACROS_HPP
+#define KOKKOS_MACROS_HPP
+
+//----------------------------------------------------------------------------
+/** Pick up configure/build options via #define macros:
+ *
+ *  KOKKOS_HAVE_CUDA                Kokkos::Cuda execution and memory spaces
+ *  KOKKOS_HAVE_PTHREAD             Kokkos::Threads execution space
+ *  KOKKOS_HAVE_QTHREAD             Kokkos::Qthread execution space
+ *  KOKKOS_HAVE_OPENMP              Kokkos::OpenMP execution space
+ *  KOKKOS_HAVE_HWLOC               HWLOC library is available
+ *  KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK    insert array bounds checks, is expensive!
+ *  KOKKOS_HAVE_CXX11               enable C++11 features
+ *
+ *  KOKKOS_HAVE_MPI                 negotiate MPI/execution space interactions
+ *
+ *  KOKKOS_USE_CUDA_UVM             Use CUDA UVM for Cuda memory space
+ */
+
+#ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H
+#include <KokkosCore_config.h>
+#endif
+
+//----------------------------------------------------------------------------
+/** Pick up compiler specific #define macros:
+ *
+ *  Macros for known compilers evaluate to an integral version value
+ *
+ *  KOKKOS_COMPILER_NVCC
+ *  KOKKOS_COMPILER_GNU
+ *  KOKKOS_COMPILER_INTEL
+ *  KOKKOS_COMPILER_IBM
+ *  KOKKOS_COMPILER_CRAYC
+ *  KOKKOS_COMPILER_APPLECC
+ *  KOKKOS_COMPILER_CLANG
+ *  KOKKOS_COMPILER_PGI
+ *
+ *  Macros for which compiler extension to use for atomics on intrinsic types
+ *
+ *  KOKKOS_ATOMICS_USE_CUDA
+ *  KOKKOS_ATOMICS_USE_GNU
+ *  KOKKOS_ATOMICS_USE_INTEL
+ *  KOKKOS_ATOMICS_USE_OPENMP31
+ *
+ *  A suite of 'KOKKOS_HAVE_PRAGMA_...' are defined for internal use.
+ *
+ *  Macros for marking functions to run in an execution space:
+ *
+ *  KOKKOS_FUNCTION
+ *  KOKKOS_INLINE_FUNCTION        request compiler to inline
+ *  KOKKOS_FORCEINLINE_FUNCTION   force compiler to inline, use with care!
+ */
+
+//----------------------------------------------------------------------------
+
+#if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ )
+
+/*  Compiling with a CUDA compiler.
+ *
+ *  Include <cuda.h> to pick up the CUDA_VERSION macro defined as:
+ *    CUDA_VERSION = ( MAJOR_VERSION * 1000 ) + ( MINOR_VERSION * 10 )
+ *
+ *  When generating device code the __CUDA_ARCH__ macro is defined as:
+ *    __CUDA_ARCH__ = ( MAJOR_CAPABILITY * 100 ) + ( MINOR_CAPABILITY * 10 )
+ */
+
+#include <cuda_runtime.h>
+#include <cuda.h>
+
+#if ! defined( CUDA_VERSION )
+#error "#include <cuda.h> did not define CUDA_VERSION"
+#endif
+
+#if ( CUDA_VERSION < 6050 )
+// CUDA supports (unofficially) C++11 in device code starting with
+// version 6.5. This includes auto type and device code internal
+// lambdas.
+#error "Cuda version 6.5 or greater required"
+#endif
+
+#if defined( __CUDA_ARCH__ ) && ( __CUDA_ARCH__ < 300 )
+/*  Compiling with CUDA compiler for device code. */
+#error "Cuda device capability >= 3.0 is required"
+#endif
+
+#ifdef KOKKOS_CUDA_USE_LAMBDA
+#if ( CUDA_VERSION < 7000 )
+// CUDA supports C++11 lambdas generated in host code to be given
+// to the device starting with version 7.5. But the release candidate (7.5.6)
+// still identifies as 7.0
+#error "Cuda version 7.5 or greater required for host-to-device Lambda support"
+#endif
+// Before CUDA 8.0 the lambda is marked __device__ only; from 8.0 on it is
+// marked __host__ __device__.
+#if ( CUDA_VERSION < 8000 )
+#define KOKKOS_LAMBDA [=]__device__
+#else
+#define KOKKOS_LAMBDA [=]__host__ __device__
+#endif
+#define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
+#endif
+#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ ) */
+
+
+#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
+   // Cuda version 8.0 still needs the functor wrapper
+   #if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ )
+      #define KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER
+   #endif
+#endif
+
+/*--------------------------------------------------------------------------*/
+/* Language info: C++, CUDA, OPENMP */
+
+// NOTE(review): the qualifiers below are only defined when __CUDA_ARCH__ is
+// defined; in every other case the plain fallbacks near the end of this
+// header apply. Confirm that this is the intended behaviour for the host
+// compilation pass of a CUDA build.
+#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
+  // Compiling Cuda code to 'ptx'
+
+  #define KOKKOS_FORCEINLINE_FUNCTION  __device__  __host__  __forceinline__
+  #define KOKKOS_INLINE_FUNCTION       __device__  __host__  inline
+  #define KOKKOS_FUNCTION              __device__  __host__
+
+#endif /* #if defined( __CUDA_ARCH__ ) */
+
+#if defined( _OPENMP )
+
+  /*  Compiling with OpenMP.
+   *  The value of _OPENMP is an integer value YYYYMM
+   *  where YYYY and MM are the year and month designation
+   *  of the supported OpenMP API version.
+   */
+
+#endif /* #if defined( _OPENMP ) */
+
+/*--------------------------------------------------------------------------*/
+/* Mapping compiler built-ins to KOKKOS_COMPILER_*** macros */
+
+#if defined( __NVCC__ )
+  //  NVIDIA compiler is being used.
+  //  Code is parsed and separated into host and device code.
+  //  Host code is compiled again with another compiler.
+  //  Device code is compiled to 'ptx'.
+  #define KOKKOS_COMPILER_NVCC __NVCC__
+
+#else
+#if defined( KOKKOS_HAVE_CXX11 ) && ! defined( KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA )
+    // CUDA (including version 6.5) does not support giving lambdas as
+    // arguments to global functions. Thus it is not currently possible
+    // to dispatch lambdas from the host.
+    // This branch is only reached when __NVCC__ is not defined.
+    #define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
+  #endif
+#endif /* #if defined( __NVCC__ ) */
+
+#if defined( KOKKOS_HAVE_CXX11 ) && !defined (KOKKOS_LAMBDA)
+  #define KOKKOS_LAMBDA [=]
+#endif
+
+#if ! defined( __CUDA_ARCH__ ) /* Not compiling Cuda code to 'ptx'. */
+
+/* Intel compiler for host code */
+
+#if defined( __INTEL_COMPILER )
+  #define KOKKOS_COMPILER_INTEL __INTEL_COMPILER
+#elif defined( __ICC )
+  // Old define
+  #define KOKKOS_COMPILER_INTEL __ICC
+#elif defined( __ECC )
+  // Very old define
+  #define KOKKOS_COMPILER_INTEL __ECC
+#endif
+
+/* CRAY compiler for host code */
+#if defined( _CRAYC )
+  #define KOKKOS_COMPILER_CRAYC _CRAYC
+#endif
+
+#if defined( __IBMCPP__ )
+  // IBM C++
+  #define KOKKOS_COMPILER_IBM __IBMCPP__
+#elif defined( __IBMC__ )
+  #define KOKKOS_COMPILER_IBM __IBMC__
+#endif
+
+#if defined( __APPLE_CC__ )
+  #define KOKKOS_COMPILER_APPLECC __APPLE_CC__
+#endif
+
+// NOTE: the CLANG, GNU and PGI version macros below expand to an
+// unparenthesized arithmetic expression; wrap them in parentheses when
+// using them inside a larger expression.
+#if defined (__clang__) && !defined (KOKKOS_COMPILER_INTEL)
+  #define KOKKOS_COMPILER_CLANG __clang_major__*100+__clang_minor__*10+__clang_patchlevel__
+#endif
+
+#if ! defined( __clang__ ) && ! defined( KOKKOS_COMPILER_INTEL ) &&defined( __GNUC__ )
+  #define KOKKOS_COMPILER_GNU __GNUC__*100+__GNUC_MINOR__*10+__GNUC_PATCHLEVEL__
+  #if ( 472 > KOKKOS_COMPILER_GNU )
+    #error "Compiling with GCC version earlier than 4.7.2 is not supported."
+  #endif
+#endif
+
+#if defined( __PGIC__ ) && ! defined( __GNUC__ )
+  #define KOKKOS_COMPILER_PGI __PGIC__*100+__PGIC_MINOR__*10+__PGIC_PATCHLEVEL__
+  #if ( 1540 > KOKKOS_COMPILER_PGI )
+    #error "Compiling with PGI version earlier than 15.4 is not supported."
+  #endif
+#endif
+
+#endif /* #if ! defined( __CUDA_ARCH__ ) */
+
+/*--------------------------------------------------------------------------*/
+/*--------------------------------------------------------------------------*/
+/* Intel compiler macros */
+
+#if defined( KOKKOS_COMPILER_INTEL )
+
+  #define KOKKOS_HAVE_PRAGMA_UNROLL 1
+  #define KOKKOS_HAVE_PRAGMA_IVDEP 1
+  #define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
+  #define KOKKOS_HAVE_PRAGMA_VECTOR 1
+  #define KOKKOS_HAVE_PRAGMA_SIMD 1
+
+  #if ( 1400 > KOKKOS_COMPILER_INTEL )
+    #if ( 1300 > KOKKOS_COMPILER_INTEL )
+      #error "Compiling with Intel version earlier than 13.0 is not supported. Official minimal version is 14.0."
+    #else
+      #warning "Compiling with Intel version 13.x probably works but is not officially supported. Official minimal version is 14.0."
+    #endif
+  #endif
+  #if ( 1200 <= KOKKOS_COMPILER_INTEL ) && ! defined( KOKKOS_ENABLE_ASM ) && ! defined( _WIN32 )
+    #define KOKKOS_ENABLE_ASM 1
+  #endif
+
+  #if ( 1200 <= KOKKOS_COMPILER_INTEL ) && ! defined( KOKKOS_FORCEINLINE_FUNCTION )
+    #if !defined (_WIN32)
+      #define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
+    #else
+      #define KOKKOS_FORCEINLINE_FUNCTION inline
+    #endif
+  #endif
+
+  #if defined( __MIC__ )
+    // Compiling for Xeon Phi
+  #endif
+
+#endif
+
+/*--------------------------------------------------------------------------*/
+/* Cray compiler macros */
+
+#if defined( KOKKOS_COMPILER_CRAYC )
+
+
+#endif
+
+/*--------------------------------------------------------------------------*/
+/* IBM Compiler macros */
+
+#if defined( KOKKOS_COMPILER_IBM )
+
+  #define KOKKOS_HAVE_PRAGMA_UNROLL 1
+  //#define KOKKOS_HAVE_PRAGMA_IVDEP 1
+  //#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
+  //#define KOKKOS_HAVE_PRAGMA_VECTOR 1
+  //#define KOKKOS_HAVE_PRAGMA_SIMD 1
+
+#endif
+
+/*--------------------------------------------------------------------------*/
+/* CLANG compiler macros */
+
+#if defined( KOKKOS_COMPILER_CLANG )
+
+  //#define KOKKOS_HAVE_PRAGMA_UNROLL 1
+  //#define KOKKOS_HAVE_PRAGMA_IVDEP 1
+  //#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
+  //#define KOKKOS_HAVE_PRAGMA_VECTOR 1
+  //#define KOKKOS_HAVE_PRAGMA_SIMD 1
+
+  #if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
+    #define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
+  #endif
+
+#endif
+
+/*--------------------------------------------------------------------------*/
+/* GNU Compiler macros */
+
+#if defined( KOKKOS_COMPILER_GNU )
+
+  //#define KOKKOS_HAVE_PRAGMA_UNROLL 1
+  //#define KOKKOS_HAVE_PRAGMA_IVDEP 1
+  //#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
+  //#define KOKKOS_HAVE_PRAGMA_VECTOR 1
+  //#define KOKKOS_HAVE_PRAGMA_SIMD 1
+
+  #if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
+    #define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
+  #endif
+
+  // KOKKOS_ENABLE_ASM is left undefined on the PowerPC and PGI targets
+  // listed in the condition below.
+  #if ! defined( KOKKOS_ENABLE_ASM ) && \
+      ! ( defined( __powerpc) || \
+          defined(__powerpc__) || \
+          defined(__powerpc64__) || \
+          defined(__POWERPC__) || \
+          defined(__ppc__) || \
+          defined(__ppc64__) || \
+          defined(__PGIC__) )
+    #define KOKKOS_ENABLE_ASM 1
+  #endif
+
+#endif
+
+/*--------------------------------------------------------------------------*/
+
+#if defined( KOKKOS_COMPILER_PGI )
+
+  #define KOKKOS_HAVE_PRAGMA_UNROLL 1
+  #define KOKKOS_HAVE_PRAGMA_IVDEP 1
+  //#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
+  #define KOKKOS_HAVE_PRAGMA_VECTOR 1
+  //#define KOKKOS_HAVE_PRAGMA_SIMD 1
+
+#endif
+
+/*--------------------------------------------------------------------------*/
+
+#if defined( KOKKOS_COMPILER_NVCC )
+
+  #if defined(__CUDA_ARCH__ )
+    #define KOKKOS_HAVE_PRAGMA_UNROLL 1
+  #endif
+
+#endif
+
+//----------------------------------------------------------------------------
+/** Define function marking macros if compiler specific macros are undefined: */
+
+#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
+#define KOKKOS_FORCEINLINE_FUNCTION inline
+#endif
+
+#if ! defined( KOKKOS_INLINE_FUNCTION )
+#define KOKKOS_INLINE_FUNCTION inline
+#endif
+
+#if ! defined( KOKKOS_FUNCTION )
+#define KOKKOS_FUNCTION /**/
+#endif
+
+//----------------------------------------------------------------------------
+/** Define Macro for alignment: */
+#if ! defined(KOKKOS_ALIGN_16)
+#define KOKKOS_ALIGN_16 __attribute__((aligned(16)))
+#endif
+
+//----------------------------------------------------------------------------
+/** Determine the default execution space for parallel dispatch.
+ *  There is zero or one default execution space specified.
+ */
+
+#if 1 < ( ( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
+          ( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
+          ( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
+          ( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) )
+
+#error "More than one KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_* specified" ;
+
+#endif
+
+/** If default is not specified then choose from enabled execution spaces.
+ *  Priority: CUDA, OPENMP, THREADS, SERIAL
+ */
+#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
+#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
+#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
+#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
+#elif defined ( KOKKOS_HAVE_CUDA )
+#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
+#elif defined ( KOKKOS_HAVE_OPENMP )
+#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
+#elif defined ( KOKKOS_HAVE_PTHREAD )
+#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
+#else
+#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
+#endif
+
+//----------------------------------------------------------------------------
+/** Determine for what space the code is being compiled: */
+
+#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_HAVE_CUDA)
+#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA
+#else
+#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+#endif
+
+//----------------------------------------------------------------------------
+//---------------------------------------------------------------------------- + +#if ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \ + ( defined( _XOPEN_SOURCE ) && _XOPEN_SOURCE >= 600 ) +#if defined(KOKKOS_ENABLE_PERFORMANCE_POSIX_MEMALIGN) +#define KOKKOS_POSIX_MEMALIGN_AVAILABLE 1 +#endif +#endif + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +/**Enable Profiling by default**/ + +#ifndef KOKKOS_ENABLE_PROFILING +#define KOKKOS_ENABLE_PROFILING 1 +#endif + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +/* Transitional macros for switching between the old and new View + * are no longer supported; the deprecated View has been removed. + */ + +#if defined( KOKKOS_USING_DEPRECATED_VIEW ) +#error "Kokkos deprecated View has been removed" +#endif + +#define KOKKOS_USING_EXP_VIEW 1 +#define KOKKOS_USING_EXPERIMENTAL_VIEW + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #ifndef KOKKOS_MACROS_HPP */ + diff --git a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d843f7c9a1442f9ce1a268c04bf6395f28ed94c7 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp @@ -0,0 +1,1523 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_MEMORYPOOL_HPP +#define KOKKOS_MEMORYPOOL_HPP + +#include <Kokkos_Core_fwd.hpp> +#include <Kokkos_Parallel.hpp> +#include <Kokkos_Atomic.hpp> +#include <impl/Kokkos_BitOps.hpp> +#include <impl/Kokkos_Error.hpp> +#include <impl/KokkosExp_SharedAlloc.hpp> + +#include <limits> +#include <algorithm> +#include <chrono> + +// How should errors be handled? In general, production code should return a +// value indicating failure so the user can decide how the error is handled. 
+// While experimental, code can abort instead. If KOKKOS_MEMPOOL_PRINTERR is +// defined, the code will abort with an error message. Otherwise, the code will +// return with a value indicating failure when possible, or do nothing instead. +//#define KOKKOS_MEMPOOL_PRINTERR + +//#define KOKKOS_MEMPOOL_PRINT_INFO +//#define KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO +//#define KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO +//#define KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO +//#define KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS +//#define KOKKOS_MEMPOOL_PRINT_PAGE_INFO +//#define KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO + +// A superblock is considered full when this percentage of its pages are full. +#define KOKKOS_MEMPOOL_SB_FULL_FRACTION 0.80 + +// A page is considered full when this percentage of its blocks are full. +#define KOKKOS_MEMPOOL_PAGE_FULL_FRACTION 0.875 // 28 / 32 + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { + +namespace MempoolImpl { + +template < typename T, typename ExecutionSpace > +struct initialize_array { + typedef ExecutionSpace execution_space; + typedef typename ExecutionSpace::size_type size_type; + + T * m_data; + T m_value; + + initialize_array( T * d, size_t size, T v ) : m_data( d ), m_value( v ) + { + Kokkos::parallel_for( size, *this ); + + execution_space::fence(); + } + + KOKKOS_INLINE_FUNCTION + void operator()( size_type i ) const { m_data[i] = m_value; } +}; + +template <typename Bitset> +struct bitset_count +{ + typedef typename Bitset::execution_space execution_space; + typedef typename execution_space::size_type size_type; + typedef typename Bitset::size_type value_type; + typedef typename Bitset::word_type word_type; + + word_type * m_words; + value_type & m_result; + + bitset_count( word_type * w, value_type num_words, value_type & r ) + : m_words( w ), m_result( r ) + { + parallel_reduce( num_words, *this, m_result ); + } + + KOKKOS_INLINE_FUNCTION + void init( 
value_type & v ) const + { v = 0; } + + KOKKOS_INLINE_FUNCTION + void join( volatile value_type & dst, volatile value_type const & src ) const + { dst += src; } + + KOKKOS_INLINE_FUNCTION + void operator()( size_type i, value_type & count) const + { + count += Kokkos::Impl::bit_count( m_words[i] ); + } +}; + +template < typename Device > +class Bitset { +public: + typedef typename Device::execution_space execution_space; + typedef typename Device::memory_space memory_space; + typedef unsigned word_type; + typedef unsigned size_type; + + typedef Kokkos::Impl::DeepCopy< memory_space, Kokkos::HostSpace > raw_deep_copy; + + // Define some constants. + enum { + // Size of bitset word. Should be 32. + WORD_SIZE = sizeof(word_type) * CHAR_BIT, + LG_WORD_SIZE = Kokkos::Impl::integral_power_of_two( WORD_SIZE ), + WORD_MASK = WORD_SIZE - 1 + }; + +private: + word_type * m_words; + size_type m_size; + size_type m_num_words; + word_type m_last_word_mask; + +public: + ~Bitset() = default; + Bitset() = default; + Bitset( Bitset && ) = default; + Bitset( const Bitset & ) = default; + Bitset & operator = ( Bitset && ) = default; + Bitset & operator = ( const Bitset & ) = default; + + void init( void * w, size_type s ) + { + // Assumption: The size of the memory pointed to by w is a multiple of + // sizeof(word_type). + + m_words = reinterpret_cast<word_type*>( w ); + m_size = s; + m_num_words = ( s + WORD_SIZE - 1 ) >> LG_WORD_SIZE; + m_last_word_mask = m_size & WORD_MASK ? ( word_type(1) << ( m_size & WORD_MASK ) ) - 1 : 0; + + reset(); + } + + size_type size() const { return m_size; } + + size_type count() const + { + size_type val; + bitset_count< Bitset > bc( m_words, m_num_words, val ); + return val; + } + + void set() + { + // Set all the bits. + initialize_array< word_type, execution_space > ia( m_words, m_num_words, ~word_type(0) ); + + if ( m_last_word_mask ) { + // Clear the unused bits in the last block. 
+ raw_deep_copy( m_words + ( m_num_words - 1 ), &m_last_word_mask, sizeof(word_type) ); + } + } + + void reset() + { + initialize_array< word_type, execution_space > ia( m_words, m_num_words, word_type(0) ); + } + + KOKKOS_FORCEINLINE_FUNCTION + bool test( size_type i ) const + { + size_type word_pos = i >> LG_WORD_SIZE; + word_type word = volatile_load( &m_words[ word_pos ] ); + word_type mask = word_type(1) << ( i & WORD_MASK ); + + return word & mask; + } + + KOKKOS_FORCEINLINE_FUNCTION + bool set( size_type i ) const + { + size_type word_pos = i >> LG_WORD_SIZE; + word_type mask = word_type(1) << ( i & WORD_MASK ); + + return !( atomic_fetch_or( &m_words[ word_pos ], mask ) & mask ); + } + + KOKKOS_FORCEINLINE_FUNCTION + bool reset( size_type i ) const + { + size_type word_pos = i >> LG_WORD_SIZE; + word_type mask = word_type(1) << ( i & WORD_MASK ); + + return atomic_fetch_and( &m_words[ word_pos ], ~mask ) & mask; + } + + KOKKOS_FORCEINLINE_FUNCTION + Kokkos::pair< bool, word_type > + fetch_word_reset( size_type i ) const + { + size_type word_pos = i >> LG_WORD_SIZE; + word_type mask = word_type(1) << ( i & WORD_MASK ); + + Kokkos::pair<bool, word_type> result; + result.second = atomic_fetch_and( &m_words[ word_pos ], ~mask ); + result.first = result.second & mask; + + return result; + } + + KOKKOS_FORCEINLINE_FUNCTION + Kokkos::pair< bool, size_type > + set_any_in_word( size_type i, word_type & prev_val ) const + { + prev_val = 0; + + size_type word_pos = i >> LG_WORD_SIZE; + word_type word = volatile_load( &m_words[ word_pos ] ); + + // Loop until there are no more unset bits in the word. + while ( ~word ) { + // Find the first unset bit in the word. + size_type bit = Kokkos::Impl::bit_scan_forward( ~word ); + + // Try to set the bit. + word_type mask = word_type(1) << bit; + word = atomic_fetch_or( &m_words[ word_pos ], mask ); + + if ( !( word & mask ) ) { + // Successfully set the bit. 
+ prev_val = word; + + return Kokkos::pair<bool, size_type>( true, ( word_pos << LG_WORD_SIZE ) + bit ); + } + } + + // Didn't find a free bit in this word. + return Kokkos::pair<bool, size_type>( false, i ); + } + + KOKKOS_FORCEINLINE_FUNCTION + Kokkos::pair< bool, size_type > + set_any_in_word( size_type i, word_type & prev_val, word_type word_mask ) const + { + prev_val = 0; + + size_type word_pos = i >> LG_WORD_SIZE; + word_type word = volatile_load( &m_words[ word_pos ] ); + word = ( ~word ) & word_mask; + + // Loop until there are no more unset bits in the word. + while ( word ) { + // Find the first unset bit in the word. + size_type bit = Kokkos::Impl::bit_scan_forward( word ); + + // Try to set the bit. + word_type mask = word_type(1) << bit; + word = atomic_fetch_or( &m_words[ word_pos ], mask ); + + if ( !( word & mask ) ) { + // Successfully set the bit. + prev_val = word; + + return Kokkos::pair<bool, size_type>( true, ( word_pos << LG_WORD_SIZE ) + bit ); + } + + word = ( ~word ) & word_mask; + } + + // Didn't find a free bit in this word. + return Kokkos::pair<bool, size_type>( false, i ); + } + + KOKKOS_FORCEINLINE_FUNCTION + Kokkos::pair< bool, size_type > + reset_any_in_word( size_type i, word_type & prev_val ) const + { + prev_val = 0; + + size_type word_pos = i >> LG_WORD_SIZE; + word_type word = volatile_load( &m_words[ word_pos ] ); + + // Loop until there are no more set bits in the word. + while ( word ) { + // Find the first set bit in the word. + size_type bit = Kokkos::Impl::bit_scan_forward( word ); + + // Try to reset the bit. + word_type mask = word_type(1) << bit; + word = atomic_fetch_and( &m_words[ word_pos ], ~mask ); + + if ( word & mask ) { + // Successfully reset the bit. + prev_val = word; + + return Kokkos::pair<bool, size_type>( true, ( word_pos << LG_WORD_SIZE ) + bit ); + } + } + + // Didn't find a set bit to reset in this word. 
+ return Kokkos::pair<bool, size_type>( false, i ); + } + + KOKKOS_FORCEINLINE_FUNCTION + Kokkos::pair< bool, size_type > + reset_any_in_word( size_type i, word_type & prev_val, word_type word_mask ) const + { + prev_val = 0; + + size_type word_pos = i >> LG_WORD_SIZE; + word_type word = volatile_load( &m_words[ word_pos ] ); + word = word & word_mask; + + // Loop until there are no more set bits in the word. + while ( word ) { + // Find the first set bit in the (masked) word. + size_type bit = Kokkos::Impl::bit_scan_forward( word ); + + // Try to reset the bit. + word_type mask = word_type(1) << bit; + word = atomic_fetch_and( &m_words[ word_pos ], ~mask ); + + if ( word & mask ) { + // Successfully reset the bit. + prev_val = word; + + return Kokkos::pair<bool, size_type>( true, ( word_pos << LG_WORD_SIZE ) + bit ); + } + + word = word & word_mask; + } + + // Didn't find a set bit to reset in this word. + return Kokkos::pair<bool, size_type>( false, i ); + } +}; + +template < typename UInt32View, typename BSHeaderView, typename SBHeaderView, + typename MempoolBitset > +struct create_histogram { + typedef typename UInt32View::execution_space execution_space; + typedef typename execution_space::size_type size_type; + typedef Kokkos::pair< double, uint32_t > value_type; + + size_t m_start; + UInt32View m_page_histogram; + BSHeaderView m_blocksize_info; + SBHeaderView m_sb_header; + MempoolBitset m_sb_blocks; + size_t m_lg_max_sb_blocks; + uint32_t m_lg_min_block_size; + uint32_t m_blocks_per_page; + value_type & m_result; + + create_histogram( size_t start, size_t end, UInt32View ph, BSHeaderView bsi, + SBHeaderView sbh, MempoolBitset sbb, size_t lmsb, + uint32_t lmbs, uint32_t bpp, value_type & r ) + : m_start( start ), m_page_histogram( ph ), m_blocksize_info( bsi ), + m_sb_header( sbh ), m_sb_blocks( sbb ), m_lg_max_sb_blocks( lmsb ), + m_lg_min_block_size( lmbs ), m_blocks_per_page( bpp ), m_result( r ) + { + Kokkos::parallel_reduce( end - start, *this, m_result ); + + 
execution_space::fence(); + } + + KOKKOS_INLINE_FUNCTION + void init( value_type & v ) const + { + v.first = 0.0; + v.second = 0; + } + + KOKKOS_INLINE_FUNCTION + void join( volatile value_type & dst, volatile value_type const & src ) const + { + dst.first += src.first; + dst.second += src.second; + } + + KOKKOS_INLINE_FUNCTION + void operator()( size_type i, value_type & r ) const + { + size_type i2 = i + m_start; + + uint32_t lg_block_size = m_sb_header(i2).m_lg_block_size; + + // A superblock only has a block size of 0 when it is empty. + if ( lg_block_size != 0 ) { + uint32_t block_size_id = lg_block_size - m_lg_min_block_size; + uint32_t blocks_per_sb = m_blocksize_info[block_size_id].m_blocks_per_sb; + uint32_t pages_per_sb = m_blocksize_info[block_size_id].m_pages_per_sb; + + uint32_t total_allocated_blocks = 0; + + for ( uint32_t j = 0; j < pages_per_sb; ++j ) { + unsigned start_pos = ( i2 << m_lg_max_sb_blocks ) + j * m_blocks_per_page; + unsigned end_pos = start_pos + m_blocks_per_page; + uint32_t page_allocated_blocks = 0; + + for ( unsigned k = start_pos; k < end_pos; ++k ) { + page_allocated_blocks += m_sb_blocks.test( k ); + } + + total_allocated_blocks += page_allocated_blocks; + + atomic_fetch_add( &m_page_histogram(page_allocated_blocks), 1 ); + } + + r.first += double(total_allocated_blocks) / blocks_per_sb; + r.second += blocks_per_sb; + } + } +}; + +#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO +template < typename UInt32View, typename SBHeaderView, typename MempoolBitset > +struct count_allocated_blocks { + typedef typename UInt32View::execution_space execution_space; + typedef typename execution_space::size_type size_type; + + UInt32View m_num_allocated_blocks; + SBHeaderView m_sb_header; + MempoolBitset m_sb_blocks; + size_t m_sb_size; + size_t m_lg_max_sb_blocks; + + count_allocated_blocks( size_t num_sb, UInt32View nab, SBHeaderView sbh, + MempoolBitset sbb, size_t sbs, size_t lmsb ) + : m_num_allocated_blocks( nab ), m_sb_header( sbh ), + 
m_sb_blocks( sbb ), m_sb_size( sbs ), m_lg_max_sb_blocks( lmsb ) + { + Kokkos::parallel_for( num_sb, *this ); + + execution_space::fence(); + } + + KOKKOS_INLINE_FUNCTION + void operator()( size_type i ) const + { + uint32_t lg_block_size = m_sb_header(i).m_lg_block_size; + + // A superblock only has a block size of 0 when it is empty. + if ( lg_block_size != 0 ) { + // Count the allocated blocks in the superblock. + uint32_t blocks_per_sb = lg_block_size > 0 ? m_sb_size >> lg_block_size : 0; + unsigned start_pos = i << m_lg_max_sb_blocks; + unsigned end_pos = start_pos + blocks_per_sb; + uint32_t count = 0; + + for ( unsigned j = start_pos; j < end_pos; ++j ) { + count += m_sb_blocks.test( j ); + } + + m_num_allocated_blocks(i) = count; + } + } +}; +#endif + +} + +/// \class MemoryPool +/// \brief Bitset based memory manager for pools of same-sized chunks of memory. +/// \tparam Device Kokkos device that gives the execution and memory space the +/// allocator will be used in. +/// +/// MemoryPool is a memory space that can be on host or device. It provides a +/// pool memory allocator for fast allocation of same-sized chunks of memory. +/// The memory is only accessible on the host / device this allocator is +/// associated with. +/// +/// This allocator is based on ideas from the following GPU allocators: +/// Halloc (https://github.com/canonizer/halloc). +/// ScatterAlloc (https://github.com/ComputationalRadiationPhysics/scatteralloc) +template < typename Device > +class MemoryPool { +private: + // The allocator uses superblocks. A superblock is divided into pages, and a + // page is divided into blocks. A block is the chunk of memory that is given + // out by the allocator. A page always has a number of blocks equal to the + // size of the word used by the bitset. Thus, the pagesize can vary between + // superblocks as it is based on the block size of the superblock. 
The + // allocator supports all powers of 2 from MIN_BLOCK_SIZE to the size of a + // superblock as block sizes. + + // Superblocks are divided into 4 categories: + // 1. empty - is completely empty; there are no active allocations + // 2. partfull - partially full; there are some active allocations + // 3. full - full enough with active allocations that new allocations + // will likely fail + // 4. active - is currently the active superblock for a block size + // + // An inactive superblock is one that is empty, partfull, or full. + // + // New allocations occur only from an active superblock. If a superblock is + // made inactive after an allocation request is made to it but before the + // allocation request is fulfilled, the allocation will still be attempted + // from that superblock. Deallocations can occur to partfull, full, or + // active superblocks. Superblocks move between categories as allocations + // and deallocations happen. Superblocks all start empty. + // + // Here are the possible moves between categories: + // empty -> active During allocation, there is no active superblock + // or the active superblock is full. + // active -> full During allocation, the full threshold of the + // superblock is reached when increasing the fill + // level. + // full -> partfull During deallocation, the full threshold of the + // superblock is crossed when decreasing the fill + // level. + // partfull -> empty Deallocation of the last allocated block of an + // inactive superblock. + // partfull -> active During allocation, the active superblock is full. + // + // When a new active superblock is needed, partfull superblocks of the same + // block size are chosen over empty superblocks. + // + // The empty and partfull superblocks are tracked using bitsets that represent + // the superblocks in those respective categories. 
Empty superblocks use a + // single bitset, while partfull superblocks use a bitset per block size + // (contained sequentially in a single bitset). Active superblocks are + // tracked by the active superblocks array. Full superblocks aren't tracked + // at all. + + typedef typename Device::execution_space execution_space; + typedef typename Device::memory_space backend_memory_space; + typedef Device device_type; + typedef MempoolImpl::Bitset< device_type > MempoolBitset; + + // Define some constants. + enum { + MIN_BLOCK_SIZE = 64, + LG_MIN_BLOCK_SIZE = Kokkos::Impl::integral_power_of_two( MIN_BLOCK_SIZE ), + MAX_BLOCK_SIZES = 31 - LG_MIN_BLOCK_SIZE + 1, + + // Size of bitset word. + BLOCKS_PER_PAGE = MempoolBitset::WORD_SIZE, + LG_BLOCKS_PER_PAGE = MempoolBitset::LG_WORD_SIZE, + + INVALID_SUPERBLOCK = ~uint32_t(0), + SUPERBLOCK_LOCK = ~uint32_t(0) - 1, + + MAX_TRIES = 32 // Cap on the number of pages searched + // before an allocation returns empty. + }; + +public: + // Stores information about each superblock. + struct SuperblockHeader { + uint32_t m_full_pages; + uint32_t m_empty_pages; + uint32_t m_lg_block_size; + uint32_t m_is_active; + + KOKKOS_FUNCTION + SuperblockHeader() : + m_full_pages(0), m_empty_pages(0), m_lg_block_size(0), m_is_active(false) {} + }; + + // Stores information about each block size. + struct BlockSizeHeader { + uint32_t m_blocks_per_sb; + uint32_t m_pages_per_sb; + uint32_t m_sb_full_level; + uint32_t m_page_full_level; + + KOKKOS_FUNCTION + BlockSizeHeader() : + m_blocks_per_sb(0), m_pages_per_sb(0), m_sb_full_level(0), m_page_full_level(0) {} + }; + +private: + typedef Impl::SharedAllocationTracker Tracker; + typedef View< uint32_t *, device_type > UInt32View; + typedef View< SuperblockHeader *, device_type > SBHeaderView; + + // The letters 'sb' used in any variable name mean superblock. + + size_t m_lg_sb_size; // Log2 of superblock size. + size_t m_sb_size; // Superblock size. 
+ size_t m_lg_max_sb_blocks; // Log2 of the number of blocks of the + // minimum block size in a superblock. + size_t m_num_sb; // Number of superblocks. + size_t m_ceil_num_sb; // Number of superblocks rounded up to the smallest + // multiple of the bitset word size. Used by + // bitsets representing superblock categories to + // ensure different block sizes never share a word + // in the bitset. + size_t m_num_block_size; // Number of block sizes supported. + size_t m_data_size; // Amount of memory available to the allocator. + size_t m_sb_blocks_size; // Amount of memory for free / empty blocks bitset. + size_t m_empty_sb_size; // Amount of memory for empty superblocks bitset. + size_t m_partfull_sb_size; // Amount of memory for partfull superblocks bitset. + size_t m_total_size; // Total amount of memory allocated. + char * m_data; // Beginning device memory location used for + // superblocks. + UInt32View m_active; // Active superblock IDs. + SBHeaderView m_sb_header; // Header info for superblocks. + MempoolBitset m_sb_blocks; // Bitsets representing free / allocated status + // of blocks in superblocks. + MempoolBitset m_empty_sb; // Bitset representing empty superblocks. + MempoolBitset m_partfull_sb; // Bitsets representing partially full superblocks. + Tracker m_track; // Tracker for superblock memory. + BlockSizeHeader m_blocksize_info[MAX_BLOCK_SIZES]; // Header info for block sizes. + + // There were several methods tried for storing the block size header info: in a View, + // in a View of const data, and in a RandomAccess View. All of these were slower than + // storing it in a static array that is a member variable to the class. In the latter + // case, the block size info gets copied into the constant memory on the GPU along with + // the class when it is copied there for executing a parallel loop. Instead of storing + // the values, computing the values every time they were needed was also tried. 
This + // method was slightly slower than storing them in the static array. + +public: + //! Tag this class as a kokkos memory space + typedef MemoryPool memory_space; + + ~MemoryPool() = default; + MemoryPool() = default; + MemoryPool( MemoryPool && ) = default; + MemoryPool( const MemoryPool & ) = default; + MemoryPool & operator = ( MemoryPool && ) = default; + MemoryPool & operator = ( const MemoryPool & ) = default; + + /// \brief Initializes the memory pool. + /// \param memspace The memory space from which the memory pool will allocate memory. + /// \param total_size The requested memory amount controlled by the allocator. The + /// actual amount is rounded up to the smallest multiple of the + /// superblock size >= the requested size. + /// \param log2_superblock_size Log2 of the size of superblocks used by the allocator. + /// In most use cases, the default value should work. + inline + MemoryPool( const backend_memory_space & memspace, + size_t total_size, size_t log2_superblock_size = 20 ) + : m_lg_sb_size( log2_superblock_size ), + m_sb_size( size_t(1) << m_lg_sb_size ), + m_lg_max_sb_blocks( m_lg_sb_size - LG_MIN_BLOCK_SIZE ), + m_num_sb( ( total_size + m_sb_size - 1 ) >> m_lg_sb_size ), + m_ceil_num_sb( ( ( m_num_sb + BLOCKS_PER_PAGE - 1 ) >> LG_BLOCKS_PER_PAGE ) << + LG_BLOCKS_PER_PAGE ), + m_num_block_size( m_lg_sb_size - LG_MIN_BLOCK_SIZE + 1 ), + m_data_size( m_num_sb * m_sb_size ), + m_sb_blocks_size( ( m_num_sb << m_lg_max_sb_blocks ) / CHAR_BIT ), + m_empty_sb_size( m_ceil_num_sb / CHAR_BIT ), + m_partfull_sb_size( m_ceil_num_sb * m_num_block_size / CHAR_BIT ), + m_total_size( m_data_size + m_sb_blocks_size + m_empty_sb_size + m_partfull_sb_size ), + m_data(0), + m_active( "Active superblocks" ), + m_sb_header( "Superblock headers" ), + m_track() + { + // Assumption. The minimum block size must be a power of 2. + static_assert( Kokkos::Impl::is_integral_power_of_two( MIN_BLOCK_SIZE ), "" ); + + // Assumption. 
Require a superblock be large enough so it takes at least 1 + // whole bitset word to represent it using the minimum blocksize. + if ( m_sb_size < MIN_BLOCK_SIZE * BLOCKS_PER_PAGE ) { + printf( "\n** MemoryPool::MemoryPool() Superblock size must be >= %u **\n", + MIN_BLOCK_SIZE * BLOCKS_PER_PAGE ); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + fflush( stdout ); +#endif + Kokkos::abort( "" ); + } + + // Assumption. A superblock's size can be at most 2^31. Verify this. + if ( m_lg_sb_size > 31 ) { + printf( "\n** MemoryPool::MemoryPool() Superblock size must be < %u **\n", + ( uint32_t(1) << 31 ) ); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + fflush( stdout ); +#endif + Kokkos::abort( "" ); + } + + // Assumption. The Bitset only uses unsigned for size types which limits + // the amount of memory the allocator can manage. Verify the memory size + // is below this limit. + if ( m_data_size > size_t(MIN_BLOCK_SIZE) * std::numeric_limits<unsigned>::max() ) { + printf( "\n** MemoryPool::MemoryPool() Allocator can only manage %lu bytes of memory; requested %lu **\n", + size_t(MIN_BLOCK_SIZE) * std::numeric_limits<unsigned>::max(), total_size ); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + fflush( stdout ); +#endif + Kokkos::abort( "" ); + } + + // Allocate memory for Views. This is done here instead of at construction + // so that the runtime checks can be performed before allocating memory. + resize(m_active, m_num_block_size ); + resize(m_sb_header, m_num_sb ); + + // Allocate superblock memory. + typedef Impl::SharedAllocationRecord< backend_memory_space, void > SharedRecord; + SharedRecord * rec = + SharedRecord::allocate( memspace, "mempool", m_total_size ); + + m_track.assign_allocated_record_to_uninitialized( rec ); + m_data = reinterpret_cast<char *>( rec->data() ); + + // Set and initialize the free / empty block bitset memory. 
+ m_sb_blocks.init( m_data + m_data_size, m_num_sb << m_lg_max_sb_blocks ); + + // Set and initialize the empty superblock block bitset memory. + m_empty_sb.init( m_data + m_data_size + m_sb_blocks_size, m_num_sb ); + + // Start with all superblocks in the empty category. + m_empty_sb.set(); + + // Set and initialize the partfull superblock block bitset memory. + m_partfull_sb.init( m_data + m_data_size + m_sb_blocks_size + m_empty_sb_size, + m_ceil_num_sb * m_num_block_size ); + + // Initialize all active superblocks to be invalid. + typename UInt32View::HostMirror host_active = create_mirror_view(m_active); + for (size_t i = 0; i < m_num_block_size; ++i) host_active(i) = INVALID_SUPERBLOCK; + + deep_copy(m_active, host_active); + + // Initialize the blocksize info. + for ( size_t i = 0; i < m_num_block_size; ++i ) { + uint32_t lg_block_size = i + LG_MIN_BLOCK_SIZE; + uint32_t blocks_per_sb = m_sb_size >> lg_block_size; + uint32_t pages_per_sb = ( blocks_per_sb + BLOCKS_PER_PAGE - 1 ) >> LG_BLOCKS_PER_PAGE; + + m_blocksize_info[i].m_blocks_per_sb = blocks_per_sb; + m_blocksize_info[i].m_pages_per_sb = pages_per_sb; + + // Set the full level for the superblock. + m_blocksize_info[i].m_sb_full_level = + static_cast<uint32_t>( pages_per_sb * KOKKOS_MEMPOOL_SB_FULL_FRACTION ); + + if ( m_blocksize_info[i].m_sb_full_level == 0 ) { + m_blocksize_info[i].m_sb_full_level = 1; + } + + // Set the full level for the page. + uint32_t blocks_per_page = + blocks_per_sb < BLOCKS_PER_PAGE ? 
blocks_per_sb : BLOCKS_PER_PAGE; + + m_blocksize_info[i].m_page_full_level = + static_cast<uint32_t>( blocks_per_page * KOKKOS_MEMPOOL_PAGE_FULL_FRACTION ); + + if ( m_blocksize_info[i].m_page_full_level == 0 ) { + m_blocksize_info[i].m_page_full_level = 1; + } + } + +#ifdef KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO + printf( "\n" ); + printf( " m_lg_sb_size: %12lu\n", m_lg_sb_size ); + printf( " m_sb_size: %12lu\n", m_sb_size ); + printf( " m_max_sb_blocks: %12lu\n", size_t(1) << m_lg_max_sb_blocks ); + printf( "m_lg_max_sb_blocks: %12lu\n", m_lg_max_sb_blocks ); + printf( " m_num_sb: %12lu\n", m_num_sb ); + printf( " m_ceil_num_sb: %12lu\n", m_ceil_num_sb ); + printf( " m_num_block_size: %12lu\n", m_num_block_size ); + printf( " data bytes: %12lu\n", m_data_size ); + printf( " sb_blocks bytes: %12lu\n", m_sb_blocks_size ); + printf( " empty_sb bytes: %12lu\n", m_empty_sb_size ); + printf( " partfull_sb bytes: %12lu\n", m_partfull_sb_size ); + printf( " total bytes: %12lu\n", m_total_size ); + printf( " m_empty_sb size: %12u\n", m_empty_sb.size() ); + printf( "m_partfull_sb size: %12u\n", m_partfull_sb.size() ); + printf( "\n" ); + fflush( stdout ); +#endif + +#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO + // Print the blocksize info for all the block sizes. + printf( "SIZE BLOCKS_PER_SB PAGES_PER_SB SB_FULL_LEVEL PAGE_FULL_LEVEL\n" ); + for ( size_t i = 0; i < m_num_block_size; ++i ) { + printf( "%4zu %13u %12u %13u %15u\n", i + LG_MIN_BLOCK_SIZE, + m_blocksize_info[i].m_blocks_per_sb, m_blocksize_info[i].m_pages_per_sb, + m_blocksize_info[i].m_sb_full_level, m_blocksize_info[i].m_page_full_level ); + } + printf( "\n" ); +#endif + } + + /// \brief The actual block size allocated given alloc_size. + KOKKOS_INLINE_FUNCTION + size_t allocate_block_size( const size_t alloc_size ) const + { return size_t(1) << ( get_block_size_index( alloc_size ) + LG_MIN_BLOCK_SIZE); } + + /// \brief Allocate a chunk of memory. 
+ /// \param alloc_size Size of the requested allocation in number of bytes. + /// + /// The function returns a void pointer to a memory location on success and + /// NULL on failure. + KOKKOS_FUNCTION + void * allocate( size_t alloc_size ) const + { + void * p = 0; + + // Only support allocations up to the superblock size. Just return 0 + // (failed allocation) for any size above this. + if (alloc_size <= m_sb_size ) + { + int block_size_id = get_block_size_index( alloc_size ); + uint32_t blocks_per_sb = m_blocksize_info[block_size_id].m_blocks_per_sb; + uint32_t pages_per_sb = m_blocksize_info[block_size_id].m_pages_per_sb; + unsigned word_size = blocks_per_sb > 32 ? 32 : blocks_per_sb; + unsigned word_mask = ( uint64_t(1) << word_size ) - 1; + + uint32_t sb_id = volatile_load( &m_active(block_size_id) ); + + // If the active superblock is locked, keep reading it until the lock is released. + while ( sb_id == SUPERBLOCK_LOCK ) { + sb_id = volatile_load( &m_active(block_size_id) ); + } + + bool allocation_done = false; + + while (!allocation_done) { + bool need_new_sb = false; + + if (sb_id != INVALID_SUPERBLOCK) { + // Use the value from the clock register as the hash value. + uint64_t hash_val = get_clock_register(); + + // Get the starting position for this superblock's bits in the bitset. + uint32_t pos_base = sb_id << m_lg_max_sb_blocks; + + // Mod the hash value to choose a page in the superblock. The + // initial block searched is the first block of that page. + uint32_t pos_rel = uint32_t( hash_val & ( pages_per_sb - 1 ) ) << LG_BLOCKS_PER_PAGE; + + // Get the absolute starting position for this superblock's bits in the bitset. + uint32_t pos = pos_base + pos_rel; + + // Keep track of the number of pages searched. Pages in the superblock are + // searched linearly from the starting page. All pages in the superblock are + // searched until either a location is found, or every page has been tried. 
+ uint32_t pages_searched = 0; + + bool search_done = false; + + while (!search_done) { + bool success; + unsigned prev_val; + + Kokkos::tie( success, pos ) = + m_sb_blocks.set_any_in_word( pos, prev_val, word_mask ); + + if ( !success ) { + if ( ++pages_searched >= pages_per_sb ) { + // Searched all the pages in this superblock. Look for a new superblock. + // + // The previous method tried limiting the number of pages searched, but + // that caused a huge performance issue in CUDA where the outer loop + // executed massive numbers of times. Threads weren't able to find a + // free location when the superblock wasn't full and were able to execute + // the outer loop many times before the superblock was switched for a new + // one. Switching to an exhaustive search eliminated this possibility and + // didn't slow anything down for the tests. + need_new_sb = true; + search_done = true; + } + else { + // Move to the next page making sure the new search position + // doesn't go past this superblock's bits. + pos += BLOCKS_PER_PAGE; + pos = ( pos < pos_base + blocks_per_sb ) ? pos : pos_base; + } + } + else { + // Reserved a memory location to allocate. + search_done = true; + allocation_done = true; + + uint32_t lg_block_size = block_size_id + LG_MIN_BLOCK_SIZE; + + p = m_data + ( size_t(sb_id) << m_lg_sb_size ) + + ( ( pos - pos_base ) << lg_block_size ); + + uint32_t used_bits = Kokkos::Impl::bit_count( prev_val ); + + if ( used_bits == 0 ) { + // This page was empty. Decrement the number of empty pages for + // the superblock. + atomic_fetch_sub( &m_sb_header(sb_id).m_empty_pages, 1 ); + } + else if ( used_bits == m_blocksize_info[block_size_id].m_page_full_level - 1 ) + { + // This page has reached its full level. Increment the number of full pages for + // the superblock. + uint32_t full_pages = atomic_fetch_add( &m_sb_header(sb_id).m_full_pages, 1 ); + + // This allocation made the superblock full, so a new one needs to be found. 
+ if ( full_pages == m_blocksize_info[block_size_id].m_sb_full_level - 1 ) { + need_new_sb = true; + } + } + } + } + } + else { + // This is the first allocation for this block size. A superblock needs + // to be set as the active one. If this point is reached any other time, + // it is an error. + need_new_sb = true; + } + + if ( need_new_sb ) { + uint32_t new_sb_id = find_superblock( block_size_id, sb_id ); + + if ( new_sb_id == sb_id ) { + allocation_done = true; +#ifdef KOKKOS_MEMPOOL_PRINT_INFO + printf( "** No superblocks available. **\n" ); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + fflush( stdout ); +#endif +#endif + } + else { + sb_id = new_sb_id; + } + } + } + } +#ifdef KOKKOS_MEMPOOL_PRINT_INFO + else { + printf( "** Requested allocation size (%zu) larger than superblock size (%lu). **\n", + alloc_size, m_sb_size); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + fflush( stdout ); +#endif + } +#endif + + return p; + } + + /// \brief Release allocated memory back to the pool. + /// \param alloc_ptr Pointer to chunk of memory previously allocated by + /// the allocator. + /// \param alloc_size Size of the allocated memory in number of bytes. + KOKKOS_FUNCTION + void deallocate( void * alloc_ptr, size_t alloc_size ) const + { + char * ap = static_cast<char *>( alloc_ptr ); + + // Only deallocate memory controlled by this pool. + if ( ap >= m_data && ap + alloc_size <= m_data + m_data_size ) { + // Get the superblock for the address. This can be calculated by math on + // the address since the superblocks are stored contiguously in one memory + // chunk. + uint32_t sb_id = ( ap - m_data ) >> m_lg_sb_size; + + // Get the starting position for this superblock's bits in the bitset. + uint32_t pos_base = sb_id << m_lg_max_sb_blocks; + + // Get the relative position for this memory location's bit in the bitset. 
+ uint32_t offset = ( ap - m_data ) - ( size_t(sb_id) << m_lg_sb_size ); + uint32_t lg_block_size = m_sb_header(sb_id).m_lg_block_size; + uint32_t block_size_id = lg_block_size - LG_MIN_BLOCK_SIZE; + uint32_t pos_rel = offset >> lg_block_size; + + bool success; + unsigned prev_val; + + Kokkos::tie( success, prev_val ) = m_sb_blocks.fetch_word_reset( pos_base + pos_rel ); + + // If the memory location was previously deallocated, do nothing. + if ( success ) { + uint32_t page_fill_level = Kokkos::Impl::bit_count( prev_val ); + + if ( page_fill_level == 1 ) { + // This page is now empty. Increment the number of empty pages for the + // superblock. + uint32_t empty_pages = atomic_fetch_add( &m_sb_header(sb_id).m_empty_pages, 1 ); + + if ( !volatile_load( &m_sb_header(sb_id).m_is_active ) && + empty_pages == m_blocksize_info[block_size_id].m_pages_per_sb - 1 ) + { + // This deallocation caused the superblock to be empty. Change the + // superblock category from partially full to empty. + unsigned pos = block_size_id * m_ceil_num_sb + sb_id; + + if ( m_partfull_sb.reset( pos ) ) { + // Reset the empty pages and block size for the superblock. + volatile_store( &m_sb_header(sb_id).m_empty_pages, uint32_t(0) ); + volatile_store( &m_sb_header(sb_id).m_lg_block_size, uint32_t(0) ); + + memory_fence(); + + m_empty_sb.set( sb_id ); + } + } + } + else if ( page_fill_level == m_blocksize_info[block_size_id].m_page_full_level ) { + // This page is no longer full. Decrement the number of full pages for + // the superblock. + uint32_t full_pages = atomic_fetch_sub( &m_sb_header(sb_id).m_full_pages, 1 ); + + if ( !volatile_load( &m_sb_header(sb_id).m_is_active ) && + full_pages == m_blocksize_info[block_size_id].m_sb_full_level ) + { + // This deallocation caused the number of full pages to decrease below + // the full threshold. Change the superblock category from full to + // partially full. 
+ unsigned pos = block_size_id * m_ceil_num_sb + sb_id; + m_partfull_sb.set( pos ); + } + } + } + } +#ifdef KOKKOS_MEMPOOL_PRINTERR + else { + printf( "\n** MemoryPool::deallocate() ADDRESS_OUT_OF_RANGE(0x%llx) **\n", + reinterpret_cast<uint64_t>( alloc_ptr ) ); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + fflush( stdout ); +#endif + } +#endif + } + + /// \brief Tests if the memory pool has no more memory available to allocate. + KOKKOS_INLINE_FUNCTION + bool is_empty() const + { + // The allocator is empty if all superblocks are full. A superblock is + // full if it has >= 80% of its pages allocated. + + // Look at all the superblocks. If one is not full, then the allocator + // isn't empty. + for ( size_t i = 0; i < m_num_sb; ++i ) { + uint32_t lg_block_size = m_sb_header(i).m_lg_block_size; + + // A superblock only has a block size of 0 when it is empty. + if ( lg_block_size == 0 ) return false; + + uint32_t block_size_id = lg_block_size - LG_MIN_BLOCK_SIZE; + uint32_t full_pages = volatile_load( &m_sb_header(i).m_full_pages ); + + if ( full_pages < m_blocksize_info[block_size_id].m_sb_full_level ) return false; + } + + // All the superblocks were full. The allocator is empty. + return true; + } + + // The following functions are used for debugging. + void print_status() const + { + printf( "\n" ); + +#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO + typename SBHeaderView::HostMirror host_sb_header = create_mirror_view(m_sb_header); + deep_copy( host_sb_header, m_sb_header ); + + UInt32View num_allocated_blocks( "Allocated Blocks", m_num_sb ); + + // Count the number of allocated blocks per superblock. 
+ { + MempoolImpl::count_allocated_blocks< UInt32View, SBHeaderView, MempoolBitset > + mch( m_num_sb, num_allocated_blocks, m_sb_header, + m_sb_blocks, m_sb_size, m_lg_max_sb_blocks ); + } + + typename UInt32View::HostMirror host_num_allocated_blocks = + create_mirror_view(num_allocated_blocks); + deep_copy( host_num_allocated_blocks, num_allocated_blocks ); + + // Print header info of all superblocks. + printf( "SB_ID SIZE ACTIVE EMPTY_PAGES FULL_PAGES USED_BLOCKS\n" ); + for ( size_t i = 0; i < m_num_sb; ++i ) { + printf( "%5zu %4u %6d %11u %10u %10u\n", i, + host_sb_header(i).m_lg_block_size, host_sb_header(i).m_is_active, + host_sb_header(i).m_empty_pages, host_sb_header(i).m_full_pages, + host_num_allocated_blocks(i) ); + } + + printf( "\n" ); +#endif + + UInt32View page_histogram( "Page Histogram", 33 ); + + // Get a View version of the blocksize info. + typedef View< BlockSizeHeader *, device_type > BSHeaderView; + BSHeaderView blocksize_info( "BlockSize Headers", MAX_BLOCK_SIZES ); + + Kokkos::Impl::DeepCopy< backend_memory_space, Kokkos::HostSpace > + dc( blocksize_info.ptr_on_device(), m_blocksize_info, + sizeof(BlockSizeHeader) * m_num_block_size ); + + Kokkos::pair< double, uint32_t > result = Kokkos::pair< double, uint32_t >( 0.0, 0 ); + + // Create the page histogram. + { + MempoolImpl::create_histogram< UInt32View, BSHeaderView, SBHeaderView, MempoolBitset > + mch( 0, m_num_sb, page_histogram, blocksize_info, m_sb_header, m_sb_blocks, + m_lg_max_sb_blocks, LG_MIN_BLOCK_SIZE, BLOCKS_PER_PAGE, result ); + } + + typename UInt32View::HostMirror host_page_histogram = create_mirror_view(page_histogram); + deep_copy( host_page_histogram, page_histogram ); + + // Find the used and total pages and blocks. 
+ uint32_t used_pages = 0; + uint32_t used_blocks = 0; + for ( uint32_t i = 1; i < 33; ++i ) { + used_pages += host_page_histogram(i); + used_blocks += i * host_page_histogram(i); + } + uint32_t total_pages = used_pages + host_page_histogram(0); + + unsigned num_empty_sb = m_empty_sb.count(); + unsigned num_non_empty_sb = m_num_sb - num_empty_sb; + unsigned num_partfull_sb = m_partfull_sb.count(); + + uint32_t total_blocks = result.second; + double ave_sb_full = num_non_empty_sb == 0 ? 0.0 : result.first / num_non_empty_sb; + double percent_used_sb = double( m_num_sb - num_empty_sb ) / m_num_sb; + double percent_used_pages = total_pages == 0 ? 0.0 : double(used_pages) / total_pages; + double percent_used_blocks = total_blocks == 0 ? 0.0 : double(used_blocks) / total_blocks; + + // Count active superblocks. + typename UInt32View::HostMirror host_active = create_mirror_view(m_active); + deep_copy(host_active, m_active); + + unsigned num_active_sb = 0; + for ( size_t i = 0; i < m_num_block_size; ++i ) { + num_active_sb += host_active(i) != INVALID_SUPERBLOCK; + } + +#ifdef KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS + // Print active superblocks. + printf( "BS_ID SB_ID\n" ); + for ( size_t i = 0; i < m_num_block_size; ++i ) { + uint32_t sb_id = host_active(i); + + if ( sb_id == INVALID_SUPERBLOCK ) { + printf( "%5zu I\n", i ); + } + else if ( sb_id == SUPERBLOCK_LOCK ) { + printf( "%5zu L\n", i ); + } + else { + printf( "%5zu %7u\n", i, sb_id ); + } + } + printf( "\n" ); + fflush( stdout ); +#endif + +#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO + // Print the summary page histogram. + printf( "USED_BLOCKS PAGE_COUNT\n" ); + for ( uint32_t i = 0; i < 33; ++i ) { + printf( "%10u %10u\n", i, host_page_histogram[i] ); + } + printf( "\n" ); +#endif + +#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO + // Print the page histogram for a few individual superblocks. 
+// const uint32_t num_sb_id = 2; +// uint32_t sb_id[num_sb_id] = { 0, 10 }; + const uint32_t num_sb_id = 1; + uint32_t sb_id[num_sb_id] = { 0 }; + + for ( uint32_t i = 0; i < num_sb_id; ++i ) { + deep_copy( page_histogram, 0 ); + + { + MempoolImpl::create_histogram< UInt32View, BSHeaderView, SBHeaderView, MempoolBitset > + mch( sb_id[i], sb_id[i] + 1, page_histogram, blocksize_info, m_sb_header, + m_sb_blocks, m_lg_max_sb_blocks, LG_MIN_BLOCK_SIZE, BLOCKS_PER_PAGE, result ); + } + + deep_copy( host_page_histogram, page_histogram ); + + printf( "SB_ID USED_BLOCKS PAGE_COUNT\n" ); + for ( uint32_t j = 0; j < 33; ++j ) { + printf( "%5u %10u %10u\n", sb_id[i], j, host_page_histogram[j] ); + } + printf( "\n" ); + } + +/* + // Print the blocks used for each page of a few individual superblocks. + for ( uint32_t i = 0; i < num_sb_id; ++i ) { + uint32_t lg_block_size = host_sb_header(sb_id[i]).m_lg_block_size; + if ( lg_block_size != 0 ) { + printf( "SB_ID BLOCK ID USED_BLOCKS\n" ); + + uint32_t block_size_id = lg_block_size - LG_MIN_BLOCK_SIZE; + uint32_t pages_per_sb = m_blocksize_info[block_size_id].m_pages_per_sb; + + for ( uint32_t j = 0; j < pages_per_sb; ++j ) { + unsigned start_pos = ( sb_id[i] << m_lg_max_sb_blocks ) + j * BLOCKS_PER_PAGE; + unsigned end_pos = start_pos + BLOCKS_PER_PAGE; + uint32_t num_allocated_blocks = 0; + + for ( unsigned k = start_pos; k < end_pos; ++k ) { + num_allocated_blocks += m_sb_blocks.test( k ); + } + + printf( "%5u %8u %11u\n", sb_id[i], j, num_allocated_blocks ); + } + + printf( "\n" ); + } + } +*/ +#endif + + printf( " Used blocks: %10u / %10u = %10.6lf\n", used_blocks, total_blocks, + percent_used_blocks ); + printf( " Used pages: %10u / %10u = %10.6lf\n", used_pages, total_pages, + percent_used_pages ); + printf( " Used SB: %10zu / %10zu = %10.6lf\n", m_num_sb - num_empty_sb, m_num_sb, + percent_used_sb ); + printf( " Active SB: %10u\n", num_active_sb ); + printf( " Empty SB: %10u\n", num_empty_sb ); + printf( " Partfull SB: 
%10u\n", num_partfull_sb ); + printf( " Full SB: %10lu\n", + m_num_sb - num_active_sb - num_empty_sb - num_partfull_sb ); + printf( "Ave. SB Full %%: %10.6lf\n", ave_sb_full ); + printf( "\n" ); + fflush( stdout ); + +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + fflush( stdout ); +#endif + } + + KOKKOS_INLINE_FUNCTION + size_t get_min_block_size() const { return MIN_BLOCK_SIZE; } + + size_t get_mem_size() const { return m_data_size; } + +private: + /// \brief Returns the index into the active array for the given size. + /// + /// Computes log2 of the largest power of two >= the given size + /// ( ie ceil( log2(size) ) ) shifted by LG_MIN_BLOCK_SIZE. + KOKKOS_FORCEINLINE_FUNCTION + int get_block_size_index( const size_t size ) const + { + // We know the size fits in a 32 bit unsigned because the size of a + // superblock is limited to 2^31, so casting to an unsigned is safe. + + // Find the most significant nonzero bit. + uint32_t first_nonzero_bit = + Kokkos::Impl::bit_scan_reverse( static_cast<unsigned>( size ) ); + + // If size is an integral power of 2, ceil( log2(size) ) is equal to the + // most significant nonzero bit. Otherwise, you need to add 1. Since the + // minimum block size is MIN_BLOCK_SIZE, make sure ceil( log2(size) ) is at + // least LG_MIN_BLOCK_SIZE. + uint32_t lg2_size = first_nonzero_bit + !Kokkos::Impl::is_integral_power_of_two( size ); + lg2_size = lg2_size > LG_MIN_BLOCK_SIZE ? lg2_size : LG_MIN_BLOCK_SIZE; + + // Return ceil( log2(size) ) shifted so that the value for MIN_BLOCK_SIZE + // is 0. + return lg2_size - LG_MIN_BLOCK_SIZE; + } + + /// \brief Finds a superblock with free space to become a new active superblock. + /// + /// If this function is called, the current active superblock needs to be replaced + /// because it is full. Initially, only the thread that sets the active superblock + /// to full calls this function. 
Other threads can still allocate from the "full" + /// active superblock because a full superblock still has locations available. If + /// a thread tries to allocate from the active superblock when it has no free + /// locations, then that thread will call this function, too, and spin on a lock + /// waiting until the active superblock has been replaced. + KOKKOS_FUNCTION + uint32_t find_superblock( int block_size_id, uint32_t old_sb ) const + { + // Try to grab the lock on the head. + uint32_t lock_sb = + Kokkos::atomic_compare_exchange( &m_active(block_size_id), old_sb, SUPERBLOCK_LOCK ); + + // Initialize the new superblock to be the previous one so the previous + // superblock is returned if a new superblock can't be found. + uint32_t new_sb = lock_sb; + + if ( lock_sb == old_sb ) { + // This thread has the lock. + + // 1. Look for a partially filled superblock that is of the right block + // size. + + size_t max_tries = m_ceil_num_sb >> LG_BLOCKS_PER_PAGE; + size_t tries = 0; + bool search_done = false; + + // Set the starting search position to the beginning of this block + // size's bitset. + unsigned pos = block_size_id * m_ceil_num_sb; + + while (!search_done) { + bool success = false; + unsigned prev_val; + + Kokkos::tie( success, pos ) = m_partfull_sb.reset_any_in_word( pos, prev_val ); + + if ( !success ) { + if ( ++tries >= max_tries ) { + // Exceeded number of words for this block size's bitset. + search_done = true; + } + else { + pos += BLOCKS_PER_PAGE; + } + } + else { + // Found a superblock. + search_done = true; + new_sb = pos - block_size_id * m_ceil_num_sb; + + // Assertions: + // 1. A different superblock than the current should be found. 
+#ifdef KOKKOS_MEMPOOL_PRINTERR + if ( new_sb == lock_sb ) { + printf( "\n** MemoryPool::find_superblock() FOUND_SAME_SUPERBLOCK: %u **\n", + new_sb); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + fflush( stdout ); +#endif + Kokkos::abort( "" ); + } +#endif + + // Set the head status for the superblock. + volatile_store( &m_sb_header(new_sb).m_is_active, uint32_t(true) ); + + // If there was a previous active superblock, mark it as not active. + // It is now in the full category and as such isn't tracked. + if ( lock_sb != INVALID_SUPERBLOCK ) { + volatile_store( &m_sb_header(lock_sb).m_is_active, uint32_t(false) ); + } + + memory_fence(); + } + } + + // 2. Look for an empty superblock. + if ( new_sb == lock_sb ) { + tries = 0; + search_done = false; + + // Set the starting search position to the beginning of this block + // size's bitset. + pos = 0; + + while (!search_done) { + bool success = false; + unsigned prev_val; + + Kokkos::tie( success, pos ) = m_empty_sb.reset_any_in_word( pos, prev_val ); + + if ( !success ) { + if ( ++tries >= max_tries ) { + // Exceeded number of words for this block size's bitset. + search_done = true; + } + else { + pos += BLOCKS_PER_PAGE; + } + } + else { + // Found a superblock. + search_done = true; + new_sb = pos; + + // Assertions: + // 1. A different superblock than the current should be found. +#ifdef KOKKOS_MEMPOOL_PRINTERR + if ( new_sb == lock_sb ) { + printf( "\n** MemoryPool::find_superblock() FOUND_SAME_SUPERBLOCK: %u **\n", + new_sb); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + fflush( stdout ); +#endif + Kokkos::abort( "" ); + } +#endif + + // Set the empty pages, block size, and head status for the + // superblock. 
+ volatile_store( &m_sb_header(new_sb).m_empty_pages, + m_blocksize_info[block_size_id].m_pages_per_sb ); + volatile_store( &m_sb_header(new_sb).m_lg_block_size, + block_size_id + LG_MIN_BLOCK_SIZE ); + volatile_store( &m_sb_header(new_sb).m_is_active, uint32_t(true) ); + + // If there was a previous active superblock, mark it as not active. + // It is now in the full category and as such isn't tracked. + if ( lock_sb != INVALID_SUPERBLOCK ) { + volatile_store( &m_sb_header(lock_sb).m_is_active, uint32_t(false) ); + } + + memory_fence(); + } + } + } + + // Write the new active superblock to release the lock. + atomic_exchange( &m_active(block_size_id), new_sb ); + } + else { + // Either another thread has the lock and is switching the active superblock for + // this block size or another thread has already changed the active superblock + // since this thread read its value. Keep reading the active superblock until + // it isn't locked to get the new active superblock. + do { + new_sb = volatile_load( &m_active(block_size_id) ); + } while ( new_sb == SUPERBLOCK_LOCK ); + + // Assertions: + // 1. An invalid superblock should never be found here. + // 2. If the new superblock is the same as the previous superblock, the + // allocator is empty. +#ifdef KOKKOS_MEMPOOL_PRINTERR + if ( new_sb == INVALID_SUPERBLOCK ) { + printf( "\n** MemoryPool::find_superblock() FOUND_INACTIVE_SUPERBLOCK **\n" ); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + fflush( stdout ); +#endif + Kokkos::abort( "" ); + } +#endif + } + + return new_sb; + } + + /// Returns 64 bits from a clock register. + KOKKOS_FORCEINLINE_FUNCTION + uint64_t get_clock_register(void) const + { +#if defined( __CUDA_ARCH__ ) + // Return value of 64-bit hi-res clock register. + return clock64(); +#elif defined( __i386__ ) || defined( __x86_64 ) + // Return value of 64-bit hi-res clock register. 
+ unsigned a, d; + __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); + return ( (uint64_t) a) | ( ( (uint64_t) d ) << 32 ); +#else + const uint64_t ticks = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + return ticks; +#endif + } +}; + +} // namespace Experimental +} // namespace Kokkos + +#ifdef KOKKOS_MEMPOOL_PRINTERR +#undef KOKKOS_MEMPOOL_PRINTERR +#endif + +#ifdef KOKKOS_MEMPOOL_PRINT_INFO +#undef KOKKOS_MEMPOOL_PRINT_INFO +#endif + +#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO +#undef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO +#endif + +#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO +#undef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO +#endif + +#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO +#undef KOKKOS_MEMPOOL_PRINT_PAGE_INFO +#endif + +#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO +#undef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO +#endif + +#undef KOKKOS_MEMPOOL_SB_FULL_FRACTION +#undef KOKKOS_MEMPOOL_PAGE_FULL_FRACTION + +#endif // KOKKOS_MEMORYPOOL_HPP diff --git a/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp b/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5ee1f16fec854fc0ee45e39c488095fdee73ed4f --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp @@ -0,0 +1,116 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_MEMORYTRAITS_HPP +#define KOKKOS_MEMORYTRAITS_HPP + +#include <impl/Kokkos_Traits.hpp> +#include <impl/Kokkos_Tags.hpp> + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +/** \brief Memory access traits for views, an extension point. + * + * These traits should be orthogonal. If there are dependencies then + * the MemoryTraits template must detect and enforce dependencies. + * + * A zero value is the default for a View, indicating that none of + * these traits are present. 
 */
// Each flag occupies its own bit, so flags can be combined with '|' and the
// combined value passed as the template argument of MemoryTraits.
enum MemoryTraitsFlags
  { Unmanaged    = 0x01
  , RandomAccess = 0x02
  , Atomic       = 0x04
  };

/** \brief Decodes a combination of MemoryTraitsFlags.
 *
 *  \tparam T Bitwise OR of MemoryTraitsFlags values (0 for none).
 *
 *  Each nested enumerator is nonzero exactly when the corresponding flag bit
 *  is set in T.  Its value is the masked bit itself, not 1.
 */
template < unsigned T >
struct MemoryTraits {
  //! Tag this class as a kokkos memory traits:
  typedef MemoryTraits memory_traits ;

  enum { Unmanaged    = T & unsigned(Kokkos::Unmanaged) };
  enum { RandomAccess = T & unsigned(Kokkos::RandomAccess) };
  enum { Atomic       = T & unsigned(Kokkos::Atomic) };

};

} // namespace Kokkos

//----------------------------------------------------------------------------

namespace Kokkos {

// Convenience names for common flag combinations.  Note that
// MemoryRandomAccess sets the Unmanaged flag as well as RandomAccess.
typedef Kokkos::MemoryTraits<0> MemoryManaged ;
typedef Kokkos::MemoryTraits< Kokkos::Unmanaged > MemoryUnmanaged ;
typedef Kokkos::MemoryTraits< Kokkos::Unmanaged | Kokkos::RandomAccess > MemoryRandomAccess ;

} // namespace Kokkos

//----------------------------------------------------------------------------

namespace Kokkos {
namespace Impl {

/** \brief Memory alignment settings
 *
 *  Sets global value for memory alignment. Must be a power of two!
 *  Enable compatibility of views from different devices with static stride.
 *  Use compiler flag to enable overwrites.
 *
 *  The alignment is KOKKOS_MEMORY_ALIGNMENT when that macro is defined and
 *  128 otherwise.
 *  NOTE(review): this relies on integral_power_of_two() returning the exponent
 *  of its argument, so the shift reproduces the value -- confirm in
 *  impl/Kokkos_Traits.hpp.
 */
enum { MEMORY_ALIGNMENT =
#if defined( KOKKOS_MEMORY_ALIGNMENT )
    ( 1 << Kokkos::Impl::integral_power_of_two( KOKKOS_MEMORY_ALIGNMENT ) )
#else
    ( 1 << Kokkos::Impl::integral_power_of_two( 128 ) )
#endif
  , MEMORY_ALIGNMENT_THRESHOLD = 4
  };


} //namespace Impl
} // namespace Kokkos

#endif /* #ifndef KOKKOS_MEMORYTRAITS_HPP */

diff --git a/lib/kokkos/core/src/Kokkos_OpenMP.hpp b/lib/kokkos/core/src/Kokkos_OpenMP.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7be4f8245f98ea464d8a27313c13c7aa35be4e46 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_OpenMP.hpp @@ -0,0 +1,189 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v.
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/

#ifndef KOKKOS_OPENMP_HPP
#define KOKKOS_OPENMP_HPP

#include <Kokkos_Core_fwd.hpp>

// Everything below is compiled only when Kokkos was configured with OpenMP
// support and the compiler itself has OpenMP enabled.
#if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP )

#include <omp.h>

#include <cstddef>
#include <iosfwd>
#include <Kokkos_HostSpace.hpp>
#ifdef KOKKOS_HAVE_HBWSPACE
#include <Kokkos_HBWSpace.hpp>
#endif
#include <Kokkos_ScratchSpace.hpp>
#include <Kokkos_Parallel.hpp>
#include <Kokkos_TaskPolicy.hpp>
#include <Kokkos_Layout.hpp>
#include <impl/Kokkos_Tags.hpp>

#include <KokkosExp_MDRangePolicy.hpp>
/*--------------------------------------------------------------------------*/

namespace Kokkos {

/// \class OpenMP
/// \brief Kokkos device for multicore processors in the host memory space.
///
/// All member functions are static; the class is used as a tag type and as a
/// namespace for the OpenMP back end's entry points.
class OpenMP {
public:
  //------------------------------------
  //! \name Type declarations that all Kokkos devices must provide.
  //@{

  //! Tag this class as a kokkos execution space
  typedef OpenMP                execution_space ;
  // The memory space is Experimental::HBWSpace when KOKKOS_HAVE_HBWSPACE is
  // defined and HostSpace otherwise.
  #ifdef KOKKOS_HAVE_HBWSPACE
  typedef Experimental::HBWSpace memory_space ;
  #else
  typedef HostSpace             memory_space ;
  #endif
  //! This execution space preferred device_type
  typedef Kokkos::Device<execution_space,memory_space> device_type;

  typedef LayoutRight           array_layout ;
  typedef memory_space::size_type  size_type ;

  typedef ScratchMemorySpace< OpenMP > scratch_memory_space ;

  //@}
  //------------------------------------
  //! \name Functions that all Kokkos execution spaces must implement.
  //@{

  /// \brief Whether the caller is inside an OpenMP parallel region; forwards
  ///        to omp_in_parallel().
  inline static bool in_parallel() { return omp_in_parallel(); }

  /** \brief  Set the device in a "sleep" state. A noop for OpenMP.  */
  static bool sleep();

  /** \brief Wake the device from the 'sleep' state. A noop for OpenMP. */
  static bool wake();

  /** \brief Wait until all dispatched functors complete. A noop for OpenMP. */
  static void fence() {}

  /// \brief Print configuration information to the given output stream.
  static void print_configuration( std::ostream & , const bool detail = false );

  /// \brief Free any resources being consumed by the device.
  static void finalize();

  /** \brief  Initialize the device.
   *
   *  1) If the hardware locality library is enabled and OpenMP has not
   *     already bound threads then bind OpenMP threads to maximize
   *     core utilization and group for memory hierarchy locality.
   *
   *  2) Allocate a HostThread for each OpenMP thread to hold its
   *     topology and fan in/out data.
   *
   *  All three arguments default to 0.
   *  NOTE(review): 0 presumably means "let Kokkos choose" -- confirm in the
   *  implementation file.
   */
  static void initialize( unsigned thread_count = 0 ,
                          unsigned use_numa_count = 0 ,
                          unsigned use_cores_per_numa = 0 );

  /// \brief Nonzero when the device has been initialized.
  ///        NOTE(review): inferred from the name; the definition is not in
  ///        this header.
  static int is_initialized();

  /** \brief  Return the maximum amount of concurrency.  */
  static int concurrency();

  //@}
  //------------------------------------
  /** \brief  This execution space has a topological thread pool which can be queried.
   *
   *  All threads within a pool have a common memory space for which they are cache coherent.
   *    depth = 0  gives the number of threads in the whole pool.
   *    depth = 1  gives the number of threads in a NUMA region, typically sharing L3 cache.
   *    depth = 2  gives the number of threads at the finest granularity, typically sharing L1 cache.
   */
  inline static int thread_pool_size( int depth = 0 );

  /** \brief  The rank of the executing thread in this thread pool */
  KOKKOS_INLINE_FUNCTION static int thread_pool_rank();

  //------------------------------------

  /// \brief Size of the whole thread pool; forwards to thread_pool_size(0).
  inline static unsigned max_hardware_threads() { return thread_pool_size(0); }

  /// \brief Rank of the calling thread; forwards to thread_pool_rank().
  KOKKOS_INLINE_FUNCTION static
  unsigned hardware_thread_id() { return thread_pool_rank(); }
};

} // namespace Kokkos

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

namespace Kokkos {
namespace Impl {

// Specialization for the pair (OpenMP memory space, OpenMP scratch memory
// space): value is true and both verify() overloads do nothing.
template<>
struct VerifyExecutionCanAccessMemorySpace
  < Kokkos::OpenMP::memory_space
  , Kokkos::OpenMP::scratch_memory_space
  >
{
  enum { value = true };
  inline static void verify( void ) { }
  inline static void verify( const void * ) { }
};

} // namespace Impl
} // namespace Kokkos

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

// The back-end implementation headers are included last, after class OpenMP
// has been fully declared.
#include <OpenMP/Kokkos_OpenMPexec.hpp>
#include <OpenMP/Kokkos_OpenMP_Parallel.hpp>
#include <OpenMP/Kokkos_OpenMP_Task.hpp>

/*--------------------------------------------------------------------------*/

#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP ) */
#endif /* #ifndef KOKKOS_OPENMP_HPP */


diff --git a/lib/kokkos/core/src/Kokkos_Pair.hpp b/lib/kokkos/core/src/Kokkos_Pair.hpp new file mode 100644 index 0000000000000000000000000000000000000000..83436826f4aded7131802662327d6b80c5b5c785 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Pair.hpp @@ -0,0 +1,530 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +/// \file Kokkos_Pair.hpp +/// \brief Declaration and definition of Kokkos::pair. +/// +/// This header file declares and defines Kokkos::pair and its related +/// nonmember functions. 
+ +#ifndef KOKKOS_PAIR_HPP +#define KOKKOS_PAIR_HPP + +#include <Kokkos_Macros.hpp> +#include <utility> + +namespace Kokkos { +/// \struct pair +/// \brief Replacement for std::pair that works on CUDA devices. +/// +/// The instance methods of std::pair, including its constructors, are +/// not marked as <tt>__device__</tt> functions. Thus, they cannot be +/// called on a CUDA device, such as an NVIDIA GPU. This struct +/// implements the same interface as std::pair, but can be used on a +/// CUDA device as well as on the host. +template <class T1, class T2> +struct pair +{ + //! The first template parameter of this class. + typedef T1 first_type; + //! The second template parameter of this class. + typedef T2 second_type; + + //! The first element of the pair. + first_type first; + //! The second element of the pair. + second_type second; + + /// \brief Default constructor. + /// + /// This calls the default constructors of T1 and T2. It won't + /// compile if those default constructors are not defined and + /// public. + KOKKOS_FORCEINLINE_FUNCTION + pair() + : first(), second() + {} + + /// \brief Constructor that takes both elements of the pair. + /// + /// This calls the copy constructors of T1 and T2. It won't compile + /// if those copy constructors are not defined and public. + KOKKOS_FORCEINLINE_FUNCTION + pair(first_type const& f, second_type const& s) + : first(f), second(s) + {} + + /// \brief Converting constructor from a pair of other element types. + /// + /// This initializes first from p.first and second from p.second. It + /// won't compile if T1 and T2 cannot be constructed from U and V. + template <class U, class V> + KOKKOS_FORCEINLINE_FUNCTION + pair( const pair<U,V> &p) + : first(p.first), second(p.second) + {} + + /// \brief Converting constructor from a volatile pair. + /// + /// This initializes first from p.first and second from p.second. It + /// won't compile if T1 and T2 cannot be constructed from volatile U and V.
+ template <class U, class V> + KOKKOS_FORCEINLINE_FUNCTION + pair( const volatile pair<U,V> &p) + : first(p.first), second(p.second) + {} + + /// \brief Assignment operator. + /// + /// This calls the assignment operators of T1 and T2. It won't + /// compile if the assignment operators are not defined and public. + template <class U, class V> + KOKKOS_FORCEINLINE_FUNCTION + pair<T1, T2> & operator=(const pair<U,V> &p) + { + first = p.first; + second = p.second; + return *this; + } + + + /// \brief Assignment operator, for volatile <tt>*this</tt>. + /// + /// \param p [in] Input; right-hand side of the assignment. + /// + /// This calls the assignment operators of T1 and T2. It will not + /// compile if the assignment operators are not defined and public. + /// + /// This operator returns \c void instead of <tt>volatile pair<T1, + /// T2>& </tt>. See Kokkos Issue #177 for the explanation. In + /// practice, this means that you should not chain assignments with + /// volatile lvalues. + template <class U, class V> + KOKKOS_FORCEINLINE_FUNCTION + void operator=(const volatile pair<U,V> &p) volatile + { + first = p.first; + second = p.second; + // We deliberately do not return anything here. See explanation + // in public documentation above. + } + + // Construct from std::pair<U,V>. Not marked as a device function. + template <class U, class V> + pair( const std::pair<U,V> &p) + : first(p.first), second(p.second) + {} + + /// \brief Return the std::pair version of this object. + /// + /// This is <i>not</i> a device function; you may not call it on a + /// CUDA device. It is meant to be called on the host, if the user + /// wants a std::pair instead of a Kokkos::pair. + /// + /// \note This is not a conversion operator, since defining a + /// conversion operator made the relational operators have + /// ambiguous definitions. + std::pair<T1,T2> to_std_pair() const + { return std::make_pair(first,second); } +}; + +template <class T1, class T2> +struct pair<T1&, T2&> +{ + //!
The first template parameter of this class. + typedef T1& first_type; + //! The type of the second element: a reference to T2. + typedef T2& second_type; + + //! Reference to the first element of the pair. + first_type first; + //! Reference to the second element of the pair. + second_type second; + + /// \brief Constructor that takes both elements of the pair. + /// + /// This binds the reference members first and second to f and s. + /// No T1 or T2 object is copied. + KOKKOS_FORCEINLINE_FUNCTION + pair(first_type f, second_type s) + : first(f), second(s) + {} + + /// \brief Converting constructor from another Kokkos::pair. + /// + /// This initializes first and second from p.first and p.second; + /// the reference members are bound rather than copied. + template <class U, class V> + KOKKOS_FORCEINLINE_FUNCTION + pair( const pair<U,V> &p) + : first(p.first), second(p.second) + {} + + // Construct from std::pair<U,V>. Not marked as a device function. + template <class U, class V> + pair( const std::pair<U,V> &p) + : first(p.first), second(p.second) + {} + + /// \brief Assignment operator. + /// + /// This assigns through the references, using the assignment + /// operators of T1 and T2; the references are not rebound. + template <class U, class V> + KOKKOS_FORCEINLINE_FUNCTION + pair<first_type, second_type> & operator=(const pair<U,V> &p) + { + first = p.first; + second = p.second; + return *this; + } + + /// \brief Return a std::pair<T1,T2> holding copies of the referenced values. + /// + /// This is <i>not</i> a device function; you may not call it on a + /// CUDA device. It is meant to be called on the host, if the user + /// wants a std::pair instead of a Kokkos::pair. + /// + /// \note This is not a conversion operator, since defining a + /// conversion operator made the relational operators have + /// ambiguous definitions. + std::pair<T1,T2> to_std_pair() const + { return std::make_pair(first,second); } +}; + +template <class T1, class T2> +struct pair<T1, T2&> +{ + //! The first template parameter of this class.
+ typedef T1 first_type; + //! The type of the second element: a reference to T2. + typedef T2& second_type; + + //! The first element of the pair. + first_type first; + //! Reference to the second element of the pair. + second_type second; + + /// \brief Constructor that takes both elements of the pair. + /// + /// This copy-constructs first from f and binds the reference + /// member second to s. + KOKKOS_FORCEINLINE_FUNCTION + pair(first_type const& f, second_type s) + : first(f), second(s) + {} + + /// \brief Converting constructor from another Kokkos::pair. + /// + /// This initializes first and second from p.first and p.second; + /// the reference member second is bound rather than copied. + template <class U, class V> + KOKKOS_FORCEINLINE_FUNCTION + pair( const pair<U,V> &p) + : first(p.first), second(p.second) + {} + + // Construct from std::pair<U,V>. Not marked as a device function. + template <class U, class V> + pair( const std::pair<U,V> &p) + : first(p.first), second(p.second) + {} + + /// \brief Assignment operator. + /// + /// This calls the assignment operators of T1 and T2; second is + /// assigned through the reference, which is not rebound. + template <class U, class V> + KOKKOS_FORCEINLINE_FUNCTION + pair<first_type, second_type> & operator=(const pair<U,V> &p) + { + first = p.first; + second = p.second; + return *this; + } + + /// \brief Return a std::pair<T1,T2> holding copies of both elements. + /// + /// This is <i>not</i> a device function; you may not call it on a + /// CUDA device. It is meant to be called on the host, if the user + /// wants a std::pair instead of a Kokkos::pair. + /// + /// \note This is not a conversion operator, since defining a + /// conversion operator made the relational operators have + /// ambiguous definitions. + std::pair<T1,T2> to_std_pair() const + { return std::make_pair(first,second); } +}; + +template <class T1, class T2> +struct pair<T1&, T2> +{ + //! The type of the first element: a reference to T1. + typedef T1& first_type; + //!
The second template parameter of this class. + typedef T2 second_type; + + //! Reference to the first element of the pair. + first_type first; + //! The second element of the pair. + second_type second; + + /// \brief Constructor that takes both elements of the pair. + /// + /// This binds the reference member first to f and copy-constructs + /// second from s. + KOKKOS_FORCEINLINE_FUNCTION + pair(first_type f, second_type const& s) + : first(f), second(s) + {} + + /// \brief Converting constructor from another Kokkos::pair. + /// + /// This initializes first and second from p.first and p.second; + /// the reference member first is bound rather than copied. + template <class U, class V> + KOKKOS_FORCEINLINE_FUNCTION + pair( const pair<U,V> &p) + : first(p.first), second(p.second) + {} + + // Construct from std::pair<U,V>. Not marked as a device function. + template <class U, class V> + pair( const std::pair<U,V> &p) + : first(p.first), second(p.second) + {} + + /// \brief Assignment operator. + /// + /// This calls the assignment operators of T1 and T2; first is + /// assigned through the reference, which is not rebound. + template <class U, class V> + KOKKOS_FORCEINLINE_FUNCTION + pair<first_type, second_type> & operator=(const pair<U,V> &p) + { + first = p.first; + second = p.second; + return *this; + } + + /// \brief Return a std::pair<T1,T2> holding copies of both elements. + /// + /// This is <i>not</i> a device function; you may not call it on a + /// CUDA device. It is meant to be called on the host, if the user + /// wants a std::pair instead of a Kokkos::pair. + /// + /// \note This is not a conversion operator, since defining a + /// conversion operator made the relational operators have + /// ambiguous definitions. + std::pair<T1,T2> to_std_pair() const + { return std::make_pair(first,second); } +}; + +//! Equality operator for Kokkos::pair.
+template <class T1, class T2> +KOKKOS_FORCEINLINE_FUNCTION +bool operator== (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs) +{ return lhs.first==rhs.first && lhs.second==rhs.second; } + +//! Inequality operator for Kokkos::pair; the negation of operator==. +template <class T1, class T2> +KOKKOS_FORCEINLINE_FUNCTION +bool operator!= (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs) +{ return !(lhs==rhs); } + +//! Less-than operator for Kokkos::pair; lexicographic, using only operator< of the elements. +template <class T1, class T2> +KOKKOS_FORCEINLINE_FUNCTION +bool operator< (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs) +{ return lhs.first<rhs.first || (!(rhs.first<lhs.first) && lhs.second<rhs.second); } + +//! Less-than-or-equal-to operator for Kokkos::pair; defined as !(rhs<lhs). +template <class T1, class T2> +KOKKOS_FORCEINLINE_FUNCTION +bool operator<= (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs) +{ return !(rhs<lhs); } + +//! Greater-than operator for Kokkos::pair; defined as rhs<lhs. +template <class T1, class T2> +KOKKOS_FORCEINLINE_FUNCTION +bool operator> (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs) +{ return rhs<lhs; } + +//! Greater-than-or-equal-to operator for Kokkos::pair; defined as !(lhs<rhs). +template <class T1, class T2> +KOKKOS_FORCEINLINE_FUNCTION +bool operator>= (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs) +{ return !(lhs<rhs); } + +/// \brief Return a new pair. +/// +/// This is a "nonmember constructor" for Kokkos::pair. Like +/// std::make_pair, it deduces T1 and T2 from its arguments. +template <class T1,class T2> +KOKKOS_FORCEINLINE_FUNCTION +pair<T1,T2> make_pair (T1 x, T2 y) +{ return ( pair<T1,T2>(x,y) ); } + +/// \brief Return a pair of references to the input arguments. +/// +/// This is analogous to std::tie (new in C++11). You can use it to +/// assign to two variables at once, from the result of a function +/// that returns a pair. For example (<tt>__device__</tt> and +/// <tt>__host__</tt> attributes omitted for brevity): +/// \code +/// // Declaration of the function to call. +/// // First return value: operation count. +/// // Second return value: whether all operations succeeded.
+/// Kokkos::pair<int, bool> someFunction (); +/// +/// // Code that uses Kokkos::tie. +/// int myFunction () { +/// int count = 0; +/// bool success = false; +/// +/// // This assigns to both count and success. +/// Kokkos::tie (count, success) = someFunction (); +/// +/// if (! success) { +/// // ... Some operation failed; +/// // take corrective action ... +/// } +/// return count; +/// } +/// \endcode +/// +/// The line that uses tie() could have been written like this: +/// \code +/// Kokkos::pair<int, bool> result = someFunction (); +/// count = result.first; +/// success = result.second; +/// \endcode +/// +/// Using tie() saves two lines of code and avoids a copy of each +/// element of the pair. The latter could be significant if one or +/// both elements of the pair are more substantial objects than \c int +/// or \c bool. +template <class T1,class T2> +KOKKOS_FORCEINLINE_FUNCTION +pair<T1 &,T2 &> tie (T1 & x, T2 & y) +{ return ( pair<T1 &,T2 &>(x,y) ); } + +// +// Specialization of Kokkos::pair for a \c void second argument. This +// is not actually a "pair"; it only contains one element, the first. +// \c second is an enum constant equal to 0, and the (f, int) constructor ignores its second argument. +template <class T1> +struct pair<T1,void> +{ + typedef T1 first_type; + typedef void second_type; + + first_type first; + enum { second = 0 }; + + KOKKOS_FORCEINLINE_FUNCTION + pair() + : first() + {} + + KOKKOS_FORCEINLINE_FUNCTION + pair(const first_type & f) + : first(f) + {} + + KOKKOS_FORCEINLINE_FUNCTION + pair(const first_type & f, int) + : first(f) + {} + + template <class U> + KOKKOS_FORCEINLINE_FUNCTION + pair( const pair<U,void> &p) + : first(p.first) + {} + + template <class U> + KOKKOS_FORCEINLINE_FUNCTION + pair<T1, void> & operator=(const pair<U,void> &p) + { + first = p.first; + return *this; + } +}; + +// +// Overloads of the relational operators for Kokkos::pair<T1,void>.
+// Every comparison below is defined in terms of the \c first members only. + +template <class T1> +KOKKOS_FORCEINLINE_FUNCTION +bool operator== (const pair<T1,void>& lhs, const pair<T1,void>& rhs) +{ return lhs.first==rhs.first; } + +template <class T1> +KOKKOS_FORCEINLINE_FUNCTION +bool operator!= (const pair<T1,void>& lhs, const pair<T1,void>& rhs) +{ return !(lhs==rhs); } + +template <class T1> +KOKKOS_FORCEINLINE_FUNCTION +bool operator< (const pair<T1,void>& lhs, const pair<T1,void>& rhs) +{ return lhs.first<rhs.first; } + +template <class T1> +KOKKOS_FORCEINLINE_FUNCTION +bool operator<= (const pair<T1,void>& lhs, const pair<T1,void>& rhs) +{ return !(rhs<lhs); } + +template <class T1> +KOKKOS_FORCEINLINE_FUNCTION +bool operator> (const pair<T1,void>& lhs, const pair<T1,void>& rhs) +{ return rhs<lhs; } + +template <class T1> +KOKKOS_FORCEINLINE_FUNCTION +bool operator>= (const pair<T1,void>& lhs, const pair<T1,void>& rhs) +{ return !(lhs<rhs); } + +} // namespace Kokkos + + +#endif //KOKKOS_PAIR_HPP diff --git a/lib/kokkos/core/src/Kokkos_Parallel.hpp b/lib/kokkos/core/src/Kokkos_Parallel.hpp new file mode 100644 index 0000000000000000000000000000000000000000..588dc90af38c14c691c39fc88d22efaba51f6be4 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Parallel.hpp @@ -0,0 +1,527 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2.
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// \file Kokkos_Parallel.hpp +/// \brief Declaration of the parallel_for and parallel_scan dispatch functions + +#ifndef KOKKOS_PARALLEL_HPP +#define KOKKOS_PARALLEL_HPP + +#include <cstddef> +#include <Kokkos_Core_fwd.hpp> +#include <Kokkos_View.hpp> +#include <Kokkos_ExecPolicy.hpp> + +#if (KOKKOS_ENABLE_PROFILING) +#include <impl/Kokkos_Profiling_Interface.hpp> +#include <typeinfo> +#endif + +#include <impl/Kokkos_Tags.hpp> +#include <impl/Kokkos_Traits.hpp> +#include <impl/Kokkos_FunctorAdapter.hpp> + +#ifdef KOKKOS_HAVE_DEBUG +#include<iostream> +#endif + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- +/** \brief Select the execution space to use for a given Functor and Execution Policy.
+ * + * if the Policy has an execution space use that + * else if the Functor has an execution_space use that + * else if the Functor has a device_type use that for backward compatibility + * else use the default + */ +template< class Functor + , class Policy + , class EnableFunctor + , class EnablePolicy + > +struct FunctorPolicyExecutionSpace { + typedef Kokkos::DefaultExecutionSpace execution_space ; +}; + +template< class Functor , class Policy > +struct FunctorPolicyExecutionSpace + < Functor , Policy + , typename enable_if_type< typename Functor::device_type >::type + , typename enable_if_type< typename Policy ::execution_space >::type + > +{ + typedef typename Policy ::execution_space execution_space ; +}; + +template< class Functor , class Policy > +struct FunctorPolicyExecutionSpace + < Functor , Policy + , typename enable_if_type< typename Functor::execution_space >::type + , typename enable_if_type< typename Policy ::execution_space >::type + > +{ + typedef typename Policy ::execution_space execution_space ; +}; + +template< class Functor , class Policy , class EnableFunctor > +struct FunctorPolicyExecutionSpace + < Functor , Policy + , EnableFunctor + , typename enable_if_type< typename Policy::execution_space >::type + > +{ + typedef typename Policy ::execution_space execution_space ; +}; + +template< class Functor , class Policy , class EnablePolicy > +struct FunctorPolicyExecutionSpace + < Functor , Policy + , typename enable_if_type< typename Functor::device_type >::type + , EnablePolicy + > +{ + typedef typename Functor::device_type execution_space ; +}; + +template< class Functor , class Policy , class EnablePolicy > +struct FunctorPolicyExecutionSpace + < Functor , Policy + , typename enable_if_type< typename Functor::execution_space >::type + , EnablePolicy + > +{ + typedef typename Functor::execution_space execution_space ; +}; + +} // namespace Impl +} // namespace Kokkos +
+//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +/** \brief Execute \c functor in parallel according to the execution \c policy. + * + * A "functor" is a class containing the function to execute in parallel, + * data needed for that execution, and an optional \c execution_space + * typedef. Here is an example functor for parallel_for: + * + * \code + * class FunctorType { + * public: + * typedef ... execution_space ; + * void operator() ( WorkType iwork ) const ; + * }; + * \endcode + * + * In the above example, \c WorkType is any integer type for which a + * valid conversion from \c size_t to \c WorkType exists. The functor's + * <tt>operator()</tt> method defines the operation to parallelize, + * over the range of integer indices <tt>iwork=[0,work_count-1]</tt>. + * This corresponds to a single iteration \c iwork of a \c for loop. + * If \c execution_space is not defined DefaultExecutionSpace will be used. The optional \c str labels the kernel for the profiling hooks; when it is empty, <tt>typeid(FunctorType).name()</tt> is used instead. + */ +template< class ExecPolicy , class FunctorType > +inline +void parallel_for( const ExecPolicy & policy + , const FunctorType & functor + , const std::string& str = "" + , typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0 + ) +{ +#if (KOKKOS_ENABLE_PROFILING) + uint64_t kpID = 0; + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::beginParallelFor("" == str ?
typeid(FunctorType).name() : str, 0, &kpID); + } +#endif + + Kokkos::Impl::shared_allocation_tracking_claim_and_disable(); + Impl::ParallelFor< FunctorType , ExecPolicy > closure( functor , policy ); + Kokkos::Impl::shared_allocation_tracking_release_and_enable(); + + closure.execute(); + +#if (KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelFor(kpID); + } +#endif +} + +template< class FunctorType > +inline +void parallel_for( const size_t work_count + , const FunctorType & functor + , const std::string& str = "" + ) +{ + typedef typename + Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space + execution_space ; + typedef RangePolicy< execution_space > policy ; + +#if (KOKKOS_ENABLE_PROFILING) + uint64_t kpID = 0; + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::beginParallelFor("" == str ? typeid(FunctorType).name() : str, 0, &kpID); + } +#endif + + Kokkos::Impl::shared_allocation_tracking_claim_and_disable(); + Impl::ParallelFor< FunctorType , policy > closure( functor , policy(0,work_count) ); + Kokkos::Impl::shared_allocation_tracking_release_and_enable(); + + closure.execute(); + +#if (KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelFor(kpID); + } +#endif +} + +template< class ExecPolicy , class FunctorType > +inline +void parallel_for( const std::string & str + , const ExecPolicy & policy + , const FunctorType & functor ) +{ + #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES + Kokkos::fence(); + std::cout << "KOKKOS_DEBUG Start parallel_for kernel: " << str << std::endl; + #endif + + parallel_for(policy,functor,str); + + #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES + Kokkos::fence(); + std::cout << "KOKKOS_DEBUG End parallel_for kernel: " << str << std::endl; + #endif + (void) str; +} + +} + +#include <Kokkos_Parallel_Reduce.hpp> +//----------------------------------------------------------------------------
+//---------------------------------------------------------------------------- + +namespace Kokkos { + +/// \fn parallel_scan +/// \tparam ExecutionPolicy The execution policy type. +/// \tparam FunctorType The scan functor type. +/// \param policy [in] The execution policy. +/// \param functor [in] The scan functor. +/// \param str [in] Optional kernel label passed to the profiling hooks. +/// +/// This function implements a parallel scan pattern. The scan can +/// be either inclusive or exclusive, depending on how you implement +/// the scan functor. +/// +/// A scan functor looks almost exactly like a reduce functor, except +/// that its operator() takes a third \c bool argument, \c final_pass, +/// which indicates whether this is the last pass of the scan +/// operation. We will show below how to use the \c final_pass +/// argument to control whether the scan is inclusive or exclusive. +/// +/// Here is the minimum required interface of a scan functor for a POD +/// (plain old data) value type \c PodType. That is, the result is a +/// View of zero or more PodType. It is also possible for the result +/// to be an array of (same-sized) arrays of PodType, but we do not +/// show the required interface for that here. +/// \code +/// template< class ExecPolicy , class FunctorType > +/// class ScanFunctor { +/// public: +/// // The Kokkos device type +/// typedef ... execution_space; +/// // Type of an entry of the array containing the result; +/// // also the type of each of the entries combined using +/// // operator() or join(). +/// typedef PodType value_type; +/// +/// void operator () (const typename ExecPolicy::member_type & i, value_type& update, const bool final_pass) const; +/// void init (value_type& update) const; +/// void join (volatile value_type& update, volatile const value_type& input) const; +/// }; +/// \endcode +/// +/// Here is an example of a functor which computes an inclusive plus-scan +/// of an array \c x of \c int into an array \c y. If given x = [1, 2, 3, 4], this +/// scan will fill \c y with [1, 3, 6, 10].
+/// +/// \code +/// template<class SpaceType> +/// class InclScanFunctor { +/// public: +/// typedef SpaceType execution_space; +/// typedef int value_type; +/// typedef typename SpaceType::size_type size_type; +/// +/// InclScanFunctor( Kokkos::View<value_type*, execution_space> x +/// , Kokkos::View<value_type*, execution_space> y ) : m_x(x), m_y(y) {} +/// +/// void operator () (const size_type i, value_type& update, const bool final_pass) const { +/// update += m_x(i); +/// if (final_pass) { +/// m_y(i) = update; +/// } +/// } +/// void init (value_type& update) const { +/// update = 0; +/// } +/// void join (volatile value_type& update, volatile const value_type& input) const { +/// update += input; +/// } +/// +/// private: +/// Kokkos::View<value_type*, execution_space> m_x; +/// Kokkos::View<value_type*, execution_space> m_y; +/// }; +/// \endcode +/// +/// Here is an example of a functor which computes an <i>exclusive</i> +/// scan of an array of \c int, in place. In operator(), note both +/// that the final_pass test and the update have switched places, and +/// the use of a temporary. If given an array [1, 2, 3, 4], this scan +/// will overwrite that array with [0, 1, 3, 6].
+/// +/// \code +/// template<class SpaceType> +/// class ExclScanFunctor { +/// public: +/// typedef SpaceType execution_space; +/// typedef int value_type; +/// typedef typename SpaceType::size_type size_type; +/// +/// ExclScanFunctor (Kokkos::View<value_type*, execution_space> x) : x_ (x) {} +/// +/// void operator () (const size_type i, value_type& update, const bool final_pass) const { +/// const value_type x_i = x_(i); +/// if (final_pass) { +/// x_(i) = update; +/// } +/// update += x_i; +/// } +/// void init (value_type& update) const { +/// update = 0; +/// } +/// void join (volatile value_type& update, volatile const value_type& input) const { +/// update += input; +/// } +/// +/// private: +/// Kokkos::View<value_type*, execution_space> x_; +/// }; +/// \endcode +/// +/// Here is an example of a functor which builds on the above +/// exclusive scan example, to compute an offsets array \c y from a +/// population count array \c x. We assume that \c y has one more +/// entry than \c x, to store the final count, and that the scan runs +/// over the indices of \c x. If given x = [1, 2, 3, 4], this scan +/// will fill \c y with [0, 1, 3, 6, 10]. +/// +/// \code +/// template<class SpaceType> +/// class OffsetScanFunctor { +/// public: +/// typedef SpaceType execution_space; +/// typedef int value_type; +/// typedef typename SpaceType::size_type size_type; +/// +/// // last_index_ is the last valid index (zero-based) of x. +/// // If x has length zero, then last_index_ won't be used anyway. +/// OffsetScanFunctor( Kokkos::View<value_type*, execution_space> x +/// , Kokkos::View<value_type*, execution_space> y ) +/// : m_x(x), m_y(y), last_index_ (x.dimension_0 () == 0 ? 0 : x.dimension_0 () - 1) +/// {} +/// +/// void operator () (const size_type i, int& update, const bool final_pass) const { +/// if (final_pass) { +/// m_y(i) = update; +/// } +/// update += m_x(i); +/// // The last entry of m_y gets the final sum.
+/// if (final_pass && i == last_index_) { +/// m_y(i+1) = update; +/// } +/// } +/// void init (value_type& update) const { +/// update = 0; +/// } +/// void join (volatile value_type& update, volatile const value_type& input) const { +/// update += input; +/// } +/// +/// private: +/// Kokkos::View<value_type*, execution_space> m_x; +/// Kokkos::View<value_type*, execution_space> m_y; +/// const size_type last_index_; +/// }; +/// \endcode +/// When \c str is empty, <tt>typeid(FunctorType).name()</tt> is passed to the profiling hooks instead. +template< class ExecutionPolicy , class FunctorType > +inline +void parallel_scan( const ExecutionPolicy & policy + , const FunctorType & functor + , const std::string& str = "" + , typename Impl::enable_if< ! Impl::is_integral< ExecutionPolicy >::value >::type * = 0 + ) +{ +#if (KOKKOS_ENABLE_PROFILING) + uint64_t kpID = 0; + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::beginParallelScan("" == str ? typeid(FunctorType).name() : str, 0, &kpID); + } +#endif + + Kokkos::Impl::shared_allocation_tracking_claim_and_disable(); + Impl::ParallelScan< FunctorType , ExecutionPolicy > closure( functor , policy ); + Kokkos::Impl::shared_allocation_tracking_release_and_enable(); + + closure.execute(); + +#if (KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelScan(kpID); + } +#endif + +} + +template< class FunctorType > +inline +void parallel_scan( const size_t work_count + , const FunctorType & functor + , const std::string& str = "" ) +{ + typedef typename + Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space + execution_space ; + + typedef Kokkos::RangePolicy< execution_space > policy ; + +#if (KOKKOS_ENABLE_PROFILING) + uint64_t kpID = 0; + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::beginParallelScan("" == str ?
typeid(FunctorType).name() : str, 0, &kpID); + } +#endif + + Kokkos::Impl::shared_allocation_tracking_claim_and_disable(); + Impl::ParallelScan< FunctorType , policy > closure( functor , policy(0,work_count) ); + Kokkos::Impl::shared_allocation_tracking_release_and_enable(); + + closure.execute(); + +#if (KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelScan(kpID); + } +#endif + +} + +template< class ExecutionPolicy , class FunctorType > +inline +void parallel_scan( const std::string& str + , const ExecutionPolicy & policy + , const FunctorType & functor) +{ + #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES + Kokkos::fence(); + std::cout << "KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl; + #endif + + parallel_scan(policy,functor,str); + + #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES + Kokkos::fence(); + std::cout << "KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl; + #endif + (void) str; +} + +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { +// FunctorTeamShmemSize<F>::value(f, team_size) returns f.team_shmem_size(team_size) or f.shmem_size(team_size) when F declares one of those members, and 0 otherwise. +template< class FunctorType , class Enable = void > +struct FunctorTeamShmemSize +{ + KOKKOS_INLINE_FUNCTION static size_t value( const FunctorType & , int ) { return 0 ; } +}; + +template< class FunctorType > +struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::team_shmem_size ) >::type > +{ + static inline size_t value( const FunctorType & f , int team_size ) { return f.team_shmem_size( team_size ) ; } +}; + +template< class FunctorType > +struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::shmem_size ) >::type > +{ + static inline size_t value( const FunctorType & f , int team_size ) { return f.shmem_size( team_size ) ; } +}; + +} // namespace Impl +} // namespace Kokkos +
+//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* KOKKOS_PARALLEL_HPP */ + diff --git a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp new file mode 100644 index 0000000000000000000000000000000000000000..695bc79a1ab900405a160843d8777651dc63cb22 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp @@ -0,0 +1,1240 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +namespace Kokkos { + + +template<class T, class Enable = void> +struct is_reducer_type { + enum { value = 0 }; +}; + + +template<class T> +struct is_reducer_type<T,typename std::enable_if< + std::is_same<T,typename T::reducer_type>::value + >::type> { + enum { value = 1 }; +}; + +namespace Experimental { + + +template<class Scalar,class Space = HostSpace> +struct Sum { +public: + //Required + typedef Sum reducer_type; + typedef Scalar value_type; + + typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; + + value_type init_value; + +private: + result_view_type result; + + template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > + struct InitWrapper; + + template<class ValueType > + struct InitWrapper<ValueType,true> { + static ValueType value() { + return static_cast<value_type>(0); + } + }; + + template<class ValueType > + struct InitWrapper<ValueType,false> { + static ValueType value() { + return value_type(); + } + }; + +public: + + Sum(value_type& result_): + init_value(InitWrapper<value_type>::value()),result(&result_) {} + Sum(const result_view_type& result_): + init_value(InitWrapper<value_type>::value()),result(result_) {} + Sum(value_type& result_, const value_type& init_value_): + 
init_value(init_value_),result(&result_) {} + Sum(const result_view_type& result_, const value_type& init_value_): + init_value(init_value_),result(result_) {} + + //Required + KOKKOS_INLINE_FUNCTION + void join(value_type& dest, const value_type& src) const { + dest += src; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& dest, const volatile value_type& src) const { + dest += src; + } + + //Optional + KOKKOS_INLINE_FUNCTION + void init( value_type& val) const { + val = init_value; + } + + result_view_type result_view() const { + return result; + } +}; + +template<class Scalar,class Space = HostSpace> +struct Prod { +public: + //Required + typedef Prod reducer_type; + typedef Scalar value_type; + + typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; + + value_type init_value; + +private: + result_view_type result; + + template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > + struct InitWrapper; + + template<class ValueType > + struct InitWrapper<ValueType,true> { + static ValueType value() { + return static_cast<value_type>(1); + } + }; + + template<class ValueType > + struct InitWrapper<ValueType,false> { + static ValueType value() { + return value_type(); + } + }; + +public: + + Prod(value_type& result_): + init_value(InitWrapper<value_type>::value()),result(&result_) {} + Prod(const result_view_type& result_): + init_value(InitWrapper<value_type>::value()),result(result_) {} + Prod(value_type& result_, const value_type& init_value_): + init_value(init_value_),result(&result_) {} + Prod(const result_view_type& result_, const value_type& init_value_): + init_value(init_value_),result(result_) {} + + //Required + KOKKOS_INLINE_FUNCTION + void join(value_type& dest, const value_type& src) const { + dest *= src; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& dest, const volatile value_type& src) const { + dest *= src; + } + + //Optional + 
KOKKOS_INLINE_FUNCTION + void init( value_type& val) const { + val = init_value; + } + + result_view_type result_view() const { + return result; + } +}; + +template<class Scalar, class Space = HostSpace> +struct Min { +public: + //Required + typedef Min reducer_type; + typedef typename std::remove_cv<Scalar>::type value_type; + + typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; + + value_type init_value; + +private: + result_view_type result; + + template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > + struct InitWrapper; + + template<class ValueType > + struct InitWrapper<ValueType,true> { + static ValueType value() { + return std::numeric_limits<value_type>::max(); + } + }; + + template<class ValueType > + struct InitWrapper<ValueType,false> { + static ValueType value() { + return value_type(); + } + }; + +public: + + Min(value_type& result_): + init_value(InitWrapper<value_type>::value()),result(&result_) {} + Min(const result_view_type& result_): + init_value(InitWrapper<value_type>::value()),result(result_) {} + Min(value_type& result_, const value_type& init_value_): + init_value(init_value_),result(&result_) {} + Min(const result_view_type& result_, const value_type& init_value_): + init_value(init_value_),result(result_) {} + + //Required + KOKKOS_INLINE_FUNCTION + void join(value_type& dest, const value_type& src) const { + if ( src < dest ) + dest = src; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& dest, const volatile value_type& src) const { + if ( src < dest ) + dest = src; + } + + //Optional + KOKKOS_INLINE_FUNCTION + void init( value_type& val) const { + val = init_value; + } + + result_view_type result_view() const { + return result; + } +}; + +template<class Scalar, class Space = HostSpace> +struct Max { +public: + //Required + typedef Max reducer_type; + typedef typename std::remove_cv<Scalar>::type value_type; + + typedef Kokkos::View<value_type, 
Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; + + value_type init_value; + +private: + result_view_type result; + + template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > + struct InitWrapper; + + template<class ValueType > + struct InitWrapper<ValueType,true> { + static ValueType value() { + return std::numeric_limits<value_type>::min(); + } + }; + + template<class ValueType > + struct InitWrapper<ValueType,false> { + static ValueType value() { + return value_type(); + } + }; + +public: + + Max(value_type& result_): + init_value(InitWrapper<value_type>::value()),result(&result_) {} + Max(const result_view_type& result_): + init_value(InitWrapper<value_type>::value()),result(result_) {} + Max(value_type& result_, const value_type& init_value_): + init_value(init_value_),result(&result_) {} + Max(const result_view_type& result_, const value_type& init_value_): + init_value(init_value_),result(result_) {} + + //Required + KOKKOS_INLINE_FUNCTION + void join(value_type& dest, const value_type& src) const { + if ( src > dest ) + dest = src; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& dest, const volatile value_type& src) const { + if ( src > dest ) + dest = src; + } + + //Optional + KOKKOS_INLINE_FUNCTION + void init( value_type& val) const { + val = init_value; + } + + result_view_type result_view() const { + return result; + } +}; + +template<class Scalar, class Space = HostSpace> +struct LAnd { +public: + //Required + typedef LAnd reducer_type; + typedef Scalar value_type; + + typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; + +private: + result_view_type result; + +public: + + LAnd(value_type& result_):result(&result_) {} + LAnd(const result_view_type& result_):result(result_) {} + + //Required + KOKKOS_INLINE_FUNCTION + void join(value_type& dest, const value_type& src) const { + dest = dest && src; + } + + KOKKOS_INLINE_FUNCTION + void 
join(volatile value_type& dest, const volatile value_type& src) const { + dest = dest && src; + } + + //Optional + KOKKOS_INLINE_FUNCTION + void init( value_type& val) const { + val = 1; + } + + result_view_type result_view() const { + return result; + } +}; + +template<class Scalar, class Space = HostSpace> +struct LOr { +public: + //Required + typedef LOr reducer_type; + typedef Scalar value_type; + + typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; + +private: + result_view_type result; + +public: + + LOr(value_type& result_):result(&result_) {} + LOr(const result_view_type& result_):result(result_) {} + + //Required + KOKKOS_INLINE_FUNCTION + void join(value_type& dest, const value_type& src) const { + dest = dest || src; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& dest, const volatile value_type& src) const { + dest = dest || src; + } + + //Optional + KOKKOS_INLINE_FUNCTION + void init( value_type& val) const { + val = 0; + } + + result_view_type result_view() const { + return result; + } +}; + +template<class Scalar, class Space = HostSpace> +struct LXor { +public: + //Required + typedef LXor reducer_type; + typedef Scalar value_type; + + typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; + +private: + result_view_type result; + +public: + + LXor(value_type& result_):result(&result_) {} + LXor(const result_view_type& result_):result(result_) {} + + //Required + KOKKOS_INLINE_FUNCTION + void join(value_type& dest, const value_type& src) const { + dest = dest? (!src) : src; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& dest, const volatile value_type& src) const { + dest = dest? 
(!src) : src; + } + + //Optional + KOKKOS_INLINE_FUNCTION + void init( value_type& val) const { + val = 0; + } + + result_view_type result_view() const { + return result; + } +}; + +template<class Scalar, class Space = HostSpace> +struct BAnd { +public: + //Required + typedef BAnd reducer_type; + typedef typename std::remove_cv<Scalar>::type value_type; + + typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; + + value_type init_value; + +private: + result_view_type result; + +public: + + BAnd(value_type& result_): + init_value(value_type() | (~value_type())),result(&result_) {} + BAnd(const result_view_type& result_): + init_value(value_type() | (~value_type())),result(result_) {} + + //Required + KOKKOS_INLINE_FUNCTION + void join(value_type& dest, const value_type& src) const { + dest = dest & src; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& dest, const volatile value_type& src) const { + dest = dest & src; + } + + //Optional + KOKKOS_INLINE_FUNCTION + void init( value_type& val) const { + val = init_value; + } + + result_view_type result_view() const { + return result; + } +}; + +template<class Scalar, class Space = HostSpace> +struct BOr { +public: + //Required + typedef BOr reducer_type; + typedef typename std::remove_cv<Scalar>::type value_type; + + typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; + + value_type init_value; + +private: + result_view_type result; + +public: + + BOr(value_type& result_): + init_value(value_type() & (~value_type())),result(&result_) {} + BOr(const result_view_type& result_): + init_value(value_type() & (~value_type())),result(result_) {} + + //Required + KOKKOS_INLINE_FUNCTION + void join(value_type& dest, const value_type& src) const { + dest = dest | src; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& dest, const volatile value_type& src) const { + dest = dest | src; + } + + //Optional + 
KOKKOS_INLINE_FUNCTION + void init( value_type& val) const { + val = init_value; + } + + result_view_type result_view() const { + return result; + } +}; + +template<class Scalar, class Space = HostSpace> +struct BXor { +public: + //Required + typedef BXor reducer_type; + typedef typename std::remove_cv<Scalar>::type value_type; + + typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; + + value_type init_value; + +private: + result_view_type result; + +public: + + BXor(value_type& result_): + init_value(value_type() & (~value_type())),result(&result_) {} + BXor(const result_view_type& result_): + init_value(value_type() & (~value_type())),result(result_) {} + + //Required + KOKKOS_INLINE_FUNCTION + void join(value_type& dest, const value_type& src) const { + dest = dest ^ src; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& dest, const volatile value_type& src) const { + dest = dest ^ src; + } + + //Optional + KOKKOS_INLINE_FUNCTION + void init( value_type& val) const { + val = init_value; + } + + result_view_type result_view() const { + return result; + } +}; + +template<class Scalar, class Index> +struct ValLocScalar { + Scalar val; + Index loc; + + KOKKOS_INLINE_FUNCTION + void operator = (const ValLocScalar& rhs) { + val = rhs.val; + loc = rhs.loc; + } + + KOKKOS_INLINE_FUNCTION + void operator = (const volatile ValLocScalar& rhs) volatile { + val = rhs.val; + loc = rhs.loc; + } +}; + +template<class Scalar, class Index, class Space = HostSpace> +struct MinLoc { +private: + typedef typename std::remove_cv<Scalar>::type scalar_type; + typedef typename std::remove_cv<Index>::type index_type; + +public: + //Required + typedef MinLoc reducer_type; + typedef ValLocScalar<scalar_type,index_type> value_type; + + typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; + + scalar_type init_value; + +private: + result_view_type result; + + template<class ValueType, 
bool is_arithmetic = std::is_arithmetic<ValueType>::value > + struct InitWrapper; + + template<class ValueType > + struct InitWrapper<ValueType,true> { + static ValueType value() { + return std::numeric_limits<scalar_type>::max(); + } + }; + + template<class ValueType > + struct InitWrapper<ValueType,false> { + static ValueType value() { + return scalar_type(); + } + }; + +public: + + MinLoc(value_type& result_): + init_value(InitWrapper<scalar_type>::value()),result(&result_) {} + MinLoc(const result_view_type& result_): + init_value(InitWrapper<scalar_type>::value()),result(result_) {} + MinLoc(value_type& result_, const scalar_type& init_value_): + init_value(init_value_),result(&result_) {} + MinLoc(const result_view_type& result_, const scalar_type& init_value_): + init_value(init_value_),result(result_) {} + + + //Required + KOKKOS_INLINE_FUNCTION + void join(value_type& dest, const value_type& src) const { + if ( src.val < dest.val ) + dest = src; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& dest, const volatile value_type& src) const { + if ( src.val < dest.val ) + dest = src; + } + + //Optional + KOKKOS_INLINE_FUNCTION + void init( value_type& val) const { + val.val = init_value; + } + + result_view_type result_view() const { + return result; + } +}; + +template<class Scalar, class Index, class Space = HostSpace> +struct MaxLoc { +private: + typedef typename std::remove_cv<Scalar>::type scalar_type; + typedef typename std::remove_cv<Index>::type index_type; + +public: + //Required + typedef MaxLoc reducer_type; + typedef ValLocScalar<scalar_type,index_type> value_type; + + typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; + + scalar_type init_value; + +private: + result_view_type result; + + template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > + struct InitWrapper; + + template<class ValueType > + struct InitWrapper<ValueType,true> { + static ValueType 
value() { + return std::numeric_limits<scalar_type>::min(); + } + }; + + template<class ValueType > + struct InitWrapper<ValueType,false> { + static ValueType value() { + return scalar_type(); + } + }; + +public: + + MaxLoc(value_type& result_): + init_value(InitWrapper<scalar_type>::value()),result(&result_) {} + MaxLoc(const result_view_type& result_): + init_value(InitWrapper<scalar_type>::value()),result(result_) {} + MaxLoc(value_type& result_, const scalar_type& init_value_): + init_value(init_value_),result(&result_) {} + MaxLoc(const result_view_type& result_, const scalar_type& init_value_): + init_value(init_value_),result(result_) {} + + //Required + KOKKOS_INLINE_FUNCTION + void join(value_type& dest, const value_type& src) const { + if ( src.val > dest.val ) + dest = src; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& dest, const volatile value_type& src) const { + if ( src.val > dest.val ) + dest = src; + } + + //Optional + KOKKOS_INLINE_FUNCTION + void init( value_type& val) const { + val.val = init_value; + } + + result_view_type result_view() const { + return result; + } +}; + +template<class Scalar, class Index> +struct MinMaxLocScalar { + Scalar min_val,max_val; + Index min_loc,max_loc; + + KOKKOS_INLINE_FUNCTION + void operator = (const MinMaxLocScalar& rhs) { + min_val = rhs.min_val; + min_loc = rhs.min_loc; + max_val = rhs.max_val; + max_loc = rhs.max_loc; + } + + KOKKOS_INLINE_FUNCTION + void operator = (const volatile MinMaxLocScalar& rhs) volatile { + min_val = rhs.min_val; + min_loc = rhs.min_loc; + max_val = rhs.max_val; + max_loc = rhs.max_loc; + } +}; + +template<class Scalar, class Index, class Space = HostSpace> +struct MinMaxLoc { +private: + typedef typename std::remove_cv<Scalar>::type scalar_type; + typedef typename std::remove_cv<Index>::type index_type; + +public: + //Required + typedef MinMaxLoc reducer_type; + typedef MinMaxLocScalar<scalar_type,index_type> value_type; + + typedef Kokkos::View<value_type, 
Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; + + scalar_type min_init_value; + scalar_type max_init_value; + +private: + result_view_type result; + + template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > + struct MinInitWrapper; + + template<class ValueType > + struct MinInitWrapper<ValueType,true> { + static ValueType value() { + return std::numeric_limits<scalar_type>::max(); + } + }; + + template<class ValueType > + struct MinInitWrapper<ValueType,false> { + static ValueType value() { + return scalar_type(); + } + }; + + template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > + struct MaxInitWrapper; + + template<class ValueType > + struct MaxInitWrapper<ValueType,true> { + static ValueType value() { + return std::numeric_limits<scalar_type>::min(); + } + }; + + template<class ValueType > + struct MaxInitWrapper<ValueType,false> { + static ValueType value() { + return scalar_type(); + } + }; + +public: + + MinMaxLoc(value_type& result_): + min_init_value(MinInitWrapper<scalar_type>::value()),max_init_value(MaxInitWrapper<scalar_type>::value()),result(&result_) {} + MinMaxLoc(const result_view_type& result_): + min_init_value(MinInitWrapper<scalar_type>::value()),max_init_value(MaxInitWrapper<scalar_type>::value()),result(result_) {} + MinMaxLoc(value_type& result_, const scalar_type& min_init_value_, const scalar_type& max_init_value_): + min_init_value(min_init_value_),max_init_value(max_init_value_),result(&result_) {} + MinMaxLoc(const result_view_type& result_, const scalar_type& min_init_value_, const scalar_type& max_init_value_): + min_init_value(min_init_value_),max_init_value(max_init_value_),result(result_) {} + + //Required + KOKKOS_INLINE_FUNCTION + void join(value_type& dest, const value_type& src) const { + if ( src.min_val < dest.min_val ) { + dest.min_val = src.min_val; + dest.min_loc = src.min_loc; + } + if ( src.max_val > dest.max_val ) { + dest.max_val = 
src.max_val; + dest.max_loc = src.max_loc; + } + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& dest, const volatile value_type& src) const { + if ( src.min_val < dest.min_val ) { + dest.min_val = src.min_val; + dest.min_loc = src.min_loc; + } + if ( src.max_val > dest.max_val ) { + dest.max_val = src.max_val; + dest.max_loc = src.max_loc; + } + } + + //Optional + KOKKOS_INLINE_FUNCTION + void init( value_type& val) const { + val.min_val = min_init_value; + val.max_val = max_init_value; + } + + result_view_type result_view() const { + return result; + } +}; +} +} + + +namespace Kokkos { +namespace Impl { + +template< class T, class ReturnType , class ValueTraits> +struct ParallelReduceReturnValue; + +template< class ReturnType , class FunctorType > +struct ParallelReduceReturnValue<typename std::enable_if<Kokkos::is_view<ReturnType>::value>::type, ReturnType, FunctorType> { + typedef ReturnType return_type; + typedef InvalidType reducer_type; + + typedef typename return_type::value_type value_type_scalar; + typedef typename return_type::value_type value_type_array[]; + + typedef typename if_c<return_type::rank==0,value_type_scalar,value_type_array>::type value_type; + + static return_type& return_value(ReturnType& return_val, const FunctorType&) { + return return_val; + } +}; + +template< class ReturnType , class FunctorType> +struct ParallelReduceReturnValue<typename std::enable_if< + !Kokkos::is_view<ReturnType>::value && + (!std::is_array<ReturnType>::value && !std::is_pointer<ReturnType>::value) && + !Kokkos::is_reducer_type<ReturnType>::value + >::type, ReturnType, FunctorType> { + typedef Kokkos::View< ReturnType + , Kokkos::HostSpace + , Kokkos::MemoryUnmanaged + > return_type; + + typedef InvalidType reducer_type; + + typedef typename return_type::value_type value_type; + + static return_type return_value(ReturnType& return_val, const FunctorType&) { + return return_type(&return_val); + } +}; + +template< class ReturnType , class FunctorType> 
+struct ParallelReduceReturnValue<typename std::enable_if< + (is_array<ReturnType>::value || std::is_pointer<ReturnType>::value) + >::type, ReturnType, FunctorType> { + typedef Kokkos::View< typename std::remove_const<ReturnType>::type + , Kokkos::HostSpace + , Kokkos::MemoryUnmanaged + > return_type; + + typedef InvalidType reducer_type; + + typedef typename return_type::value_type value_type[]; + + static return_type return_value(ReturnType& return_val, + const FunctorType& functor) { + return return_type(return_val,functor.value_count); + } +}; + +template< class ReturnType , class FunctorType> +struct ParallelReduceReturnValue<typename std::enable_if< + Kokkos::is_reducer_type<ReturnType>::value + >::type, ReturnType, FunctorType> { + typedef ReturnType return_type; + typedef ReturnType reducer_type; + typedef typename return_type::value_type value_type; + + static return_type return_value(ReturnType& return_val, + const FunctorType& functor) { + return return_val; + } +}; +} + +namespace Impl { +template< class T, class ReturnType , class FunctorType> +struct ParallelReducePolicyType; + +template< class PolicyType , class FunctorType > +struct ParallelReducePolicyType<typename std::enable_if<Kokkos::Impl::is_execution_policy<PolicyType>::value>::type, PolicyType,FunctorType> { + + typedef PolicyType policy_type; + static PolicyType policy(const PolicyType& policy_) { + return policy_; + } +}; + +template< class PolicyType , class FunctorType > +struct ParallelReducePolicyType<typename std::enable_if<std::is_integral<PolicyType>::value>::type, PolicyType,FunctorType> { + typedef typename + Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space + execution_space ; + + typedef Kokkos::RangePolicy<execution_space> policy_type; + + static policy_type policy(const PolicyType& policy_) { + return policy_type(0,policy_); + } +}; + +} + +namespace Impl { + template< class FunctorType, class ExecPolicy, class ValueType, class ExecutionSpace> + struct 
ParallelReduceFunctorType { + typedef FunctorType functor_type; + static const functor_type& functor(const functor_type& functor) { + return functor; + } + }; +} + +namespace Impl { + + template< class PolicyType, class FunctorType, class ReturnType > + struct ParallelReduceAdaptor { + typedef Impl::ParallelReduceReturnValue<void,ReturnType,FunctorType> return_value_adapter; + #ifdef KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER + typedef Impl::ParallelReduceFunctorType<FunctorType,PolicyType, + typename return_value_adapter::value_type, + typename PolicyType::execution_space> functor_adaptor; + #endif + static inline + void execute(const std::string& label, + const PolicyType& policy, + const FunctorType& functor, + ReturnType& return_value) { + #if (KOKKOS_ENABLE_PROFILING) + uint64_t kpID = 0; + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::beginParallelReduce("" == label ? typeid(FunctorType).name() : label, 0, &kpID); + } + #endif + + Kokkos::Impl::shared_allocation_tracking_claim_and_disable(); + #ifdef KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER + Impl::ParallelReduce<typename functor_adaptor::functor_type, PolicyType, typename return_value_adapter::reducer_type > + closure(functor_adaptor::functor(functor), + policy, + return_value_adapter::return_value(return_value,functor)); + #else + Impl::ParallelReduce<FunctorType, PolicyType, typename return_value_adapter::reducer_type > + closure(functor, + policy, + return_value_adapter::return_value(return_value,functor)); + #endif + Kokkos::Impl::shared_allocation_tracking_release_and_enable(); + closure.execute(); + + #if (KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelReduce(kpID); + } + #endif + } + + }; +} +/*! \fn void parallel_reduce(label,policy,functor,return_argument) + \brief Perform a parallel reduction. + \param label An optional Label giving the call name. Must be able to construct a std::string from the argument. 
+ \param policy A Kokkos Execution Policy, such as an integer, a RangePolicy or a TeamPolicy. + \param functor A functor with a reduction operator, and optional init, join and final functions. + \param return_argument A return argument which can be a scalar, a View, or a ReducerStruct. This argument can be left out if the functor has a final function. +*/ + +/** \brief Parallel reduction + * + * parallel_reduce performs parallel reductions with arbitrary functions - i.e. + * it is not solely data based. The call expects up to 4 arguments: + * an optional label, an execution policy, a functor, and an optional + * return argument. + * + * Example of a parallel_reduce functor for a POD (plain old data) value type: + * \code + * class FunctorType { // For POD value type + * public: + * typedef ... 
execution_space ; + * typedef <podType> value_type[] ; + * void operator()( <intType> , <podType> update[] ) const ; + * void init( <podType> update[] ) const ; + * void join( volatile <podType> update[] , + * volatile const <podType> input[] ) const ; + * + * typedef true_type has_final ; + * void final( <podType> update[] ) const ; + * }; + * \endcode + */ + +// ReturnValue is scalar or array: take by reference (each overload forwards to Impl::ParallelReduceAdaptor<...>::execute; unlabeled overloads pass "" as the label) + +template< class PolicyType, class FunctorType, class ReturnType > +inline +void parallel_reduce(const std::string& label, + const PolicyType& policy, + const FunctorType& functor, + ReturnType& return_value, + typename Impl::enable_if< + Kokkos::Impl::is_execution_policy<PolicyType>::value + >::type * = 0) { + Impl::ParallelReduceAdaptor<PolicyType,FunctorType,ReturnType>::execute(label,policy,functor,return_value); +} + +template< class PolicyType, class FunctorType, class ReturnType > +inline +void parallel_reduce(const PolicyType& policy, + const FunctorType& functor, + ReturnType& return_value, + typename Impl::enable_if< + Kokkos::Impl::is_execution_policy<PolicyType>::value + >::type * = 0) { + Impl::ParallelReduceAdaptor<PolicyType,FunctorType,ReturnType>::execute("",policy,functor,return_value); +} + +template< class FunctorType, class ReturnType > +inline +void parallel_reduce(const size_t& policy, + const FunctorType& functor, + ReturnType& return_value) { + typedef typename Impl::ParallelReducePolicyType<void,size_t,FunctorType>::policy_type policy_type; // the size_t count is dispatched as policy_type(0,policy) + Impl::ParallelReduceAdaptor<policy_type,FunctorType,ReturnType>::execute("",policy_type(0,policy),functor,return_value); +} + +template< class FunctorType, class ReturnType > +inline +void parallel_reduce(const std::string& label, + const size_t& policy, + const FunctorType& functor, + ReturnType& return_value) { + typedef typename Impl::ParallelReducePolicyType<void,size_t,FunctorType>::policy_type policy_type; + 
Impl::ParallelReduceAdaptor<policy_type,FunctorType,ReturnType>::execute(label,policy_type(0,policy),functor,return_value); +} + +// ReturnValue as View or Reducer: taken by const reference so it can be constructed inline at the call site (the adaptor is instantiated with const ReturnType) + +template< class PolicyType, class FunctorType, class ReturnType > +inline +void parallel_reduce(const std::string& label, + const PolicyType& policy, + const FunctorType& functor, + const ReturnType& return_value, + typename Impl::enable_if< + Kokkos::Impl::is_execution_policy<PolicyType>::value + >::type * = 0) { + Impl::ParallelReduceAdaptor<PolicyType,FunctorType,const ReturnType>::execute(label,policy,functor,return_value); +} + +template< class PolicyType, class FunctorType, class ReturnType > +inline +void parallel_reduce(const PolicyType& policy, + const FunctorType& functor, + const ReturnType& return_value, + typename Impl::enable_if< + Kokkos::Impl::is_execution_policy<PolicyType>::value + >::type * = 0) { + Impl::ParallelReduceAdaptor<PolicyType,FunctorType,const ReturnType>::execute("",policy,functor,return_value); +} + +template< class FunctorType, class ReturnType > +inline +void parallel_reduce(const size_t& policy, + const FunctorType& functor, + const ReturnType& return_value) { + typedef typename Impl::ParallelReducePolicyType<void,size_t,FunctorType>::policy_type policy_type; + + Impl::ParallelReduceAdaptor<policy_type,FunctorType,const ReturnType>::execute("",policy_type(0,policy),functor,return_value); +} + +template< class FunctorType, class ReturnType > +inline +void parallel_reduce(const std::string& label, + const size_t& policy, + const FunctorType& functor, + const ReturnType& return_value) { + typedef typename Impl::ParallelReducePolicyType<void,size_t,FunctorType>::policy_type policy_type; + Impl::ParallelReduceAdaptor<policy_type,FunctorType,const ReturnType>::execute(label,policy_type(0,policy),functor,return_value); +} + +// No Return Argument: the adaptor receives a default-constructed unmanaged HostSpace result View + +template< class PolicyType, class FunctorType> +inline +void 
parallel_reduce(const std::string& label, + const PolicyType& policy, + const FunctorType& functor, + typename Impl::enable_if< + Kokkos::Impl::is_execution_policy<PolicyType>::value + >::type * = 0) { + typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; + typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0) + , typename ValueTraits::value_type + , typename ValueTraits::pointer_type + >::type value_type ; + + typedef Kokkos::View< value_type + , Kokkos::HostSpace + , Kokkos::MemoryUnmanaged + > result_view_type; + result_view_type result_view ; // default-constructed: the caller supplies no result storage + + Impl::ParallelReduceAdaptor<PolicyType,FunctorType,result_view_type>::execute(label,policy,functor,result_view); +} + +template< class PolicyType, class FunctorType > +inline +void parallel_reduce(const PolicyType& policy, + const FunctorType& functor, + typename Impl::enable_if< + Kokkos::Impl::is_execution_policy<PolicyType>::value + >::type * = 0) { + typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; + typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0) + , typename ValueTraits::value_type + , typename ValueTraits::pointer_type + >::type value_type ; + + typedef Kokkos::View< value_type + , Kokkos::HostSpace + , Kokkos::MemoryUnmanaged + > result_view_type; + result_view_type result_view ; + + Impl::ParallelReduceAdaptor<PolicyType,FunctorType,result_view_type>::execute("",policy,functor,result_view); +} + +template< class FunctorType > +inline +void parallel_reduce(const size_t& policy, + const FunctorType& functor) { + typedef typename Impl::ParallelReducePolicyType<void,size_t,FunctorType>::policy_type policy_type; + typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; + typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0) + , typename ValueTraits::value_type + , typename ValueTraits::pointer_type + >::type value_type ; + + typedef Kokkos::View< value_type + , Kokkos::HostSpace + 
, Kokkos::MemoryUnmanaged + > result_view_type; + result_view_type result_view ; + + Impl::ParallelReduceAdaptor<policy_type,FunctorType,result_view_type>::execute("",policy_type(0,policy),functor,result_view); +} + +template< class FunctorType> +inline +void parallel_reduce(const std::string& label, + const size_t& policy, + const FunctorType& functor) { + typedef typename Impl::ParallelReducePolicyType<void,size_t,FunctorType>::policy_type policy_type; + typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; + typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0) + , typename ValueTraits::value_type + , typename ValueTraits::pointer_type + >::type value_type ; + + typedef Kokkos::View< value_type + , Kokkos::HostSpace + , Kokkos::MemoryUnmanaged + > result_view_type; + result_view_type result_view ; + + Impl::ParallelReduceAdaptor<policy_type,FunctorType,result_view_type>::execute(label,policy_type(0,policy),functor,result_view); +} + + + +} //namespace Kokkos diff --git a/lib/kokkos/core/src/Kokkos_Qthread.hpp b/lib/kokkos/core/src/Kokkos_Qthread.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d61f8d518e6641debd19d4975b2535a6bfbcad8f --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Qthread.hpp @@ -0,0 +1,172 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_QTHREAD_HPP +#define KOKKOS_QTHREAD_HPP + +#include <cstddef> +#include <iosfwd> +#include <Kokkos_Core.hpp> +#include <Kokkos_Layout.hpp> +#include <Kokkos_MemoryTraits.hpp> +#include <Kokkos_HostSpace.hpp> +#include <Kokkos_ExecPolicy.hpp> +#include <impl/Kokkos_Tags.hpp> + +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { +class QthreadExec ; +} // namespace Impl +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { + +/** \brief Execution space supported by Qthread */ +class Qthread { +public: + //! \name Type declarations that all Kokkos devices must provide. + //@{ + + //! Tag this class as an execution space + typedef Qthread execution_space ; + typedef Kokkos::HostSpace memory_space ; + //! This execution space's preferred device_type + typedef Kokkos::Device<execution_space,memory_space> device_type; + + typedef Kokkos::LayoutRight array_layout ; + typedef memory_space::size_type size_type ; + + typedef ScratchMemorySpace< Qthread > scratch_memory_space ; + + //@} + /*------------------------------------------------------------------------*/ + + /** \brief Initialization will construct one or more instances */ + static Qthread & instance( int = 0 ); + + /** \brief Set the execution space to a "sleep" state. + * + * This function sets the "sleep" state in which it is not ready for work. + * This may consume fewer resources than in a "ready" state, + * but it may also take time to transition to the "ready" state. + * + * \return True if it enters or is in the "sleep" state. + * False if functions are currently executing. + */ + bool sleep(); + + /** \brief Wake from the sleep state. + * + * \return True if it enters or is in the "ready" state. 
+ * False if functions are currently executing. + */ + static bool wake(); + + /** \brief Wait until all dispatched functions complete. + * + * The parallel_for or parallel_reduce dispatch of a functor may + * return asynchronously, before the functor completes. This + * method does not return until all dispatched functors on this + * device have completed. + */ + static void fence(); + + /*------------------------------------------------------------------------*/ + + static int in_parallel(); + + static int is_initialized(); + + /** \brief Return maximum amount of concurrency */ + static int concurrency(); + + static void initialize( int thread_count ); + static void finalize(); + + /** \brief Print configuration information to the given output stream. */ + static void print_configuration( std::ostream & , const bool detail = false ); + + int shepherd_size() const ; + int shepherd_worker_size() const ; +}; + +/*--------------------------------------------------------------------------*/ + +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { + +template<> +struct VerifyExecutionCanAccessMemorySpace + < Kokkos::Qthread::memory_space + , Kokkos::Qthread::scratch_memory_space + > +{ + enum { value = true }; + inline static void verify( void ) { } // no-op + inline static void verify( const void * ) { } // no-op +}; + +} // namespace Impl +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +#include <Kokkos_Parallel.hpp> +#include <Qthread/Kokkos_QthreadExec.hpp> +#include <Qthread/Kokkos_Qthread_Parallel.hpp> + +#endif /* #ifndef KOKKOS_QTHREAD_HPP */ + +//---------------------------------------------------------------------------- 
+//---------------------------------------------------------------------------- + diff --git a/lib/kokkos/core/src/Kokkos_ScratchSpace.hpp b/lib/kokkos/core/src/Kokkos_ScratchSpace.hpp new file mode 100644 index 0000000000000000000000000000000000000000..09a5993863e56835276b88003d59a98ba8e5b6b6 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_ScratchSpace.hpp @@ -0,0 +1,166 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_SCRATCHSPACE_HPP +#define KOKKOS_SCRATCHSPACE_HPP + +#include <stdio.h> +#include <Kokkos_Core_fwd.hpp> +#include <impl/Kokkos_Tags.hpp> + +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { + +/** \brief Scratch memory space associated with an execution space. + * + */ +template< class ExecSpace > +class ScratchMemorySpace { + static_assert (Impl::is_execution_space<ExecSpace>::value,"Instantiating ScratchMemorySpace on non-execution-space type."); +public: + + // Alignment of memory chunks returned by 'get_shmem' + // must be a power of two + enum { ALIGN = 8 }; + +private: + + mutable char * m_iter_L0 ; + char * m_end_L0 ; + mutable char * m_iter_L1 ; + char * m_end_L1 ; + + + mutable int m_multiplier; + mutable int m_offset; + mutable int m_default_level; + + ScratchMemorySpace(); + ScratchMemorySpace & operator = ( const ScratchMemorySpace & ); + + enum { MASK = ALIGN - 1 }; // Alignment used by View::shmem_size + +public: + + //! Tag this class as a memory space + typedef ScratchMemorySpace memory_space ; + typedef ExecSpace execution_space ; + //! 
Preferred device_type pairing execution_space with this memory space + typedef Kokkos::Device<execution_space,memory_space> device_type; + + typedef typename ExecSpace::array_layout array_layout ; + typedef typename ExecSpace::size_type size_type ; + + template< typename IntType > + KOKKOS_INLINE_FUNCTION static + IntType align( const IntType & size ) + { return ( size + MASK ) & ~MASK ; } // rounds size up to a multiple of ALIGN + + template< typename IntType > + KOKKOS_INLINE_FUNCTION + void* get_shmem (const IntType& size, int level = -1) const { // returns 0 when the selected level has too little space left + if(level == -1) + level = m_default_level; + if(level == 0) { + void* tmp = m_iter_L0 + m_offset * align (size); + if (m_end_L0 < (m_iter_L0 += align (size) * m_multiplier)) { + m_iter_L0 -= align (size) * m_multiplier; // put it back like it was + #ifdef KOKKOS_HAVE_DEBUG + // mfh 23 Jun 2015: printf call consumes 25 registers + // in a CUDA build, so only print in debug mode. The + // function still returns NULL if not enough memory. + printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate " + "%ld byte(s); remaining capacity is %ld byte(s)\n", long(size), + long(m_end_L0-m_iter_L0)); + #endif // KOKKOS_HAVE_DEBUG + tmp = 0; + } + return tmp; + } else { + void* tmp = m_iter_L1 + m_offset * align (size); + if (m_end_L1 < (m_iter_L1 += align (size) * m_multiplier)) { + m_iter_L1 -= align (size) * m_multiplier; // put it back like it was + #ifdef KOKKOS_HAVE_DEBUG + // mfh 23 Jun 2015: printf call consumes 25 registers + // in a CUDA build, so only print in debug mode. The + // function still returns NULL if not enough memory. 
+ printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate " + "%ld byte(s); remaining capacity is %ld byte(s)\n", long(size), + long(m_end_L1-m_iter_L1)); + #endif // KOKKOS_HAVE_DEBUG + tmp = 0; + } + return tmp; + + } + } + + template< typename IntType > + KOKKOS_INLINE_FUNCTION + ScratchMemorySpace( void * ptr_L0 , const IntType & size_L0 , void * ptr_L1 = NULL , const IntType & size_L1 = 0) + : m_iter_L0( (char *) ptr_L0 ) + , m_end_L0( m_iter_L0 + size_L0 ) + , m_iter_L1( (char *) ptr_L1 ) + , m_end_L1( m_iter_L1 + size_L1 ) + , m_multiplier( 1 ) + , m_offset( 0 ) + , m_default_level( 0 ) + {} + + KOKKOS_INLINE_FUNCTION + const ScratchMemorySpace& set_team_thread_mode(const int& level, const int& multiplier, const int& offset) const { // const: the three fields written here are mutable + m_default_level = level; + m_multiplier = multiplier; + m_offset = offset; + return *this; + } +}; + +} // namespace Kokkos + +#endif /* #ifndef KOKKOS_SCRATCHSPACE_HPP */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + diff --git a/lib/kokkos/core/src/Kokkos_Serial.hpp b/lib/kokkos/core/src/Kokkos_Serial.hpp new file mode 100644 index 0000000000000000000000000000000000000000..233b56c93956f7898346780d1bfe327fd11afb03 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Serial.hpp @@ -0,0 +1,1116 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// \file Kokkos_Serial.hpp +/// \brief Declaration and definition of Kokkos::Serial device. 
+ +#ifndef KOKKOS_SERIAL_HPP +#define KOKKOS_SERIAL_HPP + +#include <cstddef> +#include <iosfwd> +#include <Kokkos_Parallel.hpp> +#include <Kokkos_TaskPolicy.hpp> +#include <Kokkos_Layout.hpp> +#include <Kokkos_HostSpace.hpp> +#include <Kokkos_ScratchSpace.hpp> +#include <Kokkos_MemoryTraits.hpp> +#include <impl/Kokkos_Tags.hpp> +#include <impl/Kokkos_FunctorAdapter.hpp> +#include <impl/Kokkos_Profiling_Interface.hpp> + + +#include <KokkosExp_MDRangePolicy.hpp> + +#if defined( KOKKOS_HAVE_SERIAL ) + +namespace Kokkos { + +/// \class Serial +/// \brief Kokkos device for non-parallel execution +/// +/// A "device" represents a parallel execution model. It tells Kokkos +/// how to parallelize the execution of kernels in a parallel_for or +/// parallel_reduce. For example, the Threads device uses Pthreads or +/// C++11 threads on a CPU, the OpenMP device uses the OpenMP language +/// extensions, and the Cuda device uses NVIDIA's CUDA programming +/// model. The Serial device executes "parallel" kernels +/// sequentially. This is useful if you really do not want to use +/// threads, or if you want to explore different combinations of MPI +/// and shared-memory parallel programming models. +class Serial { +public: + //! \name Type declarations that all Kokkos devices must provide. + //@{ + + //! Tag this class as an execution space: + typedef Serial execution_space ; + //! The size_type typedef best suited for this device. + typedef HostSpace::size_type size_type ; + //! This device's preferred memory space. + typedef HostSpace memory_space ; + //! This execution space's preferred device_type + typedef Kokkos::Device<execution_space,memory_space> device_type; + + //! This device's preferred array layout. + typedef LayoutRight array_layout ; + + /// \brief Scratch memory space + typedef ScratchMemorySpace< Kokkos::Serial > scratch_memory_space ; + + //@} + + /// \brief True if and only if this method is being called in a + /// thread-parallel function. 
+ /// + /// For the Serial device, this method <i>always</i> returns false, + /// because parallel_for or parallel_reduce with the Serial device + /// always execute sequentially. + inline static int in_parallel() { return false ; } + + /** \brief Set the device in a "sleep" state. + * + * This function sets the device in a "sleep" state in which it is + * not ready for work. This may consume fewer resources than if the + * device were in an "awake" state, but it may also take time to + * bring the device from a sleep state to be ready for work. + * + * \return True if the device is in the "sleep" state, else false if + * the device is actively working and could not enter the "sleep" + * state. + */ + static bool sleep(); + + /// \brief Wake the device from the 'sleep' state so it is ready for work. + /// + /// \return True if the device is in the "ready" state, else "false" + /// if the device is actively working (which also means that it's + /// awake). + static bool wake(); + + /// \brief Wait until all dispatched functors complete. + /// + /// The parallel_for or parallel_reduce dispatch of a functor may + /// return asynchronously, before the functor completes. This + /// method does not return until all dispatched functors on this + /// device have completed. + static void fence() {} + + static void initialize( unsigned threads_count = 1 , + unsigned use_numa_count = 0 , + unsigned use_cores_per_numa = 0 , + bool allow_asynchronous_threadpool = false) { + (void) threads_count; + (void) use_numa_count; + (void) use_cores_per_numa; + (void) allow_asynchronous_threadpool; + + // Init the array of locks used for arbitrarily sized atomics; the four arguments above are ignored + Impl::init_lock_array_host_space(); + #if (KOKKOS_ENABLE_PROFILING) + Kokkos::Profiling::initialize(); + #endif + } + + static int is_initialized() { return 1 ; } + + /** \brief Return the maximum amount of concurrency. */ + static int concurrency() {return 1;}; + + //! 
Free any resources being consumed by the device. + static void finalize() { + #if (KOKKOS_ENABLE_PROFILING) + Kokkos::Profiling::finalize(); + #endif + } + + //! Print configuration information to the given output stream. + static void print_configuration( std::ostream & , const bool /* detail */ = false ) {} + + //-------------------------------------------------------------------------- + + inline static int thread_pool_size( int = 0 ) { return 1 ; } + KOKKOS_INLINE_FUNCTION static int thread_pool_rank() { return 0 ; } + + //-------------------------------------------------------------------------- + + KOKKOS_INLINE_FUNCTION static unsigned hardware_thread_id() { return thread_pool_rank(); } + inline static unsigned max_hardware_threads() { return thread_pool_size(0); } + + //-------------------------------------------------------------------------- + + static void * scratch_memory_resize( unsigned reduce_size , unsigned shared_size ); + + //-------------------------------------------------------------------------- +}; + +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { + +template<> +struct VerifyExecutionCanAccessMemorySpace + < Kokkos::Serial::memory_space + , Kokkos::Serial::scratch_memory_space + > +{ + enum { value = true }; + inline static void verify( void ) { } // no-op + inline static void verify( const void * ) { } // no-op +}; + +namespace SerialImpl { + +struct Sentinel { + + void * m_scratch ; + unsigned m_reduce_end ; + unsigned m_shared_end ; + + Sentinel(); + ~Sentinel(); + static Sentinel & singleton(); +}; + +inline +unsigned align( unsigned n ); +} +} // namespace Impl +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { + 
+class SerialTeamMember { +private: + typedef 
Kokkos::ScratchMemorySpace< Kokkos::Serial > scratch_memory_space ; + const scratch_memory_space m_space ; + const int m_league_rank ; + const int m_league_size ; + + SerialTeamMember & operator = ( const SerialTeamMember & ); + +public: + + KOKKOS_INLINE_FUNCTION + const scratch_memory_space & team_shmem() const { return m_space ; } + + KOKKOS_INLINE_FUNCTION + const scratch_memory_space & team_scratch(int) const + { return m_space ; } // the level argument is ignored + + KOKKOS_INLINE_FUNCTION + const scratch_memory_space & thread_scratch(int) const + { return m_space ; } // the level argument is ignored + + + KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; } + KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; } + KOKKOS_INLINE_FUNCTION int team_rank() const { return 0 ; } + KOKKOS_INLINE_FUNCTION int team_size() const { return 1 ; } + + KOKKOS_INLINE_FUNCTION void team_barrier() const {} + + template<class ValueType> + KOKKOS_INLINE_FUNCTION + void team_broadcast(const ValueType& , const int& ) const {} + + template< class ValueType, class JoinOp > + KOKKOS_INLINE_FUNCTION + ValueType team_reduce( const ValueType & value , const JoinOp & ) const + { + return value ; // the join operation is never applied; the input is returned unchanged + } + + /** \brief Intra-team exclusive prefix sum with team_rank() ordering + * with intra-team non-deterministic ordering accumulation. + * + * The global inter-team accumulation value will, at the end of the + * league's parallel execution, be the scan's total. + * Parallel execution ordering of the league's teams is non-deterministic. + * As such the base value for each team's scan operation is similarly + * non-deterministic. + */ + template< typename Type > + KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const + { + const Type tmp = global_accum ? 
*global_accum : Type(0) ; + if ( global_accum ) { *global_accum += value ; } + return tmp ; + } + + /** \brief Intra-team exclusive prefix sum with team_rank() ordering. 
+ * + * The highest rank thread can compute the reduction total as + * reduction_total = dev.team_scan( value ) + value ; + */ + template< typename Type > + KOKKOS_INLINE_FUNCTION Type team_scan( const Type & ) const + { return Type(0); } + + //---------------------------------------- + // Execution space specific: + + SerialTeamMember( int arg_league_rank + , int arg_league_size + , int arg_shared_size + ); +}; + +} // namespace Impl + + +/* + * < Kokkos::Serial , WorkArgTag > + * < WorkArgTag , Impl::enable_if< Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value >::type > + * + */ +namespace Impl { +template< class ... Properties > +class TeamPolicyInternal< Kokkos::Serial , Properties ... >:public PolicyTraits<Properties...> +{ +private: + + size_t m_team_scratch_size[2] ; + size_t m_thread_scratch_size[2] ; + int m_league_size ; + int m_chunk_size; + +public: + + //! Tag this class as a kokkos execution policy + typedef TeamPolicyInternal execution_policy ; + + typedef PolicyTraits<Properties ... > traits; + + //! 
Execution space of this execution policy: + typedef Kokkos::Serial execution_space ; + + TeamPolicyInternal& operator = (const TeamPolicyInternal& p) { + m_league_size = p.m_league_size; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + return *this; + } + + //---------------------------------------- + + template< class FunctorType > + static + int team_size_max( const FunctorType & ) { return 1 ; } + + template< class FunctorType > + static + int team_size_recommended( const FunctorType & ) { return 1 ; } + + template< class FunctorType > + static + int team_size_recommended( const FunctorType & , const int& ) { return 1 ; } + + //---------------------------------------- + + inline int team_size() const { return 1 ; } + inline int league_size() const { return m_league_size ; } + inline size_t scratch_size(const int& level, int = 0) const { return m_team_scratch_size[level] + m_thread_scratch_size[level]; } + + /** \brief Specify league size; the team size and vector length requests are accepted but unused */ + TeamPolicyInternal( execution_space & + , int league_size_request + , int /* team_size_request */ + , int /* vector_length_request */ = 1 ) + : m_team_scratch_size { 0 , 0 } + , m_thread_scratch_size { 0 , 0 } + , m_league_size( league_size_request ) + , m_chunk_size ( 32 ) + {} + + TeamPolicyInternal( execution_space & + , int league_size_request + , const Kokkos::AUTO_t & /* team_size_request */ + , int /* vector_length_request */ = 1 ) + : m_team_scratch_size { 0 , 0 } + , m_thread_scratch_size { 0 , 0 } + , m_league_size( league_size_request ) + , m_chunk_size ( 32 ) + {} + + TeamPolicyInternal( int league_size_request + , int /* team_size_request */ + , int /* vector_length_request */ = 1 ) + : m_team_scratch_size { 0 , 0 } + , m_thread_scratch_size { 0 , 0 } + 
, m_league_size( league_size_request 
) + , m_chunk_size ( 32 ) + {} + + TeamPolicyInternal( int league_size_request + , const Kokkos::AUTO_t & /* team_size_request */ + , int /* vector_length_request */ = 1 ) + : m_team_scratch_size { 0 , 0 } + , m_thread_scratch_size { 0 , 0 } + , m_league_size( league_size_request ) + , m_chunk_size ( 32 ) + {} + + + inline int chunk_size() const { return m_chunk_size ; } + + /** \brief return a copy of this policy with chunk_size set to a discrete value */ + inline TeamPolicyInternal set_chunk_size(typename traits::index_type chunk_size_) const { + TeamPolicyInternal p = *this; + p.m_chunk_size = chunk_size_; + return p; + } + + /** \brief return a copy of this policy with the per team scratch size set for a specific level of the scratch hierarchy */ + inline TeamPolicyInternal set_scratch_size(const int& level, const PerTeamValue& per_team) const { + TeamPolicyInternal p = *this; + p.m_team_scratch_size[level] = per_team.value; + return p; + }; + + /** \brief return a copy of this policy with the per thread scratch size set for a specific level of the scratch hierarchy */ + inline TeamPolicyInternal set_scratch_size(const int& level, const PerThreadValue& per_thread) const { + TeamPolicyInternal p = *this; + p.m_thread_scratch_size[level] = per_thread.value; + return p; + }; + + /** \brief return a copy of this policy with the per thread and per team scratch sizes set for a specific level of the scratch hierarchy */ + inline TeamPolicyInternal set_scratch_size(const int& level, const PerTeamValue& per_team, const PerThreadValue& per_thread) const { + TeamPolicyInternal p = *this; + p.m_team_scratch_size[level] = per_team.value; + p.m_thread_scratch_size[level] = per_thread.value; + return p; + }; + + typedef Impl::SerialTeamMember member_type ; +}; +} /* namespace Impl */ +} /* namespace Kokkos */ + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + 
+/*--------------------------------------------------------------------------*/ 
+/*--------------------------------------------------------------------------*/
+/* Parallel patterns for Kokkos::Serial with RangePolicy */
+
+namespace Kokkos {
+namespace Impl {
+
+/** \brief  parallel_for over a RangePolicy, executed as a plain loop.
+ *
+ *  Copies of the functor and of the policy are stored at construction.
+ *  execute() visits every index of [ policy.begin() , policy.end() ) in
+ *  increasing order.  When the policy's work_tag is not void, a
+ *  value-initialized tag object is passed as the first functor argument.
+ */
+template< class FunctorType , class ... Traits >
+class ParallelFor< FunctorType ,
+                   Kokkos::RangePolicy< Traits ... > ,
+                   Kokkos::Serial
+                 >
+{
+private:
+
+  using Policy = Kokkos::RangePolicy< Traits ... > ;
+
+  const FunctorType m_functor ;
+  const Policy      m_policy ;
+
+  /* Untagged dispatch: functor( index ). */
+  template< class TagType >
+  typename std::enable_if< std::is_same< TagType , void >::value >::type
+  exec() const
+    {
+      using index_type = typename Policy::member_type ;
+
+      /* The end bound is read once, before the begin bound. */
+      const index_type iwork_end = m_policy.end();
+
+      for ( index_type iwork = m_policy.begin() ; iwork < iwork_end ; ++iwork ) {
+        m_functor( iwork );
+      }
+    }
+
+  /* Tagged dispatch: functor( tag , index ). */
+  template< class TagType >
+  typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+  exec() const
+    {
+      using index_type = typename Policy::member_type ;
+
+      const TagType tag{} ;
+      const index_type iwork_end = m_policy.end();
+
+      for ( index_type iwork = m_policy.begin() ; iwork < iwork_end ; ++iwork ) {
+        m_functor( tag , iwork );
+      }
+    }
+
+public:
+
+  /** \brief  Run the loop, selecting the overload from Policy::work_tag. */
+  inline
+  void execute() const
+    {
+      using work_tag = typename Policy::work_tag ;
+      this-> template exec< work_tag >();
+    }
+
+  inline
+  ParallelFor( const FunctorType & arg_functor
+             , const Policy      & arg_policy )
+    : m_functor( arg_functor )
+    , m_policy(  arg_policy )
+    {}
+};
+
+/*--------------------------------------------------------------------------*/
+
+/** \brief  parallel_reduce over a RangePolicy, executed as a plain loop.
+ *
+ *  ReducerConditional picks between FunctorType and ReducerType based on
+ *  whether ReducerType is InvalidType; the picked type (ReducerTypeFwd)
+ *  provides the value traits, the init and the final operations.
+ *  The per-index contributions are always produced by calling the functor.
+ */
+template< class FunctorType , class ReducerType , class ... Traits >
+class ParallelReduce< FunctorType
+                    , Kokkos::RangePolicy< Traits ... >
+                    , ReducerType
+                    , Kokkos::Serial
+                    >
+{
+private:
+
+  using Policy  = Kokkos::RangePolicy< Traits ... > ;
+  using WorkTag = typename Policy::work_tag ;
+
+  using ReducerConditional =
+    Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ;
+  using ReducerTypeFwd = typename ReducerConditional::type ;
+
+  using ValueTraits = Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ;
+  using ValueInit   = Kokkos::Impl::FunctorValueInit<   ReducerTypeFwd , WorkTag > ;
+
+  using pointer_type   = typename ValueTraits::pointer_type ;
+  using reference_type = typename ValueTraits::reference_type ;
+
+  const FunctorType   m_functor ;
+  const Policy        m_policy ;
+  const ReducerType   m_reducer ;
+  const pointer_type  m_result_ptr ;
+
+  /* Untagged dispatch: init, then functor( index , value ) per index, then final. */
+  template< class TagType >
+  inline
+  typename std::enable_if< std::is_same< TagType , void >::value >::type
+  exec( pointer_type ptr ) const
+    {
+      using index_type = typename Policy::member_type ;
+
+      reference_type accum =
+        ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr );
+
+      const index_type iwork_end = m_policy.end();
+
+      for ( index_type iwork = m_policy.begin() ; iwork < iwork_end ; ++iwork ) {
+        m_functor( iwork , accum );
+      }
+
+      Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagType >::
+        final( ReducerConditional::select(m_functor , m_reducer) , ptr );
+    }
+
+  /* Tagged dispatch: as above, with a value-initialized tag as first argument. */
+  template< class TagType >
+  inline
+  typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+  exec( pointer_type ptr ) const
+    {
+      using index_type = typename Policy::member_type ;
+
+      const TagType tag{} ;
+
+      reference_type accum =
+        ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr );
+
+      const index_type iwork_end = m_policy.end();
+
+      for ( index_type iwork = m_policy.begin() ; iwork < iwork_end ; ++iwork ) {
+        m_functor( tag , iwork , accum );
+      }
+
+      Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagType >::
+        final( ReducerConditional::select(m_functor , m_reducer) , ptr );
+    }
+
+public:
+
+  /** \brief  Run the reduction.
+   *
+   *  Scratch memory for one reduction value is requested unconditionally.
+   *  The reduction is carried out in the result pointer given at
+   *  construction when that pointer is non-null, in the scratch memory
+   *  otherwise.
+   */
+  inline
+  void execute() const
+    {
+      pointer_type scratch_ptr = (pointer_type) Kokkos::Serial::scratch_memory_resize
+           ( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , 0 );
+
+      if ( m_result_ptr ) {
+        this-> template exec< WorkTag >( m_result_ptr );
+      }
+      else {
+        this-> template exec< WorkTag >( scratch_ptr );
+      }
+    }
+
+  /** \brief  Construct from a functor and a result View.
+   *
+   *  Only enabled when HostViewType is a View and ReducerType is not a
+   *  reducer type.  The View must live in HostSpace.
+   */
+  template< class HostViewType >
+  ParallelReduce( const FunctorType  & arg_functor ,
+                  const Policy       & arg_policy ,
+                  const HostViewType & arg_result_view ,
+                  typename std::enable_if<
+                               Kokkos::is_view< HostViewType >::value &&
+                              !Kokkos::is_reducer_type<ReducerType>::value
+                  ,void*>::type = nullptr )
+    : m_functor( arg_functor )
+    , m_policy( arg_policy )
+    , m_reducer( InvalidType() )
+    , m_result_ptr( arg_result_view.ptr_on_device() )
+    {
+      static_assert( Kokkos::is_view< HostViewType >::value
+        , "Kokkos::Serial reduce result must be a View" );
+
+      static_assert( std::is_same< typename HostViewType::memory_space , HostSpace >::value
+        , "Kokkos::Serial reduce result must be a View in HostSpace" );
+    }
+
+  /** \brief  Construct from a functor and a reducer object.
+   *
+   *  The result pointer is taken from reducer.result_view().data().
+   *  No compile-time check of the result's memory space is made here.
+   */
+  inline
+  ParallelReduce( const FunctorType & arg_functor
+                , Policy       arg_policy
+                , const ReducerType& reducer )
+    : m_functor( arg_functor )
+    , m_policy( arg_policy )
+    , m_reducer( reducer )
+    , m_result_ptr( reducer.result_view().data() )
+    {
+    }
+};
+
+/*--------------------------------------------------------------------------*/ + +template< class FunctorType , class ... Traits > +class ParallelScan< FunctorType + , Kokkos::RangePolicy< Traits ... > + , Kokkos::Serial + > +{ +private: + + typedef Kokkos::RangePolicy< Traits ... > Policy ; + typedef typename Policy::work_tag WorkTag ; + typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ; + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + + const FunctorType m_functor ; + const Policy m_policy ; + + template< class TagType > + inline + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec( pointer_type ptr ) const + { + reference_type update = ValueInit::init( m_functor , ptr ); + + const typename Policy::member_type e = m_policy.end(); + for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) { + m_functor( i , update , true ); + } + } + + template< class TagType > + inline + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + exec( pointer_type ptr ) const + { + const TagType t{} ; + reference_type update = ValueInit::init( m_functor , ptr ); + + const typename Policy::member_type e = m_policy.end(); + for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) { + m_functor( t , i , update , true ); + } + } + +public: + + inline + void execute() const + { + pointer_type ptr = (pointer_type) + Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( m_functor ) , 0 ); + this-> template exec< WorkTag >( ptr ); + } + + inline + ParallelScan( const FunctorType & arg_functor + , const Policy & arg_policy + ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + {} +}; + +} // namespace Impl +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ +/* Parallel patterns for Kokkos::Serial with TeamPolicy */ + +namespace Kokkos { +namespace Impl { + +template< class FunctorType , class ... Properties > +class ParallelFor< FunctorType + , Kokkos::TeamPolicy< Properties ... > + , Kokkos::Serial + > +{ +private: + + typedef TeamPolicyInternal< Kokkos::Serial , Properties ...> Policy ; + typedef typename Policy::member_type Member ; + + const FunctorType m_functor ; + const int m_league ; + const int m_shared ; + + template< class TagType > + inline + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec() const + { + for ( int ileague = 0 ; ileague < m_league ; ++ileague ) { + m_functor( Member(ileague,m_league,m_shared) ); + } + } + + template< class TagType > + inline + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + exec() const + { + const TagType t{} ; + for ( int ileague = 0 ; ileague < m_league ; ++ileague ) { + m_functor( t , Member(ileague,m_league,m_shared) ); + } + } + +public: + + inline + void execute() const + { + Kokkos::Serial::scratch_memory_resize( 0 , m_shared ); + this-> template exec< typename Policy::work_tag >(); + } + + ParallelFor( const FunctorType & arg_functor + , const Policy & arg_policy ) + : m_functor( arg_functor ) + , m_league( arg_policy.league_size() ) + , m_shared( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , 1 ) ) + { } +}; + +/*--------------------------------------------------------------------------*/ + +template< class FunctorType , class ReducerType , class ... Properties > +class ParallelReduce< FunctorType + , Kokkos::TeamPolicy< Properties ... > + , ReducerType + , Kokkos::Serial + > +{ +private: + + typedef TeamPolicyInternal< Kokkos::Serial, Properties ... 
> Policy ; + typedef typename Policy::member_type Member ; + typedef typename Policy::work_tag WorkTag ; + + typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ; + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + + const FunctorType m_functor ; + const int m_league ; + const ReducerType m_reducer ; + pointer_type m_result_ptr ; + const int m_shared ; + + template< class TagType > + inline + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec( pointer_type ptr ) const + { + reference_type update = ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr ); + + for ( int ileague = 0 ; ileague < m_league ; ++ileague ) { + m_functor( Member(ileague,m_league,m_shared) , update ); + } + + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagType >:: + final( ReducerConditional::select(m_functor , m_reducer) , ptr ); + } + + template< class TagType > + inline + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + exec( pointer_type ptr ) const + { + const TagType t{} ; + + reference_type update = ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr ); + + for ( int ileague = 0 ; ileague < m_league ; ++ileague ) { + m_functor( t , Member(ileague,m_league,m_shared) , update ); + } + + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagType >:: + final( ReducerConditional::select(m_functor , m_reducer) , ptr ); + } + +public: + + inline + void execute() const + { + pointer_type ptr = (pointer_type) Kokkos::Serial::scratch_memory_resize + ( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , m_shared ); + + this-> template exec< WorkTag >( m_result_ptr ? m_result_ptr : ptr ); + } + + template< class ViewType > + ParallelReduce( const FunctorType & arg_functor + , const Policy & arg_policy + , const ViewType & arg_result , + typename std::enable_if< + Kokkos::is_view< ViewType >::value && + !Kokkos::is_reducer_type<ReducerType>::value + ,void*>::type = NULL) + : m_functor( arg_functor ) + , m_league( arg_policy.league_size() ) + , m_reducer( InvalidType() ) + , m_result_ptr( arg_result.ptr_on_device() ) + , m_shared( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( m_functor , 1 ) ) + { + static_assert( Kokkos::is_view< ViewType >::value + , "Reduction result on Kokkos::Serial must be a Kokkos::View" ); + + static_assert( std::is_same< typename ViewType::memory_space + , Kokkos::HostSpace >::value + , "Reduction result on Kokkos::Serial must be a Kokkos::View in HostSpace" ); + } + + inline + ParallelReduce( const FunctorType & arg_functor + , Policy arg_policy + , const ReducerType& reducer ) + : m_functor( arg_functor ) + , m_league( arg_policy.league_size() ) + , m_reducer( reducer ) + , m_result_ptr( reducer.result_view().data() ) + , m_shared( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< 
FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + { + /*static_assert( std::is_same< typename ViewType::memory_space + , Kokkos::HostSpace >::value + , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" );*/ + } + +}; + +} // namespace Impl +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ +/* Nested parallel patterns for Kokkos::Serial with TeamPolicy */ + +namespace Kokkos { +namespace Impl { + +template<typename iType> +struct TeamThreadRangeBoundariesStruct<iType,SerialTeamMember> { + typedef iType index_type; + const iType begin ; + const iType end ; + enum {increment = 1}; + const SerialTeamMember& thread; + + KOKKOS_INLINE_FUNCTION + TeamThreadRangeBoundariesStruct (const SerialTeamMember& arg_thread, const iType& arg_count) + : begin(0) + , end(arg_count) + , thread(arg_thread) + {} + + KOKKOS_INLINE_FUNCTION + TeamThreadRangeBoundariesStruct (const SerialTeamMember& arg_thread, const iType& arg_begin, const iType & arg_end ) + : begin( arg_begin ) + , end( arg_end) + , thread( arg_thread ) + {} +}; + + template<typename iType> + struct ThreadVectorRangeBoundariesStruct<iType,SerialTeamMember> { + typedef iType index_type; + enum {start = 0}; + const iType end; + enum {increment = 1}; + + KOKKOS_INLINE_FUNCTION + ThreadVectorRangeBoundariesStruct (const SerialTeamMember& thread, const iType& count): + end( count ) + {} + }; + +} // namespace Impl + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember> +TeamThreadRange( const Impl::SerialTeamMember& thread, const iType & count ) +{ + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>(thread,count); +} + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember> +TeamThreadRange( const 
Impl::SerialTeamMember& thread, const iType & begin , const iType & end ) +{ + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>(thread,begin,end); +} + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember > + ThreadVectorRange(const Impl::SerialTeamMember& thread, const iType& count) { + return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >(thread,count); +} + +KOKKOS_INLINE_FUNCTION +Impl::ThreadSingleStruct<Impl::SerialTeamMember> PerTeam(const Impl::SerialTeamMember& thread) { + return Impl::ThreadSingleStruct<Impl::SerialTeamMember>(thread); +} + +KOKKOS_INLINE_FUNCTION +Impl::VectorSingleStruct<Impl::SerialTeamMember> PerThread(const Impl::SerialTeamMember& thread) { + return Impl::VectorSingleStruct<Impl::SerialTeamMember>(thread); +} + +} // namespace Kokkos + +namespace Kokkos { + + /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the the calling thread team. + * This functionality requires C++11 support.*/ +template<typename iType, class Lambda> +KOKKOS_INLINE_FUNCTION +void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries, const Lambda& lambda) { + for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) + lambda(i); +} + +/** \brief Inter-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the the calling thread team and a summation of + * val is performed and put into result. 
This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries, + const Lambda & lambda, ValueType& result) { + + result = ValueType(); + + for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + result+=tmp; + } + + result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>()); +} + +/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of + * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. + * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore + * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or + * '1 for *'). This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries, + const Lambda & lambda, const JoinType& join, ValueType& init_result) { + + ValueType result = init_result; + + for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + join(result,tmp); + } + + init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join)); +} + +} //namespace Kokkos + +namespace Kokkos { +/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread. 
+ * This functionality requires C++11 support.*/ +template<typename iType, class Lambda> +KOKKOS_INLINE_FUNCTION +void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >& + loop_boundaries, const Lambda& lambda) { + #ifdef KOKKOS_HAVE_PRAGMA_IVDEP + #pragma ivdep + #endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) + lambda(i); +} + +/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a summation of + * val is performed and put into result. This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >& + loop_boundaries, const Lambda & lambda, ValueType& result) { + result = ValueType(); +#ifdef KOKKOS_HAVE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + result+=tmp; + } +} + +/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of + * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. + * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore + * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or + * '1 for *'). 
This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >& + loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) { + + ValueType result = init_result; +#ifdef KOKKOS_HAVE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + join(result,tmp); + } + init_result = result; +} + +/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final) + * for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed. + * Depending on the target execution space the operator might be called twice: once with final=false + * and once with final=true. When final==true val contains the prefix sum value. The contribution of this + * "i" needs to be added to val no matter whether final==true or not. In a serial execution + * (i.e. team_size==1) the operator is only called once with final==true. Scan_val will be set + * to the final sum value over all vector lanes. 
+ * This functionality requires C++11 support.*/ +template< typename iType, class FunctorType > +KOKKOS_INLINE_FUNCTION +void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >& + loop_boundaries, const FunctorType & lambda) { + + typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; + typedef typename ValueTraits::value_type value_type ; + + value_type scan_val = value_type(); + +#ifdef KOKKOS_HAVE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i,scan_val,true); + } +} + +} // namespace Kokkos + +namespace Kokkos { + +template<class FunctorType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::VectorSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda) { + lambda(); +} + +template<class FunctorType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda) { + lambda(); +} + +template<class FunctorType, class ValueType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::VectorSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda, ValueType& val) { + lambda(val); +} + +template<class FunctorType, class ValueType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda, ValueType& val) { + lambda(val); +} +} + +//---------------------------------------------------------------------------- + +#include <impl/Kokkos_Serial_Task.hpp> + +#endif // defined( KOKKOS_HAVE_SERIAL ) +#endif /* #define KOKKOS_SERIAL_HPP */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + diff --git a/lib/kokkos/core/src/Kokkos_TaskPolicy.hpp b/lib/kokkos/core/src/Kokkos_TaskPolicy.hpp new file mode 100644 index 
0000000000000000000000000000000000000000..fc9113b75052e91fc260f95725fe360b98e548e8 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_TaskPolicy.hpp @@ -0,0 +1,1109 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_TASKPOLICY_HPP +#define KOKKOS_TASKPOLICY_HPP + +//---------------------------------------------------------------------------- + +#include <Kokkos_Core_fwd.hpp> + +// If compiling with CUDA then must be using CUDA 8 or better +// and use relocateable device code to enable the task policy. +// nvcc relocatable device code option: --relocatable-device-code=true + +#if ( defined( KOKKOS_COMPILER_NVCC ) ) + #if ( 8000 <= CUDA_VERSION ) && \ + defined( KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE ) + + #define KOKKOS_ENABLE_TASKPOLICY + + #endif +#else + +#define KOKKOS_ENABLE_TASKPOLICY + +#endif + + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +//---------------------------------------------------------------------------- + +#include <Kokkos_MemoryPool.hpp> +#include <impl/Kokkos_Tags.hpp> +#include <impl/Kokkos_TaskQueue.hpp> + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +enum TaskType { TaskTeam = Impl::TaskBase<void,void,void>::TaskTeam + , TaskSingle = Impl::TaskBase<void,void,void>::TaskSingle }; + +enum TaskPriority { TaskHighPriority = 0 + , TaskRegularPriority = 1 + , TaskLowPriority = 2 }; + +template< typename Space > +class TaskPolicy ; + +template< typename Space > +void wait( TaskPolicy< Space > const & ); + +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +/*\brief Implementation data for task data management, access, and execution. + * + * CRTP Inheritance structure to allow static_cast from the + * task root type and a task's FunctorType. + * + * TaskBase< Space , ResultType , FunctorType > + * : TaskBase< Space , ResultType , void > + * , FunctorType + * { ... 
};
+ *
+ *    TaskBase< Space , ResultType , void >
+ *      : TaskBase< Space , void , void >
+ *      { ... };
+ */
+template< typename Space , typename ResultType , typename FunctorType >
+class TaskBase ;
+
+template< typename Space >
+class TaskExec ;
+
+}} // namespace Kokkos::Impl
+
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+
+/** \brief  Handle to a task, holding a pointer to the task's base record.
+ *
+ *  Accepted forms of the two template arguments:
+ *
+ *    Future< space >           // value_type == void
+ *    Future< value >           // space == Default
+ *    Future< value , space >
+ *
+ *  Whichever argument satisfies Impl::is_space becomes the Space; a
+ *  non-space, non-void argument becomes the value type.  Giving two
+ *  spaces or two value types is rejected at compile time.
+ *
+ *  Every change of the held task goes through queue_type::assign
+ *  (presumably where reference counts are maintained - confirm against
+ *  Impl::TaskQueue), except moves, which transfer the raw pointer and
+ *  null the source.
+ */
+template< typename Arg1 /* = void */ , typename Arg2 /* = void */ >
+class Future {
+private:
+
+  template< typename > friend class TaskPolicy ;
+  template< typename , typename > friend class Future ;
+  template< typename , typename , typename > friend class Impl::TaskBase ;
+
+  /* Classify each template argument as a space, a value type, or neither (void). */
+  enum { Arg1_is_space = Kokkos::Impl::is_space< Arg1 >::value };
+  enum { Arg2_is_space = Kokkos::Impl::is_space< Arg2 >::value };
+  enum { Arg1_is_value = ! Arg1_is_space &&
+                         ! std::is_same< Arg1 , void >::value };
+  enum { Arg2_is_value = ! Arg2_is_space &&
+                         ! std::is_same< Arg2 , void >::value };
+
+  static_assert( ! ( Arg1_is_space && Arg2_is_space )
+               , "Future cannot be given two spaces" );
+
+  static_assert( ! ( Arg1_is_value && Arg2_is_value )
+               , "Future cannot be given two value types" );
+
+  /* First argument wins when it qualifies, then the second, else void. */
+  using ValueType =
+    typename std::conditional< Arg1_is_value , Arg1 ,
+    typename std::conditional< Arg2_is_value , Arg2 , void
+    >::type >::type ;
+
+  using Space =
+    typename std::conditional< Arg1_is_space , Arg1 ,
+    typename std::conditional< Arg2_is_space , Arg2 , void
+    >::type >::type ;
+
+  using task_base  = Impl::TaskBase< Space , ValueType , void > ;
+  using queue_type = Impl::TaskQueue< Space > ;
+
+  task_base * m_task ;
+
+  /* Adopt a raw task pointer; a null pointer yields a null Future. */
+  KOKKOS_INLINE_FUNCTION explicit
+  Future( task_base * task ) : m_task( nullptr )
+    {
+      if ( task ) {
+        queue_type::assign( & m_task , task );
+      }
+    }
+
+  //----------------------------------------
+
+public:
+
+  using execution_space = typename Space::execution_space ;
+  using value_type      = ValueType ;
+
+  //----------------------------------------
+
+  /** \brief  True when no task is held. */
+  KOKKOS_INLINE_FUNCTION
+  bool is_null() const { return m_task == nullptr ; }
+
+  /** \brief  The held task's reference_count(), or 0 for a null Future. */
+  KOKKOS_INLINE_FUNCTION
+  int reference_count() const
+    {
+      if ( m_task == nullptr ) { return 0 ; }
+      return m_task->reference_count();
+    }
+
+  //----------------------------------------
+
+  /** \brief  Release the held task, if any, by assigning a null task. */
+  KOKKOS_INLINE_FUNCTION
+  ~Future()
+    {
+      if ( m_task ) {
+        queue_type::assign( & m_task , static_cast< task_base * >( nullptr ) );
+      }
+    }
+
+  //----------------------------------------
+
+  /** \brief  A default-constructed Future is null. */
+  KOKKOS_INLINE_FUNCTION
+  constexpr Future() noexcept : m_task( nullptr ) {}
+
+  /** \brief  Move: take over the pointer and null the source. */
+  KOKKOS_INLINE_FUNCTION
+  Future( Future && rhs )
+    : m_task( rhs.m_task )
+    { rhs.m_task = nullptr ; }
+
+  /** \brief  Copy: assign through the queue when the source holds a task. */
+  KOKKOS_INLINE_FUNCTION
+  Future( const Future & rhs )
+    : m_task( nullptr )
+    {
+      if ( rhs.m_task ) {
+        queue_type::assign( & m_task , rhs.m_task );
+      }
+    }
+
+  /** \brief  Move-assign: release the current task, then take over the
+   *          source's pointer and null the source. */
+  KOKKOS_INLINE_FUNCTION
+  Future & operator = ( Future && rhs )
+    {
+      if ( m_task ) {
+        queue_type::assign( & m_task , static_cast< task_base * >( nullptr ) );
+      }
+      m_task = rhs.m_task ;
+      rhs.m_task = nullptr ;
+      return *this ;
+    }
+
+  /** \brief  Copy-assign: nothing to do when both sides are null. */
+  KOKKOS_INLINE_FUNCTION
+  Future & operator = ( const Future & rhs )
+    {
+      const bool both_null = ( ! m_task ) && ( ! rhs.m_task );
+      if ( ! both_null ) {
+        queue_type::assign( & m_task , rhs.m_task );
+      }
+      return *this ;
+    }
+
+  //----------------------------------------
+  // Conversions from a Future with other template arguments.
+  // Allowed only when this Future's Space (resp. value_type) is void
+  // or equal to the source's.
+
+  template< class A1 , class A2 >
+  KOKKOS_INLINE_FUNCTION
+  Future( Future<A1,A2> && rhs )
+    : m_task( rhs.m_task )
+    {
+      using source_type = Future<A1,A2> ;
+
+      static_assert
+        ( std::is_same< Space , void >::value ||
+          std::is_same< Space , typename source_type::Space >::value
+        , "Assigned Futures must have the same space" );
+
+      static_assert
+        ( std::is_same< value_type , void >::value ||
+          std::is_same< value_type , typename source_type::value_type >::value
+        , "Assigned Futures must have the same value_type" );
+
+      rhs.m_task = nullptr ;
+    }
+
+  template< class A1 , class A2 >
+  KOKKOS_INLINE_FUNCTION
+  Future( const Future<A1,A2> & rhs )
+    : m_task( nullptr )
+    {
+      using source_type = Future<A1,A2> ;
+
+      static_assert
+        ( std::is_same< Space , void >::value ||
+          std::is_same< Space , typename source_type::Space >::value
+        , "Assigned Futures must have the same space" );
+
+      static_assert
+        ( std::is_same< value_type , void >::value ||
+          std::is_same< value_type , typename source_type::value_type >::value
+        , "Assigned Futures must have the same value_type" );
+
+      if ( rhs.m_task ) {
+        queue_type::assign( & m_task , rhs.m_task );
+      }
+    }
+
+  template< class A1 , class A2 >
+  KOKKOS_INLINE_FUNCTION
+  Future & operator = ( const Future<A1,A2> & rhs )
+    {
+      using source_type = Future<A1,A2> ;
+
+      static_assert
+        ( std::is_same< Space , void >::value ||
+          std::is_same< Space , typename source_type::Space >::value
+        , "Assigned Futures must have the same space" );
+
+      static_assert
+        ( std::is_same< value_type , void >::value ||
+          std::is_same< value_type , typename source_type::value_type >::value
+        , "Assigned Futures must have the same value_type" );
+
+      const bool both_null = ( ! m_task ) && ( ! rhs.m_task );
+      if ( ! both_null ) {
+        queue_type::assign( & m_task , rhs.m_task );
+      }
+      return *this ;
+    }
+
+  template< class A1 , class A2 >
+  KOKKOS_INLINE_FUNCTION
+  Future & operator = ( Future<A1,A2> && rhs )
+    {
+      using source_type = Future<A1,A2> ;
+
+      static_assert
+        ( std::is_same< Space , void >::value ||
+          std::is_same< Space , typename source_type::Space >::value
+        , "Assigned Futures must have the same space" );
+
+      static_assert
+        ( std::is_same< value_type , void >::value ||
+          std::is_same< value_type , typename source_type::value_type >::value
+        , "Assigned Futures must have the same value_type" );
+
+      if ( m_task ) {
+        queue_type::assign( & m_task , static_cast< task_base * >( nullptr ) );
+      }
+      m_task = rhs.m_task ;
+      rhs.m_task = nullptr ;
+      return *this ;
+    }
+
+  //----------------------------------------
+
+  /** \brief  Result of the held task; aborts when the Future is null. */
+  KOKKOS_INLINE_FUNCTION
+  typename task_base::get_return_type
+  get() const
+    {
+      if ( is_null() ) {
+        Kokkos::abort( "Kokkos:::Future::get ERROR: is_null()");
+      }
+      return m_task->get();
+    }
+};
+
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+
+template< typename ExecSpace >
+class TaskPolicy
+{
+private:
+
+  using track_type = Kokkos::Experimental::Impl::SharedAllocationTracker ;
+  using queue_type = Kokkos::Impl::TaskQueue< ExecSpace > ;
+  using task_base  = Impl::TaskBase< ExecSpace , void , void > ;
+
+  track_type   m_track ;
+  
  // Task queue used by this policy.  The default constructor sets it to 0;
  // the memory-space constructor below creates the queue in place.
  // The copy/move operations are defaulted, so copies hold the same pointer.
  queue_type * m_queue ;

  //----------------------------------------
  // Process optional arguments to spawn and respawn functions.
  // Each assign() overload consumes the leading option, stores it on the
  // task, and recurses on the remaining options.

  // No options left: end of the recursion, nothing to do.
  KOKKOS_INLINE_FUNCTION static
  void assign( task_base * const ) {}

  // TaskTeam or TaskSingle: stored in task->m_task_type
  template< typename ... Options >
  KOKKOS_INLINE_FUNCTION static
  void assign( task_base * const task
             , TaskType const & arg
             , Options const & ... opts )
    {
      task->m_task_type = arg ;
      assign( task , opts ... );
    }

  // TaskHighPriority or TaskRegularPriority or TaskLowPriority:
  // stored in task->m_priority
  template< typename ... Options >
  KOKKOS_INLINE_FUNCTION static
  void assign( task_base * const task
             , TaskPriority const & arg
             , Options const & ... opts )
    {
      task->m_priority = arg ;
      assign( task , opts ... );
    }

  // Future for a dependence
  template< typename A1 , typename A2 , typename ... Options >
  KOKKOS_INLINE_FUNCTION static
  void assign( task_base * const task
             , Future< A1 , A2 > const & arg
             , Options const & ... opts )
    {
      // Assign dependence to task->m_next
      // which will be processed within subsequent call to schedule.
      // Error if the dependence is reset:
      // atomic_exchange returns the previous value of m_next and any
      // non-zero previous value aborts.

      if ( 0 != Kokkos::atomic_exchange(& task->m_next, arg.m_task) ) {
        Kokkos::abort("TaskPolicy ERROR: resetting task dependence");
      }

      if ( 0 != arg.m_task ) {
        // The future may be destroyed upon returning from this call
        // so increment reference count to track this assignment.
        Kokkos::atomic_fetch_add( &(arg.m_task->m_ref_count) , 1 );
      }

      assign( task , opts ... );
    }

  //----------------------------------------

public:

  using execution_policy = TaskPolicy ;
  using execution_space  = ExecSpace ;
  using memory_space     = typename queue_type::memory_space ;
  using member_type      = Kokkos::Impl::TaskExec< ExecSpace > ;

  /**\brief  Null policy: default tracker and no queue (m_queue == 0).
   *
   *  Every member below that dereferences m_queue requires a policy
   *  that was built (or copied from one built) with the memory-space
   *  constructor.
   */
  KOKKOS_INLINE_FUNCTION
  TaskPolicy() : m_track(), m_queue(0) {}

  KOKKOS_INLINE_FUNCTION
  TaskPolicy( TaskPolicy && rhs ) = default ;

  KOKKOS_INLINE_FUNCTION
  TaskPolicy( TaskPolicy const & rhs ) = default ;

  KOKKOS_INLINE_FUNCTION
  TaskPolicy & operator = ( TaskPolicy && rhs ) = default ;

  KOKKOS_INLINE_FUNCTION
  TaskPolicy & operator = ( TaskPolicy const & rhs ) = default ;

  /**\brief  Create a policy that owns a newly constructed task queue.
   *
   *  \param arg_memory_space  passed to the record allocation and to
   *                           the queue constructor
   *  \param arg_memory_pool_capacity  forwarded to the queue constructor
   *  \param arg_memory_pool_log2_superblock  forwarded to the queue
   *                           constructor; defaults to 12
   */
  TaskPolicy( memory_space const & arg_memory_space
            , unsigned const arg_memory_pool_capacity
            , unsigned const arg_memory_pool_log2_superblock = 12 )
    : m_track()
    , m_queue(0)
    {
      typedef Kokkos::Experimental::Impl::SharedAllocationRecord
        < memory_space , typename queue_type::Destroy >
          record_type ;

      // Allocate sizeof(queue_type) bytes labeled "TaskQueue".
      record_type * record =
        record_type::allocate( arg_memory_space
                             , "TaskQueue"
                             , sizeof(queue_type)
                             );

      // Construct the queue in place inside the record's data.
      m_queue = new( record->data() )
        queue_type( arg_memory_space
                  , arg_memory_pool_capacity
                  , arg_memory_pool_log2_superblock );

      // Give the record's Destroy functor the queue pointer.
      // NOTE(review): presumably used to destroy the queue when the
      // record is released -- confirm in queue_type::Destroy.
      record->m_destroy.m_queue = m_queue ;

      m_track.assign_allocated_record_to_uninitialized( record );
    }

  //----------------------------------------
  /**\brief  Allocation size for a spawned task
   *
   *  Returns the queue's allocate_block_size() for the size of the
   *  task type that wraps FunctorType.
   */
  template< typename FunctorType >
  KOKKOS_FUNCTION
  size_t spawn_allocation_size() const
    {
      using task_type  = Impl::TaskBase< execution_space
                                       , typename FunctorType::value_type
                                       , FunctorType > ;

      return m_queue->allocate_block_size( sizeof(task_type) );
    }

  /**\brief  Allocation size for a when_all aggregate
   *
   *  \param narg  number of dependences; the request is the size of the
   *               base task plus one task pointer per dependence, the
   *               same size computed in when_all().
   */
  KOKKOS_FUNCTION
  size_t when_all_allocation_size( int narg ) const
    {
      using task_base  = Kokkos::Impl::TaskBase< ExecSpace , void , void > ;

      return m_queue->allocate_block_size( sizeof(task_base) + narg * sizeof(task_base*) );
    }

  //----------------------------------------

  /**\brief  A task spawns a task with options
   *
   *  1) High, Regular, or Low priority
   *  2) With or without dependence
   *  3) Team or Serial
   *
   *  \return Future for the new task.  The future's task pointer is
   *          null when the queue's allocate() returns null; nothing is
   *          constructed or scheduled in that case.
   */
  template< typename FunctorType , typename ... Options >
  KOKKOS_FUNCTION
  Future< typename FunctorType::value_type , ExecSpace >
  task_spawn( FunctorType const & arg_functor
            , Options const & ... arg_options
            ) const
    {
      using value_type  = typename FunctorType::value_type ;
      using future_type = Future< value_type , execution_space > ;
      using task_type   = Impl::TaskBase< execution_space
                                        , value_type
                                        , FunctorType > ;

      //----------------------------------------
      // Give single-thread back-ends an opportunity to clear
      // queue of ready tasks before allocating a new task

      m_queue->iff_single_thread_recursive_execute();

      //----------------------------------------

      future_type f ;

      // Allocate task from memory pool
      f.m_task =
        reinterpret_cast< task_type * >(m_queue->allocate(sizeof(task_type)));

      if ( f.m_task ) {

        // Placement new construction
        new ( f.m_task ) task_type( arg_functor );

        // Reference count starts at two
        // +1 for matching decrement when task is complete
        // +1 for future
        f.m_task->m_queue      = m_queue ;
        f.m_task->m_ref_count  = 2 ;
        f.m_task->m_alloc_size = sizeof(task_type);

        assign( f.m_task , arg_options... );

        // Spawning from within the execution space so the
        // apply function pointer is guaranteed to be valid
        f.m_task->m_apply = task_type::apply ;

        m_queue->schedule( f.m_task );
        // this task may be updated or executed at any moment
      }

      return f ;
    }

  /**\brief  The host process spawns a task with options
   *
   *  1) High, Regular, or Low priority
   *  2) With or without dependence
   *  3) Team or Serial
   *
   *  Differs from task_spawn() in two visible ways: it does not call
   *  iff_single_thread_recursive_execute(), and the apply function
   *  pointer is set through the queue specialization's proc_set_apply().
   *
   *  \return Future for the new task; its task pointer is null when the
   *          queue's allocate() returns null.
   */
  template< typename FunctorType , typename ... Options >
  inline
  Future< typename FunctorType::value_type , ExecSpace >
  host_spawn( FunctorType const & arg_functor
            , Options const & ... arg_options
            ) const
    {
      using value_type  = typename FunctorType::value_type ;
      using future_type = Future< value_type , execution_space > ;
      using task_type   = Impl::TaskBase< execution_space
                                        , value_type
                                        , FunctorType > ;

      future_type f ;

      // Allocate task from memory pool
      f.m_task =
        reinterpret_cast<task_type*>( m_queue->allocate(sizeof(task_type)) );

      if ( f.m_task ) {

        // Placement new construction
        new( f.m_task ) task_type( arg_functor );

        // Reference count starts at two:
        // +1 to match decrement when task completes
        // +1 for the future
        f.m_task->m_queue      = m_queue ;
        f.m_task->m_ref_count  = 2 ;
        f.m_task->m_alloc_size = sizeof(task_type);

        assign( f.m_task , arg_options... );

        // Potentially spawning outside execution space so the
        // apply function pointer must be obtained from execution space.
        // Required for Cuda execution space function pointer.
        queue_type::specialization::template
          proc_set_apply< FunctorType >( & f.m_task->m_apply );

        m_queue->schedule( f.m_task );
      }
      return f ;
    }

  /**\brief  Return a future that is complete
   *         when all input futures are complete.
   *
   *  \param narg  number of futures in arg
   *  \param arg   array of narg futures; a null future is stored as a
   *               null dependence and is not reference counted
   *
   *  \return Future for the aggregate task; its task pointer is null
   *          when the queue's allocate() returns null.
   */
  template< typename A1 , typename A2 >
  KOKKOS_FUNCTION
  Future< ExecSpace >
  when_all( int narg , Future< A1 , A2 > const * const arg ) const
    {
      static_assert
        ( std::is_same< execution_space
                      , typename Future< A1 , A2 >::execution_space
                      >::value
        , "Future must have same execution space" );

      using future_type = Future< ExecSpace > ;
      using task_base   = Kokkos::Impl::TaskBase< ExecSpace , void , void > ;

      future_type f ;

      // Base task followed by one task pointer per dependence.
      size_t const size  = sizeof(task_base) + narg * sizeof(task_base*);

      f.m_task =
        reinterpret_cast< task_base * >( m_queue->allocate( size ) );

      if ( f.m_task ) {

        new( f.m_task ) task_base();

        // Reference count starts at two:
        // +1 to match decrement when task completes
        // +1 for the future
        f.m_task->m_queue      = m_queue ;
        f.m_task->m_ref_count  = 2 ;
        f.m_task->m_alloc_size = size ;
        f.m_task->m_dep_count  = narg ;
        f.m_task->m_task_type  = task_base::Aggregate ;

        task_base ** const dep = f.m_task->aggregate_dependences();

        // Assign dependences to increment their reference count
        // The futures may be destroyed upon returning from this call
        // so increment reference count to track this assignment.

        for ( int i = 0 ; i < narg ; ++i ) {
          task_base * const t = dep[i] = arg[i].m_task ;
          if ( 0 != t ) {
            Kokkos::atomic_fetch_add( &(t->m_ref_count) , 1 );
          }
        }

        m_queue->schedule( f.m_task );
        // this when_all may be processed at any moment
      }

      return f ;
    }

  /**\brief  An executing task respawns itself with options
   *
   *  1) High, Regular, or Low priority
   *  2) With or without dependence
   *
   *  \param task_self  the functor of the executing task; it is cast
   *                    to the task type that wraps FunctorType
   *
   *  Aborts if the task's m_next is not LockTag when called.
   */
  template< class FunctorType , typename ... Options >
  KOKKOS_FUNCTION
  void respawn( FunctorType * task_self
              , Options const & ... arg_options ) const
    {
      using value_type  = typename FunctorType::value_type ;
      using task_type   = Impl::TaskBase< execution_space
                                        , value_type
                                        , FunctorType > ;

      task_base * const zero = (task_base *) 0 ;
      task_base * const lock = (task_base *) task_base::LockTag ;
      task_type * const task = static_cast< task_type * >( task_self );

      // Precondition:
      //   task is in Executing state
      //   therefore  m_next == LockTag
      //
      // Change to m_next == 0 for no dependence

      if ( lock != Kokkos::atomic_exchange( & task->m_next, zero ) ) {
        Kokkos::abort("TaskPolicy::respawn ERROR: already respawned");
      }

      // m_next is now 0, so a Future option may store a dependence.
      assign( task , arg_options... );

      // Postcondition:
      //   task is in Executing-Respawn state
      //   therefore  m_next == dependence or 0
    }

  //----------------------------------------

  // wait() reads the private m_queue member.
  template< typename S >
  friend
  void Kokkos::wait( Kokkos::TaskPolicy< S > const & );

  //----------------------------------------
  // Queries forwarded to the queue.  All of them dereference m_queue
  // without a null check.

  /**\brief  Value of get_mem_size() on the queue's m_memory member */
  inline
  int allocation_capacity() const noexcept
    { return m_queue->m_memory.get_mem_size(); }

  /**\brief  Value of the queue's m_count_alloc member */
  KOKKOS_INLINE_FUNCTION
  int allocated_task_count() const noexcept
    { return m_queue->m_count_alloc ; }

  /**\brief  Value of the queue's m_max_alloc member */
  KOKKOS_INLINE_FUNCTION
  int allocated_task_count_max() const noexcept
    { return m_queue->m_max_alloc ; }

  /**\brief  Value of the queue's m_accum_alloc member */
  KOKKOS_INLINE_FUNCTION
  long allocated_task_count_accum() const noexcept
    { return m_queue->m_accum_alloc ; }

};

/**\brief  Calls execute() on the policy's queue.
 *
 *  Dereferences the queue pointer unconditionally, so the policy must
 *  have a queue.
 */
template< typename ExecSpace >
inline
void wait( TaskPolicy< ExecSpace > const & policy )
{ policy.m_queue->execute(); }

} // namespace Kokkos

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Experimental {
namespace Impl {

// Empty tag type.
// NOTE(review): presumably an error tag for Future<void>::get -- its use is
// not visible here, confirm at the point of use.
struct FutureValueTypeIsVoidError {};

// Forward declaration; Future below befriends it and stores pointers to it.
template < class ExecSpace , class ResultType , class FunctorType >
class TaskMember ;

} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */

//----------------------------------------------------------------------------

namespace Kokkos {
namespace Experimental {

/**\brief  States of a task
 *
 *  The non-null states are distinct powers of two.
 */
enum TaskState
  { TASK_STATE_NULL         = 0  ///<  Does not exist
  , TASK_STATE_CONSTRUCTING = 1  ///<  Is under construction
  , TASK_STATE_WAITING      = 2  ///<  Is waiting for execution
  , TASK_STATE_EXECUTING    = 4  ///<  Is executing
  , TASK_STATE_COMPLETE     = 8  ///<  Execution is complete
  };

/**\brief  Tag for Future<Latch,Space>
 */
struct Latch {};

/**\brief  Handle to a task, accepted in three template forms:
 *
 *  Future< space >  // value_type == void
 *  Future< value >  // space == Default
 *  Future< value , space >
 *
 *  Any other combination fails the StaticAssert in OptOK.
 */
template< class Arg1 = void , class Arg2 = void >
class Future {
private:

  template< class , class , class  > friend class Impl::TaskMember ;
  template< class > friend class TaskPolicy ;
  template< class , class > friend class Future ;

  // Argument #2, if not void, must be the space.
  enum { Arg1_is_space  = Kokkos::Impl::is_execution_space< Arg1 >::value };
  enum { Arg2_is_space  = Kokkos::Impl::is_execution_space< Arg2 >::value };
  enum { Arg2_is_void   = std::is_same< Arg2 , void >::value };

  struct ErrorNoExecutionSpace {};

  // Opt1: Future< space >, Opt2: Future< value >, Opt3: Future< value , space >
  enum { Opt1  =   Arg1_is_space && Arg2_is_void
       , Opt2  = ! Arg1_is_space && Arg2_is_void
       , Opt3  = ! Arg1_is_space && Arg2_is_space
       , OptOK = Kokkos::Impl::StaticAssert< Opt1 || Opt2 || Opt3 , ErrorNoExecutionSpace >::value
       };

  // Arg1 is the value type unless it is the space (Opt1), then void.
  typedef typename
    Kokkos::Impl::if_c< Opt2 || Opt3 , Arg1 , void >::type
      ValueType ;

  // Opt1 -> Arg1, Opt2 -> DefaultExecutionSpace, Opt3 -> Arg2, else void.
  typedef typename
    Kokkos::Impl::if_c< Opt1 , Arg1 , typename
    Kokkos::Impl::if_c< Opt2 , Kokkos::DefaultExecutionSpace , typename
    Kokkos::Impl::if_c< Opt3 , Arg2 , void
    >::type >::type >::type
      ExecutionSpace ;

  typedef Impl::TaskMember< ExecutionSpace , void , void >       TaskRoot ;
  typedef Impl::TaskMember< ExecutionSpace , ValueType , void >  TaskValue ;

  // Task referenced by this future; 0 for a null future.
  TaskRoot * m_task ;

  // Wrap a raw task pointer after passing it through verify_type<ValueType>.
  // NOTE(review): TaskRoot::assign presumably maintains the task's reference
  // count -- confirm in TaskMember.
  KOKKOS_INLINE_FUNCTION explicit
  Future( TaskRoot * task )
    : m_task(0)
    { TaskRoot::assign( & m_task , TaskRoot::template verify_type< ValueType >( task ) ); }

  //----------------------------------------

public:

  typedef ValueType       value_type;
  typedef ExecutionSpace  execution_space ;

  //----------------------------------------

  /**\brief  State of the task, or TASK_STATE_NULL for a null future */
  KOKKOS_INLINE_FUNCTION
  TaskState get_task_state() const
    { return 0 != m_task ? m_task->get_state() : TASK_STATE_NULL ; }

  /**\brief  True when this future does not reference a task */
  KOKKOS_INLINE_FUNCTION
  bool is_null() const { return 0 == m_task ; }

  /**\brief  The task's reference_count(), or 0 for a null future */
  KOKKOS_INLINE_FUNCTION
  int reference_count() const
    { return 0 != m_task ? m_task->reference_count() : 0 ; }

  //----------------------------------------

  // Assigns 0 to m_task through TaskRoot::assign.
  KOKKOS_INLINE_FUNCTION
  ~Future() { TaskRoot::assign( & m_task , 0 ); }

  //----------------------------------------

  /**\brief  Null future */
  KOKKOS_INLINE_FUNCTION
  Future() : m_task(0) {}

  KOKKOS_INLINE_FUNCTION
  Future( const Future & rhs )
    : m_task(0)
    { TaskRoot::assign( & m_task , rhs.m_task ); }

  KOKKOS_INLINE_FUNCTION
  Future & operator = ( const Future & rhs )
    { TaskRoot::assign( & m_task , rhs.m_task ); return *this ; }

  //----------------------------------------
  // Conversions from a Future with different template arguments:
  // the source task pointer goes through verify_type< value_type >.

  template< class A1 , class A2 >
  KOKKOS_INLINE_FUNCTION
  Future( const Future<A1,A2> & rhs )
    : m_task(0)
    { TaskRoot::assign( & m_task , TaskRoot::template verify_type< value_type >( rhs.m_task ) ); }

  template< class A1 , class A2 >
  KOKKOS_INLINE_FUNCTION
  Future & operator = ( const Future<A1,A2> & rhs )
    { TaskRoot::assign( & m_task , TaskRoot::template verify_type< value_type >( rhs.m_task ) ); return *this ; }

  //----------------------------------------

  typedef typename TaskValue::get_result_type get_result_type ;

  /**\brief  Result of the task; aborts when the future is null */
  KOKKOS_INLINE_FUNCTION
  get_result_type get() const
    {
      if ( 0 == m_task ) {
        Kokkos::abort( "Kokkos::Experimental::Future::get ERROR: is_null()");
      }
      return static_cast<TaskValue*>( m_task )->get();
    }

  //----------------------------------------
};

/**\brief  Future specialization for the Latch tag.
 *
 *  value_type is void, get() does nothing, and add() forwards to the
 *  task's latch_add().  Arg2 must be an execution space or void; void
 *  selects Kokkos::DefaultExecutionSpace.
 */
template< class Arg2 >
class Future< Latch , Arg2 > {
private:

  template< class , class , class  > friend class Impl::TaskMember ;
  template< class > friend class TaskPolicy ;
  template< class , class > friend class Future ;

  // Argument #2, if not void, must be the space.
  enum { Arg2_is_space  = Kokkos::Impl::is_execution_space< Arg2 >::value };
  enum { Arg2_is_void   = std::is_same< Arg2 , void >::value };

  static_assert( Arg2_is_space || Arg2_is_void
               , "Future template argument #2 must be a space" );

  typedef typename
    std::conditional< Arg2_is_space , Arg2 , Kokkos::DefaultExecutionSpace >
     ::type ExecutionSpace ;

  typedef Impl::TaskMember< ExecutionSpace , void , void >  TaskRoot ;

  // Task referenced by this future; 0 for a null future.
  TaskRoot * m_task ;

  KOKKOS_INLINE_FUNCTION explicit
  Future( TaskRoot * task )
    : m_task(0)
    { TaskRoot::assign( & m_task , task ); }

  //----------------------------------------

public:

  typedef void            value_type;
  typedef ExecutionSpace  execution_space ;

  //----------------------------------------

  /**\brief  Forward k to the task's latch_add(); no-op for a null future */
  KOKKOS_INLINE_FUNCTION
  void add( const int k ) const
    { if ( 0 != m_task ) m_task->latch_add(k); }

  //----------------------------------------

  /**\brief  State of the task, or TASK_STATE_NULL for a null future */
  KOKKOS_INLINE_FUNCTION
  TaskState get_task_state() const
    { return 0 != m_task ? m_task->get_state() : TASK_STATE_NULL ; }

  /**\brief  True when this future does not reference a task */
  KOKKOS_INLINE_FUNCTION
  bool is_null() const { return 0 == m_task ; }

  //----------------------------------------

  // Assigns 0 to m_task through TaskRoot::assign.
  KOKKOS_INLINE_FUNCTION
  ~Future() { TaskRoot::assign( & m_task , 0 ); }

  //----------------------------------------

  /**\brief  Null future */
  KOKKOS_INLINE_FUNCTION
  Future() : m_task(0) {}

  KOKKOS_INLINE_FUNCTION
  Future( const Future & rhs )
    : m_task(0)
    { TaskRoot::assign( & m_task , rhs.m_task ); }

  KOKKOS_INLINE_FUNCTION
  Future & operator = ( const Future & rhs )
    { TaskRoot::assign( & m_task , rhs.m_task ); return *this ; }

  //----------------------------------------

  typedef void get_result_type ;

  /**\brief  A latch has no value: does nothing, even for a null future */
  KOKKOS_INLINE_FUNCTION
  void get() const {}

  //----------------------------------------

};

namespace Impl {

/**\brief  Trait: true_type for any Kokkos::Experimental::Future<Arg0,Arg1>,
 *         false_type for every other type.
 */
template< class T >
struct is_future : public std::false_type {};

template< class Arg0 , class Arg1 >
struct is_future< Kokkos::Experimental::Future<Arg0,Arg1> >
  : public std::true_type {};

} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Experimental {

/** \brief  If the argument is an execution space then a serial task in that space
 *
 *  Only declarations appear in this class template.
 *  NOTE(review): the member definitions (or per-execution-space
 *  specializations) are presumably provided elsewhere -- confirm.
 */
template< class Arg0 = Kokkos::DefaultExecutionSpace >
class TaskPolicy {
public:

  typedef typename Arg0::execution_space  execution_space ;

  //----------------------------------------

  /** \brief  Construct a policy.
   *
   *  The dependence capacity defaults to 4 and the team size defaults
   *  to 0, which requests the default team size.
   *  NOTE(review): the count and size arguments presumably bound the
   *  number of tasks and the size of one task -- confirm against the
   *  definition.
   */
  TaskPolicy
    ( const unsigned arg_task_max_count
    , const unsigned arg_task_max_size
    , const unsigned arg_task_default_dependence_capacity = 4
    , const unsigned arg_task_team_size = 0 /* choose default */
    );

  TaskPolicy() = default ;
  TaskPolicy( TaskPolicy && rhs ) = default ;
  TaskPolicy( const TaskPolicy & rhs ) = default ;
  TaskPolicy & operator = ( TaskPolicy && rhs ) = default ;
  TaskPolicy & operator = ( const TaskPolicy & rhs ) = default ;

  //----------------------------------------
  /** \brief  Create a serial task with storage for dependences.
   *
   *  Postcondition: Task is in the 'constructing' state.
   *
   *  The dependence capacity has no default argument in this
   *  declaration (the default is only mentioned in a comment).
   */
  template< class FunctorType >
  Future< typename FunctorType::value_type , execution_space >
  create( const FunctorType & functor
        , const unsigned dependence_capacity /* = default */ );

  /** \brief  Team variant of create(), same parameters.
   *
   *  NOTE(review): presumably creates a task executed by a team of
   *  threads -- confirm against the definition.
   */
  template< class FunctorType >
  KOKKOS_INLINE_FUNCTION
  Future< typename FunctorType::value_type , execution_space >
  create_team( const FunctorType & functor
             , const unsigned dependence_capacity /* = default */ );

  /** \brief  Set dependence that 'after' cannot start execution
   *          until 'before' has completed.
   *
   *  Precondition: The 'after' task must be in the 'Constructing' state.
   */
  template< class TA , class TB >
  void add_dependence( const Future<TA,execution_space> & after
                     , const Future<TB,execution_space> & before ) const ;

  /** \brief  Spawn a task in the 'Constructing' state
   *
   *  Precondition:  Task is in the 'constructing' state.
   *  Postcondition: Task is waiting, executing, or complete.
   */
  template< class T >
  const Future<T,execution_space> &
  spawn( const Future<T,execution_space> & ) const ;

  //----------------------------------------
  /** \brief  Query dependence of an executing task */

  template< class FunctorType >
  Future< execution_space >
  get_dependence( FunctorType * , const int ) const ;

  //----------------------------------------
  /** \brief  Clear current dependences of an executing task
   *          in preparation for setting new dependences and
   *          respawning.
   *
   * Precondition: The functor must be a task in the executing state.
   */
  template< class FunctorType >
  void clear_dependence( FunctorType * ) const ;

  /** \brief  Set dependence that 'after' cannot resume execution
   *          until 'before' has completed.
   *
   *  The 'after' functor must be in the executing state
   */
  template< class FunctorType , class TB >
  void add_dependence( FunctorType * after
                     , const Future<TB,execution_space> & before ) const ;

  /** \brief  Respawn (reschedule) an executing task to be called again
   *          after all dependences have completed.
   */
  template< class FunctorType >
  void respawn( FunctorType * ) const ;
};

//----------------------------------------------------------------------------
/** \brief  Create and spawn a single-thread task
 *
 *  NOTE(review): calls policy.create( functor ) with one argument, but
 *  the create() declared in the class template above has no default for
 *  its second parameter, so this relies on the TaskPolicy<ExecSpace>
 *  actually instantiated -- confirm.
 */
template< class ExecSpace , class FunctorType >
inline
Future< typename FunctorType::value_type , ExecSpace >
spawn( TaskPolicy<ExecSpace> & policy , const FunctorType & functor )
{ return policy.spawn( policy.create( functor ) ); }

/** \brief  Create and spawn a single-thread task with dependences
 *
 *  Creates the task with a dependence capacity of 2, adds both
 *  dependences, then spawns it.  Both dependences must be futures of
 *  the same type Future<Arg0,Arg1>.
 */
template< class ExecSpace , class FunctorType , class Arg0 , class Arg1 >
inline
Future< typename FunctorType::value_type , ExecSpace >
spawn( TaskPolicy<ExecSpace>   & policy
     , const FunctorType       & functor
     , const Future<Arg0,Arg1> & before_0
     , const Future<Arg0,Arg1> & before_1 )
{
  Future< typename FunctorType::value_type , ExecSpace > f ;
  f = policy.create( functor , 2 );
  policy.add_dependence( f , before_0 );
  policy.add_dependence( f , before_1 );
  policy.spawn( f );
  return f ;
}

//----------------------------------------------------------------------------
/** \brief  Create and spawn a parallel_for task
 *
 *  NOTE(review): create_foreach() is not declared in the class template
 *  above; it must come from the TaskPolicy<ExecSpace> actually
 *  instantiated -- confirm.
 */
template< class ExecSpace , class ParallelPolicyType , class FunctorType >
inline
Future< typename FunctorType::value_type , ExecSpace >
spawn_foreach( TaskPolicy<ExecSpace>     & task_policy
             , const ParallelPolicyType  & parallel_policy
             , const FunctorType         & functor )
{ return task_policy.spawn( task_policy.create_foreach( parallel_policy , functor ) ); }

/** \brief  Create and spawn a parallel_reduce task
 *
 *  NOTE(review): create_reduce() is not declared in the class template
 *  above; it must come from the TaskPolicy<ExecSpace> actually
 *  instantiated -- confirm.
 */
template< class ExecSpace , class ParallelPolicyType , class FunctorType >
inline
Future< typename FunctorType::value_type , ExecSpace >
spawn_reduce( TaskPolicy<ExecSpace>     & task_policy
            , const ParallelPolicyType  & parallel_policy
            , const FunctorType         & functor )
{ return task_policy.spawn( task_policy.create_reduce( parallel_policy , functor ) ); }

+//---------------------------------------------------------------------------- +/** \brief Respawn a task functor with dependences */ +template< class ExecSpace , class FunctorType , class Arg0 , class Arg1 > +inline +void respawn( TaskPolicy<ExecSpace> & policy + , FunctorType * functor + , const Future<Arg0,Arg1> & before_0 + , const Future<Arg0,Arg1> & before_1 + ) +{ + policy.clear_dependence( functor ); + policy.add_dependence( functor , before_0 ); + policy.add_dependence( functor , before_1 ); + policy.respawn( functor ); +} + +//---------------------------------------------------------------------------- + +template< class ExecSpace > +void wait( TaskPolicy< ExecSpace > & ); + +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #ifndef KOKKOS_TASKPOLICY_HPP */ + diff --git a/lib/kokkos/core/src/Kokkos_Threads.hpp b/lib/kokkos/core/src/Kokkos_Threads.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c9ebbf92652b5d9a2e859cf2587b8089897d3c62 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Threads.hpp @@ -0,0 +1,222 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/

#ifndef KOKKOS_THREADS_HPP
#define KOKKOS_THREADS_HPP

#include <Kokkos_Core_fwd.hpp>

#if defined( KOKKOS_HAVE_PTHREAD )

#include <cstddef>
#include <iosfwd>
#include <Kokkos_HostSpace.hpp>
#include <Kokkos_ScratchSpace.hpp>
#include <Kokkos_Layout.hpp>
#include <Kokkos_MemoryTraits.hpp>
#include <impl/Kokkos_Tags.hpp>

/*--------------------------------------------------------------------------*/

namespace Kokkos {
namespace Impl {
// Forward declaration; the full type comes from
// Threads/Kokkos_ThreadsExec.hpp, included at the end of this header.
class ThreadsExec ;
} // namespace Impl
} // namespace Kokkos

/*--------------------------------------------------------------------------*/

namespace Kokkos {

/** \brief  Execution space for a pool of Pthreads or C11 threads on a CPU.
 *
 *  Every member function is static and the class declares no data members.
 */
class Threads {
public:
  //! \name Type declarations that all Kokkos devices must provide.
  //@{
  //! Tag this class as a kokkos execution space
  typedef Threads                  execution_space ;
  //! Memory space used by this execution space
  typedef Kokkos::HostSpace        memory_space ;

  //! This execution space preferred device_type
  typedef Kokkos::Device<execution_space,memory_space> device_type;

  //! Default array layout
  typedef Kokkos::LayoutRight      array_layout ;
  //! Size type taken from the memory space
  typedef memory_space::size_type  size_type ;

  //! Scratch memory space for this execution space
  typedef ScratchMemorySpace< Threads >  scratch_memory_space ;


  //@}
  /*------------------------------------------------------------------------*/
  //! \name Static functions that all Kokkos devices must implement.
  //@{

  /// \brief True if and only if this method is being called in a
  ///   thread-parallel function.
  static int in_parallel();

  /** \brief  Set the device in a "sleep" state.
   *
   * This function sets the device in a "sleep" state in which it is
   * not ready for work.  This may consume less resources than if the
   * device were in an "awake" state, but it may also take time to
   * bring the device from a sleep state to be ready for work.
   *
   * \return True if the device is in the "sleep" state, else false if
   *   the device is actively working and could not enter the "sleep"
   *   state.
   */
  static bool sleep();

  /// \brief Wake the device from the 'sleep' state so it is ready for work.
  ///
  /// \return True if the device is in the "ready" state, else "false"
  ///  if the device is actively working (which also means that it's
  ///  awake).
  static bool wake();

  /// \brief Wait until all dispatched functors complete.
  ///
  /// The parallel_for or parallel_reduce dispatch of a functor may
  /// return asynchronously, before the functor completes.  This
  /// method does not return until all dispatched functors on this
  /// device have completed.
  static void fence();

  /// \brief Free any resources being consumed by the device.
  ///
  /// For the Threads device, this terminates spawned worker threads.
  static void finalize();

  /// \brief Print configuration information to the given output stream.
  static void print_configuration( std::ostream & , const bool detail = false );

  //@}
  /*------------------------------------------------------------------------*/
  /*------------------------------------------------------------------------*/
  //! \name Space-specific functions
  //@{

  /** \brief Initialize the device in the "ready to work" state.
   *
   *  The device is initialized in a "ready to work" or "awake" state.
   *  This state reduces latency and thus improves performance when
   *  dispatching work.  However, the "awake" state consumes resources
   *  even when no work is being done.  You may call sleep() to put
   *  the device in a "sleeping" state that does not consume as many
   *  resources, but it will take time (latency) to awaken the device
   *  again (via the wake()) method so that it is ready for work.
   *
   *  Teams of threads are distributed as evenly as possible across
   *  the requested number of numa regions and cores per numa region.
   *  A team will not be split across a numa region.
   *
   *  If the 'use_' arguments are not supplied the hwloc is queried
   *  to use all available cores.
   */
  static void initialize( unsigned threads_count = 0 ,
                          unsigned use_numa_count = 0 ,
                          unsigned use_cores_per_numa = 0 ,
                          bool allow_asynchronous_threadpool = false );

  /// NOTE(review): presumably non-zero once initialize() has been
  /// called and before finalize() -- confirm in the implementation.
  static int is_initialized();

  /** \brief  Return the maximum amount of concurrency.  */
  static int concurrency();

  /// NOTE(review): the meaning of the integer argument (default 0) is
  /// not visible in this header -- confirm in the implementation.
  static Threads & instance( int = 0 );

  //----------------------------------------

  /// NOTE(review): presumably the number of threads in the pool at the
  /// given depth (default 0) -- confirm in the implementation.
  static int thread_pool_size( int depth = 0 );
  // When KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST is defined,
  // thread_pool_rank() is only declared here; otherwise it is an
  // inline function that always returns 0.
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  static int thread_pool_rank();
#else
  KOKKOS_INLINE_FUNCTION static int thread_pool_rank() { return 0 ; }
#endif

  /// Same value as thread_pool_size(0).
  inline static unsigned max_hardware_threads() { return thread_pool_size(0); }
  /// Same value as thread_pool_rank().
  KOKKOS_INLINE_FUNCTION static unsigned hardware_thread_id() { return thread_pool_rank(); }

  //@}
  //----------------------------------------
};

} // namespace Kokkos

/*--------------------------------------------------------------------------*/

namespace Kokkos {
namespace Impl {

// Specialization for the pair (Threads memory space, Threads scratch
// memory space): value is true and both verify() overloads do nothing.
template<>
struct VerifyExecutionCanAccessMemorySpace
  < Kokkos::Threads::memory_space
  , Kokkos::Threads::scratch_memory_space
  >
{
  enum { value = true };
  inline static void verify( void ) { }
  inline static void verify( const void * ) { }
};

} // namespace Impl
} // namespace Kokkos

/*--------------------------------------------------------------------------*/

#include <Kokkos_ExecPolicy.hpp>
#include <Kokkos_Parallel.hpp>
#include <Threads/Kokkos_ThreadsExec.hpp>
#include <Threads/Kokkos_ThreadsTeam.hpp>
#include <Threads/Kokkos_Threads_Parallel.hpp>

#include <KokkosExp_MDRangePolicy.hpp>

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
#endif /* 
#define KOKKOS_THREADS_HPP */ + + diff --git a/lib/kokkos/core/src/Kokkos_Vectorization.hpp b/lib/kokkos/core/src/Kokkos_Vectorization.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a60c0ecaa7b83bd49fb187bf37ca5a84d6360744 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Vectorization.hpp @@ -0,0 +1,53 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// \file Kokkos_Vectorization.hpp +/// \brief Declaration and definition of Kokkos::Vectorization interface. +#ifndef KOKKOS_VECTORIZATION_HPP +#define KOKKOS_VECTORIZATION_HPP + +#if defined( KOKKOS_HAVE_CUDA ) +#include <Cuda/Kokkos_Cuda_Vectorization.hpp> +#endif + +#endif diff --git a/lib/kokkos/core/src/Kokkos_View.hpp b/lib/kokkos/core/src/Kokkos_View.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1cc8b0338155c8f8be724181806097a927d606d2 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_View.hpp @@ -0,0 +1,2384 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_VIEW_HPP +#define KOKKOS_VIEW_HPP + +#include <type_traits> +#include <string> +#include <algorithm> +#include <initializer_list> + +#include <Kokkos_Core_fwd.hpp> +#include <Kokkos_HostSpace.hpp> +#include <Kokkos_MemoryTraits.hpp> +#include <Kokkos_ExecPolicy.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template< class DstMemorySpace , class SrcMemorySpace > +struct DeepCopy ; + +template< class DataType > +struct ViewArrayAnalysis ; + +template< class DataType , class ArrayLayout + , typename ValueType = + typename ViewArrayAnalysis< DataType >::non_const_value_type + > +struct ViewDataAnalysis ; + +template< class , class ... > +class ViewMapping { public: enum { is_assignable = false }; }; + +template< class MemorySpace > +struct ViewOperatorBoundsErrorAbort ; + +template<> +struct ViewOperatorBoundsErrorAbort< Kokkos::HostSpace > { + static void apply( const size_t rank + , const size_t n0 , const size_t n1 + , const size_t n2 , const size_t n3 + , const size_t n4 , const size_t n5 + , const size_t n6 , const size_t n7 + , const size_t i0 , const size_t i1 + , const size_t i2 , const size_t i3 + , const size_t i4 , const size_t i5 + , const size_t i6 , const size_t i7 ); +}; + +} /* namespace Impl */ +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { + +/** \class ViewTraits + * \brief Traits class for accessing attributes of a View. + * + * This is an implementation detail of View. 
It is only of interest + * to developers implementing a new specialization of View. + * + * Template argument options: + * - View< DataType > + * - View< DataType , Space > + * - View< DataType , Space , MemoryTraits > + * - View< DataType , ArrayLayout > + * - View< DataType , ArrayLayout , Space > + * - View< DataType , ArrayLayout , MemoryTraits > + * - View< DataType , ArrayLayout , Space , MemoryTraits > + * - View< DataType , MemoryTraits > + */ + +template< class DataType , class ... Properties > +struct ViewTraits ; + +template<> +struct ViewTraits< void > +{ + typedef void execution_space ; + typedef void memory_space ; + typedef void HostMirrorSpace ; + typedef void array_layout ; + typedef void memory_traits ; +}; + +template< class ... Prop > +struct ViewTraits< void , void , Prop ... > +{ + // Ignore an extraneous 'void' + typedef typename ViewTraits<void,Prop...>::execution_space execution_space ; + typedef typename ViewTraits<void,Prop...>::memory_space memory_space ; + typedef typename ViewTraits<void,Prop...>::HostMirrorSpace HostMirrorSpace ; + typedef typename ViewTraits<void,Prop...>::array_layout array_layout ; + typedef typename ViewTraits<void,Prop...>::memory_traits memory_traits ; +}; + +template< class ArrayLayout , class ... Prop > +struct ViewTraits< typename std::enable_if< Kokkos::Impl::is_array_layout<ArrayLayout>::value >::type , ArrayLayout , Prop ... > +{ + // Specify layout, keep subsequent space and memory traits arguments + + typedef typename ViewTraits<void,Prop...>::execution_space execution_space ; + typedef typename ViewTraits<void,Prop...>::memory_space memory_space ; + typedef typename ViewTraits<void,Prop...>::HostMirrorSpace HostMirrorSpace ; + typedef ArrayLayout array_layout ; + typedef typename ViewTraits<void,Prop...>::memory_traits memory_traits ; +}; + +template< class Space , class ... Prop > +struct ViewTraits< typename std::enable_if< Kokkos::Impl::is_space<Space>::value >::type , Space , Prop ... 
> +{ + // Specify Space, memory traits should be the only subsequent argument. + + static_assert( std::is_same< typename ViewTraits<void,Prop...>::execution_space , void >::value && + std::is_same< typename ViewTraits<void,Prop...>::memory_space , void >::value && + std::is_same< typename ViewTraits<void,Prop...>::HostMirrorSpace , void >::value && + std::is_same< typename ViewTraits<void,Prop...>::array_layout , void >::value + , "Only one View Execution or Memory Space template argument" ); + + typedef typename Space::execution_space execution_space ; + typedef typename Space::memory_space memory_space ; + typedef typename Kokkos::Impl::is_space< Space >::host_mirror_space + HostMirrorSpace ; + typedef typename execution_space::array_layout array_layout ; + typedef typename ViewTraits<void,Prop...>::memory_traits memory_traits ; +}; + +template< class MemoryTraits , class ... Prop > +struct ViewTraits< typename std::enable_if< Kokkos::Impl::is_memory_traits<MemoryTraits>::value >::type , MemoryTraits , Prop ... > +{ + // Specify memory trait, should not be any subsequent arguments + + static_assert( std::is_same< typename ViewTraits<void,Prop...>::execution_space , void >::value && + std::is_same< typename ViewTraits<void,Prop...>::memory_space , void >::value && + std::is_same< typename ViewTraits<void,Prop...>::array_layout , void >::value && + std::is_same< typename ViewTraits<void,Prop...>::memory_traits , void >::value + , "MemoryTrait is the final optional template argument for a View" ); + + typedef void execution_space ; + typedef void memory_space ; + typedef void HostMirrorSpace ; + typedef void array_layout ; + typedef MemoryTraits memory_traits ; +}; + + +template< class DataType , class ... Properties > +struct ViewTraits { +private: + + // Unpack the properties arguments + typedef ViewTraits< void , Properties ... > prop ; + + typedef typename + std::conditional< ! 
std::is_same< typename prop::execution_space , void >::value + , typename prop::execution_space + , Kokkos::DefaultExecutionSpace + >::type + ExecutionSpace ; + + typedef typename + std::conditional< ! std::is_same< typename prop::memory_space , void >::value + , typename prop::memory_space + , typename ExecutionSpace::memory_space + >::type + MemorySpace ; + + typedef typename + std::conditional< ! std::is_same< typename prop::array_layout , void >::value + , typename prop::array_layout + , typename ExecutionSpace::array_layout + >::type + ArrayLayout ; + + typedef typename + std::conditional + < ! std::is_same< typename prop::HostMirrorSpace , void >::value + , typename prop::HostMirrorSpace + , typename Kokkos::Impl::is_space< ExecutionSpace >::host_mirror_space + >::type + HostMirrorSpace ; + + typedef typename + std::conditional< ! std::is_same< typename prop::memory_traits , void >::value + , typename prop::memory_traits + , typename Kokkos::MemoryManaged + >::type + MemoryTraits ; + + // Analyze data type's properties, + // May be specialized based upon the layout and value type + typedef Kokkos::Experimental::Impl::ViewDataAnalysis< DataType , ArrayLayout > data_analysis ; + +public: + + //------------------------------------ + // Data type traits: + + typedef typename data_analysis::type data_type ; + typedef typename data_analysis::const_type const_data_type ; + typedef typename data_analysis::non_const_type non_const_data_type ; + + //------------------------------------ + // Compatible array of trivial type traits: + + typedef typename data_analysis::scalar_array_type scalar_array_type ; + typedef typename data_analysis::const_scalar_array_type const_scalar_array_type ; + typedef typename data_analysis::non_const_scalar_array_type non_const_scalar_array_type ; + + //------------------------------------ + // Value type traits: + + typedef typename data_analysis::value_type value_type ; + typedef typename data_analysis::const_value_type const_value_type ; 
+ typedef typename data_analysis::non_const_value_type non_const_value_type ; + + //------------------------------------ + // Mapping traits: + + typedef ArrayLayout array_layout ; + typedef typename data_analysis::dimension dimension ; + typedef typename data_analysis::specialize specialize /* mapping specialization tag */ ; + + enum { rank = dimension::rank }; + enum { rank_dynamic = dimension::rank_dynamic }; + + //------------------------------------ + // Execution space, memory space, memory access traits, and host mirror space. + + typedef ExecutionSpace execution_space ; + typedef MemorySpace memory_space ; + typedef Kokkos::Device<ExecutionSpace,MemorySpace> device_type ; + typedef MemoryTraits memory_traits ; + typedef HostMirrorSpace host_mirror_space ; + + typedef typename MemorySpace::size_type size_type ; + + enum { is_hostspace = std::is_same< MemorySpace , HostSpace >::value }; + enum { is_managed = MemoryTraits::Unmanaged == 0 }; + enum { is_random_access = MemoryTraits::RandomAccess == 1 }; + + //------------------------------------ +}; + +/** \class View + * \brief View to an array of data. + * + * A View represents an array of zero or more dimensions. + * For details, please refer to Kokkos' tutorial materials. + * + * \section Kokkos_View_TemplateParameters Template parameters + * + * This class has both required and optional template parameters. The + * \c DataType parameter must always be provided, and must always be + * first. The remaining parameters are collected in the variadic + * \c Properties pack and are all optional. When explaining the + * template parameters, we won't refer to \c Properties directly; + * instead, we will refer to the valid categories of template + * parameters, in the orders in which they may occur. 
+ * + * Valid ways in which template arguments may be specified: + * - View< DataType > + * - View< DataType , Layout > + * - View< DataType , Layout , Space > + * - View< DataType , Layout , Space , MemoryTraits > + * - View< DataType , Layout , MemoryTraits > + * - View< DataType , Space > + * - View< DataType , Space , MemoryTraits > + * - View< DataType , MemoryTraits > + * + * \tparam DataType (required) This indicates both the type of each + * entry of the array, and the combination of compile-time and + * run-time array dimension(s). For example, <tt>double*</tt> + * indicates a one-dimensional array of \c double with run-time + * dimension, and <tt>int*[3]</tt> a two-dimensional array of \c int + * with run-time first dimension and compile-time second dimension + * (of 3). In general, the run-time dimensions (if any) must go + * first, followed by zero or more compile-time dimensions. For + * more examples, please refer to the tutorial materials. + * + * \tparam Space (optional) The execution or memory space. If not + * specified, this defaults to Kokkos::DefaultExecutionSpace and + * that execution space's memory space. + * + * \tparam Layout (optional) The array's layout in memory. For + * example, LayoutLeft indicates a column-major (Fortran style) + * layout, and LayoutRight a row-major (C style) layout. If not + * specified, this defaults to the preferred layout for the + * <tt>Space</tt>. + * + * \tparam MemoryTraits (optional) Assertion of the user's intended + * access behavior. For example, RandomAccess indicates read-only + * access with limited spatial locality, and Unmanaged lets users + * wrap externally allocated memory in a View without automatic + * deallocation. + * + * \section Kokkos_View_MT MemoryTraits discussion + * + * \subsection Kokkos_View_MT_Interp MemoryTraits interpretation depends on Space + * + * Some \c MemoryTraits options may have different interpretations for + * different \c Space types. For example, with the Cuda device, + * \c RandomAccess tells Kokkos to fetch the data through the texture + * cache, whereas the non-GPU devices have no such hardware construct. 
+ * + * \subsection Kokkos_View_MT_PrefUse Preferred use of MemoryTraits + * + * Users should defer applying the optional \c MemoryTraits parameter + * until the point at which they actually plan to rely on it in a + * computational kernel. This minimizes the number of template + * parameters exposed in their code, which reduces the cost of + * compilation. Users may always assign a View without specified + * \c MemoryTraits to a compatible View with that specification. + * For example: + * \code + * // Pass in the simplest types of View possible. + * void + * doSomething (View<double*, Cuda> out, + * View<const double*, Cuda> in) + * { + * // Assign the "generic" View in to a RandomAccess View in_rr. + * // Note that RandomAccess View objects must have const data. + * View<const double*, Cuda, RandomAccess> in_rr = in; + * // ... do something with in_rr and out ... + * } + * \endcode + */ +template< class DataType , class ... Properties > +class View ; + +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#include <impl/KokkosExp_ViewMapping.hpp> +#include <impl/KokkosExp_ViewArray.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { + +namespace { + +constexpr Kokkos::Experimental::Impl::ALL_t + ALL = Kokkos::Experimental::Impl::ALL_t(); + +constexpr Kokkos::Experimental::Impl::WithoutInitializing_t + WithoutInitializing = Kokkos::Experimental::Impl::WithoutInitializing_t(); + +constexpr Kokkos::Experimental::Impl::AllowPadding_t + AllowPadding = Kokkos::Experimental::Impl::AllowPadding_t(); + +} + +/** \brief Create View allocation parameter bundle from argument list. 
+ * + * Valid argument list members are: + * 1) label as a "string" or std::string + * 2) memory space instance of the View::memory_space type + * 3) execution space instance compatible with the View::memory_space + * 4) Kokkos::WithoutInitializing to bypass initialization + * 5) Kokkos::AllowPadding to allow allocation to pad dimensions for memory alignment + * + * A pointer-to-memory argument is rejected at compile time. + */ +template< class ... Args > +inline +Impl::ViewCtorProp< typename Impl::ViewCtorProp< void , Args >::type ... > +view_alloc( Args const & ... args ) +{ + typedef + Impl::ViewCtorProp< typename Impl::ViewCtorProp< void , Args >::type ... > + return_type ; + + static_assert( ! return_type::has_pointer + , "Cannot give pointer-to-memory for view allocation" ); + + return return_type( args... ); +} + +/** \brief Create View wrapping parameter bundle from argument list. + * + * The argument list must contain a pointer to memory and must not + * contain a label, a memory space instance, or an execution space + * instance; this is enforced at compile time. + */ +template< class ... Args > +inline +Impl::ViewCtorProp< typename Impl::ViewCtorProp< void , Args >::type ... > +view_wrap( Args const & ... args ) +{ + typedef + Impl::ViewCtorProp< typename Impl::ViewCtorProp< void , Args >::type ... > + return_type ; + + static_assert( ! return_type::has_memory_space && + ! return_type::has_execution_space && + ! return_type::has_label && + return_type::has_pointer + , "Must only give pointer-to-memory for view wrapping" ); + + return return_type( args... ); +} + +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { + +template< class DataType , class ... Properties > +class View ; + +/** \brief Type trait: derives from std::true_type for View and const View types, + * and from std::false_type for every other type. + */ +template< class > struct is_view : public std::false_type {}; + +template< class D, class ... P > +struct is_view< View<D,P...> > : public std::true_type {}; + +template< class D, class ... P > +struct is_view< const View<D,P...> > : public std::true_type {}; + +template< class DataType , class ... Properties > +class View : public ViewTraits< DataType , Properties ... 
> { +private: + + template< class , class ... > friend class View ; + template< class , class ... > friend class Impl::ViewMapping ; + +public: + + typedef ViewTraits< DataType , Properties ... > traits ; + +private: + + typedef Kokkos::Experimental::Impl::ViewMapping< traits , void > map_type ; + typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ; + + track_type m_track ; + map_type m_map ; + +public: + + //---------------------------------------- + /** \brief Compatible view of array of scalar types */ + typedef View< typename traits::scalar_array_type , + typename traits::array_layout , + typename traits::device_type , + typename traits::memory_traits > + array_type ; + + /** \brief Compatible view of const data type */ + typedef View< typename traits::const_data_type , + typename traits::array_layout , + typename traits::device_type , + typename traits::memory_traits > + const_type ; + + /** \brief Compatible view of non-const data type */ + typedef View< typename traits::non_const_data_type , + typename traits::array_layout , + typename traits::device_type , + typename traits::memory_traits > + non_const_type ; + + /** \brief Compatible HostMirror view */ + typedef View< typename traits::non_const_data_type , + typename traits::array_layout , + typename traits::host_mirror_space > + HostMirror ; + + //---------------------------------------- + // Domain rank and extents + + enum { Rank = map_type::Rank }; + + /** \brief rank() to be implemented + */ + //KOKKOS_INLINE_FUNCTION + //static + //constexpr unsigned rank() { return map_type::Rank; } + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< std::is_integral<iType>::value , size_t >::type + extent( const iType & r ) const + { return m_map.extent(r); } + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< std::is_integral<iType>::value , int >::type + extent_int( const iType & r ) const + { return 
static_cast<int>(m_map.extent(r)); } + + KOKKOS_INLINE_FUNCTION constexpr + typename traits::array_layout layout() const + { return m_map.layout(); } + + //---------------------------------------- + /* Deprecate all 'dimension' functions in favor of + * ISO/C++ vocabulary 'extent'. + */ + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< std::is_integral<iType>::value , size_t >::type + dimension( const iType & r ) const { return extent( r ); } + + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_map.dimension_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_map.dimension_1(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_map.dimension_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_map.dimension_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_map.dimension_4(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_map.dimension_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_map.dimension_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_map.dimension_7(); } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION constexpr size_t size() const { return m_map.dimension_0() * + m_map.dimension_1() * + m_map.dimension_2() * + m_map.dimension_3() * + m_map.dimension_4() * + m_map.dimension_5() * + m_map.dimension_6() * + m_map.dimension_7(); } + + KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_map.stride_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_map.stride_1(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_map.stride_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_map.stride_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_map.stride_4(); } + 
KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_map.stride_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_map.stride_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_map.stride_7(); } + + template< typename iType > + KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_map.stride(s); } + + //---------------------------------------- + // Range span is the span which contains all members. + + typedef typename map_type::reference_type reference_type ; + typedef typename map_type::pointer_type pointer_type ; + + enum { reference_type_is_lvalue_reference = std::is_lvalue_reference< reference_type >::value }; + + KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_map.span(); } + // Deprecated, use 'span()' instead + KOKKOS_INLINE_FUNCTION constexpr size_t capacity() const { return m_map.span(); } + KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_map.span_is_contiguous(); } + KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { return m_map.data(); } + + // Deprecated, use 'span_is_contiguous()' instead + KOKKOS_INLINE_FUNCTION constexpr bool is_contiguous() const { return m_map.span_is_contiguous(); } + // Deprecated, use 'data()' instead + KOKKOS_INLINE_FUNCTION constexpr pointer_type ptr_on_device() const { return m_map.data(); } + + //---------------------------------------- + // Allow specializations to query their specialized map + + KOKKOS_INLINE_FUNCTION + const Kokkos::Experimental::Impl::ViewMapping< traits , void > & + implementation_map() const { return m_map ; } + + //---------------------------------------- + +private: + + enum { + is_layout_left = std::is_same< typename traits::array_layout + , Kokkos::LayoutLeft >::value , + + is_layout_right = std::is_same< typename traits::array_layout + , Kokkos::LayoutRight >::value , + + is_layout_stride = std::is_same< typename traits::array_layout + , Kokkos::LayoutStride >::value 
, + + is_default_map = + std::is_same< typename traits::specialize , void >::value && + ( is_layout_left || is_layout_right || is_layout_stride ) + }; + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + +#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \ + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \ + < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify(); \ + Kokkos::Experimental::Impl::view_verify_operator_bounds ARG ; + +#else + +#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \ + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \ + < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify(); + +#endif + +public: + + //------------------------------ + // Rank 0 operator() + + template< class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if<( Kokkos::Impl::are_integral<Args...>::value + && ( 0 == Rank ) + ), reference_type >::type + operator()( Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,args...) ) + + return m_map.reference(); + } + + //------------------------------ + // Rank 1 operator() + + template< typename I0 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,Args...>::value + && ( 1 == Rank ) + && ! is_default_map + ), reference_type >::type + operator()( const I0 & i0 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,args...) ) + + return m_map.reference(i0); + } + + template< typename I0 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,Args...>::value + && ( 1 == Rank ) + && is_default_map + && ! is_layout_stride + ), reference_type >::type + operator()( const I0 & i0 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,args...) ) + + return m_map.m_handle[ i0 ]; + } + + template< typename I0 + , class ... 
Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,Args...>::value + && ( 1 == Rank ) + && is_default_map + && is_layout_stride + ), reference_type >::type + operator()( const I0 & i0 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,args...) ) + + return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ]; + } + + //------------------------------ + // Rank 1 operator[] + + template< typename I0 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0>::value + && ( 1 == Rank ) + && ! is_default_map + ), reference_type >::type + operator[]( const I0 & i0 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0) ) + + return m_map.reference(i0); + } + + template< typename I0 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0>::value + && ( 1 == Rank ) + && is_default_map + && ! is_layout_stride + ), reference_type >::type + operator[]( const I0 & i0 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0) ) + + return m_map.m_handle[ i0 ]; + } + + template< typename I0 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0>::value + && ( 1 == Rank ) + && is_default_map + && is_layout_stride + ), reference_type >::type + operator[]( const I0 & i0 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0) ) + + return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ]; + } + + //------------------------------ + // Rank 2 + + template< typename I0 , typename I1 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,Args...>::value + && ( 2 == Rank ) + && ! is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) ) + + return m_map.reference(i0,i1); + } + + template< typename I0 , typename I1 + , class ... 
Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,Args...>::value + && ( 2 == Rank ) + && is_default_map + && is_layout_left && ( traits::rank_dynamic == 0 ) + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) ) + + return m_map.m_handle[ i0 + m_map.m_offset.m_dim.N0 * i1 ]; + } + + template< typename I0 , typename I1 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,Args...>::value + && ( 2 == Rank ) + && is_default_map + && is_layout_left && ( traits::rank_dynamic != 0 ) + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) ) + + return m_map.m_handle[ i0 + m_map.m_offset.m_stride * i1 ]; + } + + template< typename I0 , typename I1 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,Args...>::value + && ( 2 == Rank ) + && is_default_map + && is_layout_right && ( traits::rank_dynamic == 0 ) + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) ) + + return m_map.m_handle[ i1 + m_map.m_offset.m_dim.N1 * i0 ]; + } + + template< typename I0 , typename I1 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,Args...>::value + && ( 2 == Rank ) + && is_default_map + && is_layout_right && ( traits::rank_dynamic != 0 ) + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) ) + + return m_map.m_handle[ i1 + m_map.m_offset.m_stride * i0 ]; + } + + template< typename I0 , typename I1 + , class ... 
Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,Args...>::value + && ( 2 == Rank ) + && is_default_map + && is_layout_stride + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) ) + + return m_map.m_handle[ i0 * m_map.m_offset.m_stride.S0 + + i1 * m_map.m_offset.m_stride.S1 ]; + } + + //------------------------------ + // Rank 3 + + template< typename I0 , typename I1 , typename I2 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,Args...>::value + && ( 3 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,args...) ) + + return m_map.m_handle[ m_map.m_offset(i0,i1,i2) ]; + } + + template< typename I0 , typename I1 , typename I2 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,Args...>::value + && ( 3 == Rank ) + && ! is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,args...) ) + + return m_map.reference(i0,i1,i2); + } + + //------------------------------ + // Rank 4 + + template< typename I0 , typename I1 , typename I2 , typename I3 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,Args...>::value + && ( 4 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,args...) 
) + + return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3) ]; + } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,Args...>::value + && ( 4 == Rank ) + && ! is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,args...) ) + + return m_map.reference(i0,i1,i2,i3); + } + + //------------------------------ + // Rank 5 + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,Args...>::value + && ( 5 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,args...) ) + + return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4) ]; + } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,Args...>::value + && ( 5 == Rank ) + && ! is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,args...) ) + + return m_map.reference(i0,i1,i2,i3,i4); + } + + //------------------------------ + // Rank 6 + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 + , class ... 
Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,Args...>::value + && ( 6 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,args...) ) + + return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5) ]; + } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,Args...>::value + && ( 6 == Rank ) + && ! is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,args...) ) + + return m_map.reference(i0,i1,i2,i3,i4,i5); + } + + //------------------------------ + // Rank 7 + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 , typename I6 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,Args...>::value + && ( 7 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) + + return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6) ]; + } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 , typename I6 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,Args...>::value + && ( 7 == Rank ) + && ! 
is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) + + return m_map.reference(i0,i1,i2,i3,i4,i5,i6); + } + + //------------------------------ + // Rank 8 + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 , typename I6 , typename I7 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,I7,Args...>::value + && ( 8 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) ) + + return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; + } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 , typename I6 , typename I7 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,I7,Args...>::value + && ( 8 == Rank ) + && ! is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 + , Args ... args ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) 
) + + return m_map.reference(i0,i1,i2,i3,i4,i5,i6,i7); + } + +#undef KOKKOS_VIEW_OPERATOR_VERIFY + + //---------------------------------------- + // Standard destructor, constructors, and assignment operators + + KOKKOS_INLINE_FUNCTION + ~View() {} + + KOKKOS_INLINE_FUNCTION + View() : m_track(), m_map() {} + + KOKKOS_INLINE_FUNCTION + View( const View & rhs ) : m_track( rhs.m_track ), m_map( rhs.m_map ) {} + + KOKKOS_INLINE_FUNCTION + View( View && rhs ) : m_track( rhs.m_track ), m_map( rhs.m_map ) {} + + KOKKOS_INLINE_FUNCTION + View & operator = ( const View & rhs ) { m_track = rhs.m_track ; m_map = rhs.m_map ; return *this ; } + + KOKKOS_INLINE_FUNCTION + View & operator = ( View && rhs ) { m_track = rhs.m_track ; m_map = rhs.m_map ; return *this ; } + + //---------------------------------------- + // Compatible view copy constructor and assignment + // may assign unmanaged from managed. + + template< class RT , class ... RP > + KOKKOS_INLINE_FUNCTION + View( const View<RT,RP...> & rhs ) + : m_track( rhs.m_track , traits::is_managed ) + , m_map() + { + typedef typename View<RT,RP...>::traits SrcTraits ; + typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible View copy construction" ); + Mapping::assign( m_map , rhs.m_map , rhs.m_track ); + } + + template< class RT , class ... RP > + KOKKOS_INLINE_FUNCTION + View & operator = ( const View<RT,RP...> & rhs ) + { + typedef typename View<RT,RP...>::traits SrcTraits ; + typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible View copy assignment" ); + Mapping::assign( m_map , rhs.m_map , rhs.m_track ); + m_track.assign( rhs.m_track , traits::is_managed ); + return *this ; + } + + //---------------------------------------- + // Compatible subview constructor + // may assign unmanaged from managed. + + template< class RT , class ... 
RP , class Arg0 , class ... Args > + KOKKOS_INLINE_FUNCTION + View( const View< RT , RP... > & src_view + , const Arg0 & arg0 , Args ... args ) + : m_track( src_view.m_track , traits::is_managed ) + , m_map() + { + typedef View< RT , RP... > SrcType ; + + typedef Kokkos::Experimental::Impl::ViewMapping + < void /* deduce destination view type from source view traits */ + , typename SrcType::traits + , Arg0 , Args... > Mapping ; + + typedef typename Mapping::type DstType ; + + static_assert( Kokkos::Experimental::Impl::ViewMapping< traits , typename DstType::traits , void >::is_assignable + , "Subview construction requires compatible view and subview arguments" ); + + Mapping::assign( m_map, src_view.m_map, arg0 , args... ); + } + + //---------------------------------------- + // Allocation tracking properties + + KOKKOS_INLINE_FUNCTION + int use_count() const + { return m_track.use_count(); } + + inline + const std::string label() const + { return m_track.template get_label< typename traits::memory_space >(); } + + //---------------------------------------- + // Allocation according to allocation properties and array layout + + template< class ... P > + explicit inline + View( const Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< ! Impl::ViewCtorProp< P... >::has_pointer + , typename traits::array_layout + >::type const & arg_layout + ) + : m_track() + , m_map() + { + // Append layout and spaces if not input + typedef Impl::ViewCtorProp< P ... > alloc_prop_input ; + + // use 'std::integral_constant<unsigned,I>' for non-types + // to avoid duplicate class error. + typedef Impl::ViewCtorProp + < P ... 
+ , typename std::conditional + < alloc_prop_input::has_label + , std::integral_constant<unsigned,0> + , typename std::string + >::type + , typename std::conditional + < alloc_prop_input::has_memory_space + , std::integral_constant<unsigned,1> + , typename traits::device_type::memory_space + >::type + , typename std::conditional + < alloc_prop_input::has_execution_space + , std::integral_constant<unsigned,2> + , typename traits::device_type::execution_space + >::type + > alloc_prop ; + + static_assert( traits::is_managed + , "View allocation constructor requires managed memory" ); + + if ( alloc_prop::initialize && + ! alloc_prop::execution_space::is_initialized() ) { + // If initializing view data then + // the execution space must be initialized. + Kokkos::Impl::throw_runtime_exception("Constructing View and initializing data with uninitialized execution space"); + } + + // Copy the input allocation properties with possibly defaulted properties + alloc_prop prop( arg_prop ); + +//------------------------------------------------------------ +#if defined( KOKKOS_HAVE_CUDA ) + // If allocating in CudaUVMSpace must fence before and after + // the allocation to protect against possible concurrent access + // on the CPU and the GPU. + // Fence using the trait's execution space (which will be Kokkos::Cuda) + // to avoid incomplete type errors from using Kokkos::Cuda directly. 
+ if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { + traits::device_type::memory_space::execution_space::fence(); + } +#endif +//------------------------------------------------------------ + + Kokkos::Experimental::Impl::SharedAllocationRecord<> * + record = m_map.allocate_shared( prop , arg_layout ); + +//------------------------------------------------------------ +#if defined( KOKKOS_HAVE_CUDA ) + if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { + traits::device_type::memory_space::execution_space::fence(); + } +#endif +//------------------------------------------------------------ + + // Setup and initialization complete, start tracking + m_track.assign_allocated_record_to_uninitialized( record ); + } + + // Wrap memory according to properties and array layout + template< class ... P > + explicit KOKKOS_INLINE_FUNCTION + View( const Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< Impl::ViewCtorProp< P... >::has_pointer + , typename traits::array_layout + >::type const & arg_layout + ) + : m_track() // No memory tracking + , m_map( arg_prop , arg_layout ) + { + static_assert( + std::is_same< pointer_type + , typename Impl::ViewCtorProp< P... >::pointer_type + >::value , + "Constructing View to wrap user memory must supply matching pointer type" ); + } + + // Simple dimension-only layout + template< class ... P > + explicit inline + View( const Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< ! Impl::ViewCtorProp< P... >::has_pointer + , size_t + >::type const arg_N0 = 0 + , const size_t arg_N1 = 0 + , const size_t arg_N2 = 0 + , const size_t arg_N3 = 0 + , const size_t arg_N4 = 0 + , const size_t arg_N5 = 0 + , const size_t arg_N6 = 0 + , const size_t arg_N7 = 0 + ) + : View( arg_prop + , typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 + , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) + ) + {} + + template< class ... 
P > + explicit KOKKOS_INLINE_FUNCTION + View( const Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< Impl::ViewCtorProp< P... >::has_pointer + , size_t + >::type const arg_N0 = 0 + , const size_t arg_N1 = 0 + , const size_t arg_N2 = 0 + , const size_t arg_N3 = 0 + , const size_t arg_N4 = 0 + , const size_t arg_N5 = 0 + , const size_t arg_N6 = 0 + , const size_t arg_N7 = 0 + ) + : View( arg_prop + , typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 + , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) + ) + {} + + // Allocate with label and layout + template< typename Label > + explicit inline + View( const Label & arg_label + , typename std::enable_if< + Kokkos::Experimental::Impl::is_view_label<Label>::value , + typename traits::array_layout >::type const & arg_layout + ) + : View( Impl::ViewCtorProp< std::string >( arg_label ) , arg_layout ) + {} + + // Allocate label and layout, must disambiguate from subview constructor. + template< typename Label > + explicit inline + View( const Label & arg_label + , typename std::enable_if< + Kokkos::Experimental::Impl::is_view_label<Label>::value , + const size_t >::type arg_N0 = 0 + , const size_t arg_N1 = 0 + , const size_t arg_N2 = 0 + , const size_t arg_N3 = 0 + , const size_t arg_N4 = 0 + , const size_t arg_N5 = 0 + , const size_t arg_N6 = 0 + , const size_t arg_N7 = 0 + ) + : View( Impl::ViewCtorProp< std::string >( arg_label ) + , typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 + , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) + ) + {} + + // For backward compatibility + explicit inline + View( const ViewAllocateWithoutInitializing & arg_prop + , const typename traits::array_layout & arg_layout + ) + : View( Impl::ViewCtorProp< std::string , Kokkos::Experimental::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::Experimental::WithoutInitializing ) + , arg_layout + ) + {} + + explicit inline + View( const ViewAllocateWithoutInitializing & arg_prop + , const size_t arg_N0 = 0 + , 
const size_t arg_N1 = 0 + , const size_t arg_N2 = 0 + , const size_t arg_N3 = 0 + , const size_t arg_N4 = 0 + , const size_t arg_N5 = 0 + , const size_t arg_N6 = 0 + , const size_t arg_N7 = 0 + ) + : View( Impl::ViewCtorProp< std::string , Kokkos::Experimental::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::Experimental::WithoutInitializing ) + , typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 + , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) + ) + {} + + //---------------------------------------- + // Memory span required to wrap these dimensions. + static constexpr size_t required_allocation_size( + const size_t arg_N0 = 0 + , const size_t arg_N1 = 0 + , const size_t arg_N2 = 0 + , const size_t arg_N3 = 0 + , const size_t arg_N4 = 0 + , const size_t arg_N5 = 0 + , const size_t arg_N6 = 0 + , const size_t arg_N7 = 0 + ) + { + return map_type::memory_span( + typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 + , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) ); + } + + explicit KOKKOS_INLINE_FUNCTION + View( pointer_type arg_ptr + , const size_t arg_N0 = 0 + , const size_t arg_N1 = 0 + , const size_t arg_N2 = 0 + , const size_t arg_N3 = 0 + , const size_t arg_N4 = 0 + , const size_t arg_N5 = 0 + , const size_t arg_N6 = 0 + , const size_t arg_N7 = 0 + ) + : View( Impl::ViewCtorProp<pointer_type>(arg_ptr) + , typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 + , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) + ) + {} + + explicit KOKKOS_INLINE_FUNCTION + View( pointer_type arg_ptr + , const typename traits::array_layout & arg_layout + ) + : View( Impl::ViewCtorProp<pointer_type>(arg_ptr) , arg_layout ) + {} + + //---------------------------------------- + // Shared scratch memory constructor + + static inline + size_t shmem_size( const size_t arg_N0 = ~size_t(0) , + const size_t arg_N1 = ~size_t(0) , + const size_t arg_N2 = ~size_t(0) , + const size_t arg_N3 = ~size_t(0) , + const size_t arg_N4 = ~size_t(0) , + const size_t arg_N5 = 
~size_t(0) , + const size_t arg_N6 = ~size_t(0) , + const size_t arg_N7 = ~size_t(0) ) + { + const size_t num_passed_args = + ( arg_N0 != ~size_t(0) ) + ( arg_N1 != ~size_t(0) ) + ( arg_N2 != ~size_t(0) ) + + ( arg_N3 != ~size_t(0) ) + ( arg_N4 != ~size_t(0) ) + ( arg_N5 != ~size_t(0) ) + + ( arg_N6 != ~size_t(0) ) + ( arg_N7 != ~size_t(0) ); + + if ( std::is_same<typename traits::specialize,void>::value && num_passed_args != traits::rank_dynamic ) { + Kokkos::abort( "Kokkos::View::shmem_size() rank_dynamic != number of arguments.\n" ); + } + + return map_type::memory_span( + typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 + , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) ); + } + + explicit KOKKOS_INLINE_FUNCTION + View( const typename traits::execution_space::scratch_memory_space & arg_space + , const typename traits::array_layout & arg_layout ) + : View( Impl::ViewCtorProp<pointer_type>( + reinterpret_cast<pointer_type>( + arg_space.get_shmem( map_type::memory_span( arg_layout ) ) ) ) + , arg_layout ) + {} + + explicit KOKKOS_INLINE_FUNCTION + View( const typename traits::execution_space::scratch_memory_space & arg_space + , const size_t arg_N0 = 0 + , const size_t arg_N1 = 0 + , const size_t arg_N2 = 0 + , const size_t arg_N3 = 0 + , const size_t arg_N4 = 0 + , const size_t arg_N5 = 0 + , const size_t arg_N6 = 0 + , const size_t arg_N7 = 0 ) + : View( Impl::ViewCtorProp<pointer_type>( + reinterpret_cast<pointer_type>( + arg_space.get_shmem( + map_type::memory_span( + typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 + , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) ) ) ) ) + , typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 + , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) + ) + {} +}; + + + /** \brief Temporary free function rank() + * until rank() is implemented + * in the View + */ + template < typename D , class ... 
P > + KOKKOS_INLINE_FUNCTION + constexpr unsigned rank( const View<D , P...> & V ) { return V.Rank; } //Temporary until added to view + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template< class V , class ... Args > +using Subview = + typename Kokkos::Experimental::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , typename V::traits + , Args ... + >::type ; + +template< class D, class ... P , class ... Args > +KOKKOS_INLINE_FUNCTION +typename Kokkos::Experimental::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > + , Args ... + >::type +subview( const View< D, P... > & src , Args ... args ) +{ + static_assert( View< D , P... >::Rank == sizeof...(Args) , + "subview requires one argument for each source View rank" ); + + return typename + Kokkos::Experimental::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P ... > + , Args ... >::type( src , args ... ); +} + +template< class MemoryTraits , class D, class ... P , class ... Args > +KOKKOS_INLINE_FUNCTION +typename Kokkos::Experimental::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > + , Args ... + >::template apply< MemoryTraits >::type +subview( const View< D, P... > & src , Args ... args ) +{ + static_assert( View< D , P... >::Rank == sizeof...(Args) , + "subview requires one argument for each source View rank" ); + + return typename + Kokkos::Experimental::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P ... > + , Args ... > + ::template apply< MemoryTraits > + ::type( src , args ... 
); +} + + + +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { + +template< class LT , class ... LP , class RT , class ... RP > +KOKKOS_INLINE_FUNCTION +bool operator == ( const View<LT,LP...> & lhs , + const View<RT,RP...> & rhs ) +{ + // Same data, layout, dimensions + typedef ViewTraits<LT,LP...> lhs_traits ; + typedef ViewTraits<RT,RP...> rhs_traits ; + + return + std::is_same< typename lhs_traits::const_value_type , + typename rhs_traits::const_value_type >::value && + std::is_same< typename lhs_traits::array_layout , + typename rhs_traits::array_layout >::value && + std::is_same< typename lhs_traits::memory_space , + typename rhs_traits::memory_space >::value && + unsigned(lhs_traits::rank) == unsigned(rhs_traits::rank) && + lhs.data() == rhs.data() && + lhs.span() == rhs.span() && + lhs.dimension_0() == rhs.dimension_0() && + lhs.dimension_1() == rhs.dimension_1() && + lhs.dimension_2() == rhs.dimension_2() && + lhs.dimension_3() == rhs.dimension_3() && + lhs.dimension_4() == rhs.dimension_4() && + lhs.dimension_5() == rhs.dimension_5() && + lhs.dimension_6() == rhs.dimension_6() && + lhs.dimension_7() == rhs.dimension_7(); +} + +template< class LT , class ... LP , class RT , class ... RP > +KOKKOS_INLINE_FUNCTION +bool operator != ( const View<LT,LP...> & lhs , + const View<RT,RP...> & rhs ) +{ + return ! 
( operator==(lhs,rhs) ); +} + +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + /** \brief Forwards to SharedAllocationRecord<void,void>::tracking_claim_and_disable(). */ +inline +void shared_allocation_tracking_claim_and_disable() +{ Kokkos::Experimental::Impl::SharedAllocationRecord<void,void>::tracking_claim_and_disable(); } + /** \brief Forwards to SharedAllocationRecord<void,void>::tracking_release_and_enable(). */ +inline +void shared_allocation_tracking_release_and_enable() +{ Kokkos::Experimental::Impl::SharedAllocationRecord<void,void>::tracking_release_and_enable(); } + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + /** \brief Functor that assigns the value held in 'input' to every element of the view 'output'; constructing it runs the fill over the range [0, output.dimension_0()) on the view's execution space and then fences that execution space. */ +template< class OutputView , typename Enable = void > +struct ViewFill { + + typedef typename OutputView::const_value_type const_value_type ; + + const OutputView output ; + const_value_type input ; + /* Assigns 'input' to every element whose first index is i0, looping over dimensions 1 through 7. */ + KOKKOS_INLINE_FUNCTION + void operator()( const size_t i0 ) const + { + const size_t n1 = output.dimension_1(); + const size_t n2 = output.dimension_2(); + const size_t n3 = output.dimension_3(); + const size_t n4 = output.dimension_4(); + const size_t n5 = output.dimension_5(); + const size_t n6 = output.dimension_6(); + const size_t n7 = output.dimension_7(); + + for ( size_t i1 = 0 ; i1 < n1 ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < n2 ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < n3 ; ++i3 ) { + for ( size_t i4 = 0 ; i4 < n4 ; ++i4 ) { + for ( size_t i5 = 0 ; i5 < n5 ; ++i5 ) { + for ( size_t i6 = 0 ; i6 < n6 ; ++i6 ) { + for ( size_t i7 = 0 ; i7 < n7 ; ++i7 ) { + output(i0,i1,i2,i3,i4,i5,i6,i7) = input ; + }}}}}}} + } + + ViewFill( const OutputView & arg_out , const_value_type & arg_in ) + : output( arg_out ), input( arg_in ) + { + typedef typename OutputView::execution_space 
execution_space ; + typedef Kokkos::RangePolicy< execution_space > Policy ; + /* One work item per index of dimension 0; operator() covers the remaining dimensions. */ + const Kokkos::Impl::ParallelFor< ViewFill , Policy > closure( *this , Policy( 0 , output.dimension_0() ) ); + + closure.execute(); + + execution_space::fence(); + } +}; + /** \brief Specialization for OutputView::Rank == 0: copies the single value with Kokkos::Impl::DeepCopy into the view's memory space, treating the source as Kokkos::HostSpace memory. */ +template< class OutputView > +struct ViewFill< OutputView , typename std::enable_if< OutputView::Rank == 0 >::type > { + ViewFill( const OutputView & dst , const typename OutputView::const_value_type & src ) + { + Kokkos::Impl::DeepCopy< typename OutputView::memory_space , Kokkos::HostSpace > + ( dst.data() , & src , sizeof(typename OutputView::const_value_type) ); + } +}; + /** \brief Functor that copies 'input' into 'output' element by element over the extents common to both views; constructing it runs the copy over [0, n0) with a RangePolicy on ExecSpace. NOTE(review): unlike the generic ViewFill, no fence is issued after execute() here; confirm whether callers rely on one. */ +template< class OutputView , class InputView , class ExecSpace = typename OutputView::execution_space > +struct ViewRemap { + + const OutputView output ; + const InputView input ; + const size_t n0 ; + const size_t n1 ; + const size_t n2 ; + const size_t n3 ; + const size_t n4 ; + const size_t n5 ; + const size_t n6 ; + const size_t n7 ; + /* n0 through n7 are set to the smaller of the output and input extent in each dimension; the body then executes this functor over [0, n0). */ + ViewRemap( const OutputView & arg_out , const InputView & arg_in ) + : output( arg_out ), input( arg_in ) + , n0( std::min( (size_t)arg_out.dimension_0() , (size_t)arg_in.dimension_0() ) ) + , n1( std::min( (size_t)arg_out.dimension_1() , (size_t)arg_in.dimension_1() ) ) + , n2( std::min( (size_t)arg_out.dimension_2() , (size_t)arg_in.dimension_2() ) ) + , n3( std::min( (size_t)arg_out.dimension_3() , (size_t)arg_in.dimension_3() ) ) + , n4( std::min( (size_t)arg_out.dimension_4() , (size_t)arg_in.dimension_4() ) ) + , n5( std::min( (size_t)arg_out.dimension_5() , (size_t)arg_in.dimension_5() ) ) + , n6( std::min( (size_t)arg_out.dimension_6() , (size_t)arg_in.dimension_6() ) ) + , n7( std::min( (size_t)arg_out.dimension_7() , (size_t)arg_in.dimension_7() ) ) + { + typedef Kokkos::RangePolicy< ExecSpace > Policy ; + const Kokkos::Impl::ParallelFor< ViewRemap , Policy > closure( *this , Policy( 0 , n0 ) ); + closure.execute(); + } + /* Copies every element whose first index is i0 within the common extents n1 through n7. */ + KOKKOS_INLINE_FUNCTION + void operator()( const size_t i0 ) const + { + for 
( size_t i1 = 0 ; i1 < n1 ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < n2 ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < n3 ; ++i3 ) { + for ( size_t i4 = 0 ; i4 < n4 ; ++i4 ) { + for ( size_t i5 = 0 ; i5 < n5 ; ++i5 ) { + for ( size_t i6 = 0 ; i6 < n6 ; ++i6 ) { + for ( size_t i7 = 0 ; i7 < n7 ; ++i7 ) { + output(i0,i1,i2,i3,i4,i5,i6,i7) = input(i0,i1,i2,i3,i4,i5,i6,i7); + }}}}}}} + } +}; + +} /* namespace Impl */ +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { + +/** \brief Deep copy a value from Host memory into a view. */ +template< class DT , class ... DP > +inline +void deep_copy + ( const View<DT,DP...> & dst + , typename ViewTraits<DT,DP...>::const_value_type & value + , typename std::enable_if< + std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value + >::type * = 0 ) +{ + static_assert( + std::is_same< typename ViewTraits<DT,DP...>::non_const_value_type , + typename ViewTraits<DT,DP...>::value_type >::value + , "deep_copy requires non-const type" ); + + Kokkos::Experimental::Impl::ViewFill< View<DT,DP...> >( dst , value ); +} + +/** \brief Deep copy into a value in Host memory from a view. */ +template< class ST , class ... 
SP > +inline +void deep_copy + ( typename ViewTraits<ST,SP...>::non_const_value_type & dst + , const View<ST,SP...> & src + , typename std::enable_if< + std::is_same< typename ViewTraits<ST,SP...>::specialize , void >::value + >::type * = 0 ) +{ + static_assert( ViewTraits<ST,SP...>::rank == 0 + , "ERROR: Non-rank-zero view in deep_copy( value , View )" ); + + typedef ViewTraits<ST,SP...> src_traits ; + typedef typename src_traits::memory_space src_memory_space ; + Kokkos::Impl::DeepCopy< HostSpace , src_memory_space >( & dst , src.data() , sizeof(ST) ); +} + +//---------------------------------------------------------------------------- +/** \brief A deep copy between views of compatible type, and rank zero. */ +template< class DT , class ... DP , class ST , class ... SP > +inline +void deep_copy + ( const View<DT,DP...> & dst + , const View<ST,SP...> & src + , typename std::enable_if<( + std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value && + std::is_same< typename ViewTraits<ST,SP...>::specialize , void >::value && + ( unsigned(ViewTraits<DT,DP...>::rank) == unsigned(0) && + unsigned(ViewTraits<ST,SP...>::rank) == unsigned(0) ) + )>::type * = 0 ) +{ + static_assert( + std::is_same< typename ViewTraits<DT,DP...>::value_type , + typename ViewTraits<ST,SP...>::non_const_value_type >::value + , "deep_copy requires matching non-const destination type" ); + + typedef View<DT,DP...> dst_type ; + typedef View<ST,SP...> src_type ; + + typedef typename dst_type::value_type value_type ; + typedef typename dst_type::memory_space dst_memory_space ; + typedef typename src_type::memory_space src_memory_space ; + + if ( dst.data() != src.data() ) { + Kokkos::Impl::DeepCopy< dst_memory_space , src_memory_space >( dst.data() , src.data() , sizeof(value_type) ); + } +} + +//---------------------------------------------------------------------------- +/** \brief A deep copy between views of the default specialization, compatible type, + * same non-zero rank, 
same contiguous layout. + */ +template< class DT , class ... DP , class ST , class ... SP > +inline +void deep_copy + ( const View<DT,DP...> & dst + , const View<ST,SP...> & src + , typename std::enable_if<( + std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value && + std::is_same< typename ViewTraits<ST,SP...>::specialize , void >::value && + ( unsigned(ViewTraits<DT,DP...>::rank) != 0 || + unsigned(ViewTraits<ST,SP...>::rank) != 0 ) + )>::type * = 0 ) +{ + static_assert( + std::is_same< typename ViewTraits<DT,DP...>::value_type , + typename ViewTraits<DT,DP...>::non_const_value_type >::value + , "deep_copy requires non-const destination type" ); + + static_assert( + ( unsigned(ViewTraits<DT,DP...>::rank) == + unsigned(ViewTraits<ST,SP...>::rank) ) + , "deep_copy requires Views of equal rank" ); + + typedef View<DT,DP...> dst_type ; + typedef View<ST,SP...> src_type ; + + typedef typename dst_type::execution_space dst_execution_space ; + typedef typename src_type::execution_space src_execution_space ; + typedef typename dst_type::memory_space dst_memory_space ; + typedef typename src_type::memory_space src_memory_space ; + + enum { DstExecCanAccessSrc = + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value }; + + enum { SrcExecCanAccessDst = + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename src_execution_space::memory_space , dst_memory_space >::value }; + + + if ( (void *) dst.data() != (void*) src.data() ) { + + // Concern: If overlapping views then a parallel copy will be erroneous. + // ... 
+ + // If same type, equal layout, equal dimensions, equal span, and contiguous memory then can byte-wise copy + + if ( std::is_same< typename ViewTraits<DT,DP...>::value_type , + typename ViewTraits<ST,SP...>::non_const_value_type >::value && + ( + ( std::is_same< typename ViewTraits<DT,DP...>::array_layout , + typename ViewTraits<ST,SP...>::array_layout >::value + && + ( std::is_same< typename ViewTraits<DT,DP...>::array_layout , + typename Kokkos::LayoutLeft>::value + || + std::is_same< typename ViewTraits<DT,DP...>::array_layout , + typename Kokkos::LayoutRight>::value + ) + ) + || + ( ViewTraits<DT,DP...>::rank == 1 && + ViewTraits<ST,SP...>::rank == 1 ) + ) && + dst.span_is_contiguous() && + src.span_is_contiguous() && + dst.span() == src.span() && + dst.dimension_0() == src.dimension_0() && + dst.dimension_1() == src.dimension_1() && + dst.dimension_2() == src.dimension_2() && + dst.dimension_3() == src.dimension_3() && + dst.dimension_4() == src.dimension_4() && + dst.dimension_5() == src.dimension_5() && + dst.dimension_6() == src.dimension_6() && + dst.dimension_7() == src.dimension_7() ) { + + const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span(); + + Kokkos::Impl::DeepCopy< dst_memory_space , src_memory_space >( dst.data() , src.data() , nbytes ); + } + else if ( std::is_same< typename ViewTraits<DT,DP...>::value_type , + typename ViewTraits<ST,SP...>::non_const_value_type >::value && + ( + ( std::is_same< typename ViewTraits<DT,DP...>::array_layout , + typename ViewTraits<ST,SP...>::array_layout >::value + && + std::is_same< typename ViewTraits<DT,DP...>::array_layout , + typename Kokkos::LayoutStride>::value + ) + || + ( ViewTraits<DT,DP...>::rank == 1 && + ViewTraits<ST,SP...>::rank == 1 ) + ) && + dst.span_is_contiguous() && + src.span_is_contiguous() && + dst.span() == src.span() && + dst.dimension_0() == src.dimension_0() && + dst.dimension_1() == src.dimension_1() && + dst.dimension_2() == src.dimension_2() && + 
dst.dimension_3() == src.dimension_3() && + dst.dimension_4() == src.dimension_4() && + dst.dimension_5() == src.dimension_5() && + dst.dimension_6() == src.dimension_6() && + dst.dimension_7() == src.dimension_7() && + dst.stride_0() == src.stride_0() && + dst.stride_1() == src.stride_1() && + dst.stride_2() == src.stride_2() && + dst.stride_3() == src.stride_3() && + dst.stride_4() == src.stride_4() && + dst.stride_5() == src.stride_5() && + dst.stride_6() == src.stride_6() && + dst.stride_7() == src.stride_7() + ) { + + const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span(); + + Kokkos::Impl::DeepCopy< dst_memory_space , src_memory_space >( dst.data() , src.data() , nbytes ); + } + else if ( DstExecCanAccessSrc ) { + // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape. + Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type >( dst , src ); + } + else if ( SrcExecCanAccessDst ) { + // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape. + Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type , src_execution_space >( dst , src ); + } + else { + Kokkos::Impl::throw_runtime_exception("deep_copy given views that would require a temporary allocation"); + } + } +} + +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { + +/** \brief Deep copy a value from Host memory into a view. */ +template< class ExecSpace ,class DT , class ... 
DP > +inline +void deep_copy + ( const ExecSpace & + , const View<DT,DP...> & dst + , typename ViewTraits<DT,DP...>::const_value_type & value + , typename std::enable_if< + Kokkos::Impl::is_execution_space< ExecSpace >::value && + std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value + >::type * = 0 ) +{ + static_assert( + std::is_same< typename ViewTraits<DT,DP...>::non_const_value_type , + typename ViewTraits<DT,DP...>::value_type >::value + , "deep_copy requires non-const type" ); + + Kokkos::Experimental::Impl::ViewFill< View<DT,DP...> >( dst , value ); +} + +/** \brief Deep copy into a value in Host memory from a view. */ +template< class ExecSpace , class ST , class ... SP > +inline +void deep_copy + ( const ExecSpace & exec_space + , typename ViewTraits<ST,SP...>::non_const_value_type & dst + , const View<ST,SP...> & src + , typename std::enable_if< + Kokkos::Impl::is_execution_space< ExecSpace >::value && + std::is_same< typename ViewTraits<ST,SP...>::specialize , void >::value + >::type * = 0 ) +{ + static_assert( ViewTraits<ST,SP...>::rank == 0 + , "ERROR: Non-rank-zero view in deep_copy( value , View )" ); + + typedef ViewTraits<ST,SP...> src_traits ; + typedef typename src_traits::memory_space src_memory_space ; + Kokkos::Impl::DeepCopy< HostSpace , src_memory_space , ExecSpace > + ( exec_space , & dst , src.data() , sizeof(ST) ); +} + +//---------------------------------------------------------------------------- +/** \brief A deep copy between views of compatible type, and rank zero. */ +template< class ExecSpace , class DT , class ... DP , class ST , class ... 
SP > +inline +void deep_copy + ( const ExecSpace & exec_space + , const View<DT,DP...> & dst + , const View<ST,SP...> & src + , typename std::enable_if<( + Kokkos::Impl::is_execution_space< ExecSpace >::value && + std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value && + std::is_same< typename ViewTraits<ST,SP...>::specialize , void >::value && + ( unsigned(ViewTraits<DT,DP...>::rank) == unsigned(0) && + unsigned(ViewTraits<ST,SP...>::rank) == unsigned(0) ) + )>::type * = 0 ) +{ + static_assert( + std::is_same< typename ViewTraits<DT,DP...>::value_type , + typename ViewTraits<ST,SP...>::non_const_value_type >::value + , "deep_copy requires matching non-const destination type" ); + + typedef View<DT,DP...> dst_type ; + typedef View<ST,SP...> src_type ; + + typedef typename dst_type::value_type value_type ; + typedef typename dst_type::memory_space dst_memory_space ; + typedef typename src_type::memory_space src_memory_space ; + + if ( dst.data() != src.data() ) { + Kokkos::Impl::DeepCopy< dst_memory_space , src_memory_space , ExecSpace > + ( exec_space , dst.data() , src.data() , sizeof(value_type) ); + } +} + +//---------------------------------------------------------------------------- +/** \brief A deep copy between views of the default specialization, compatible type, + * same non-zero rank, same contiguous layout. + */ +template< class ExecSpace , class DT, class ... DP, class ST, class ... 
SP > +inline +void deep_copy + ( const ExecSpace & exec_space + , const View<DT,DP...> & dst + , const View<ST,SP...> & src + , typename std::enable_if<( + Kokkos::Impl::is_execution_space< ExecSpace >::value && + std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value && + std::is_same< typename ViewTraits<ST,SP...>::specialize , void >::value && + ( unsigned(ViewTraits<DT,DP...>::rank) != 0 || + unsigned(ViewTraits<ST,SP...>::rank) != 0 ) + )>::type * = 0 ) +{ + static_assert( + std::is_same< typename ViewTraits<DT,DP...>::value_type , + typename ViewTraits<DT,DP...>::non_const_value_type >::value + , "deep_copy requires non-const destination type" ); + + static_assert( + ( unsigned(ViewTraits<DT,DP...>::rank) == + unsigned(ViewTraits<ST,SP...>::rank) ) + , "deep_copy requires Views of equal rank" ); + + typedef View<DT,DP...> dst_type ; + typedef View<ST,SP...> src_type ; + + typedef typename dst_type::execution_space dst_execution_space ; + typedef typename src_type::execution_space src_execution_space ; + typedef typename dst_type::memory_space dst_memory_space ; + typedef typename src_type::memory_space src_memory_space ; + + enum { DstExecCanAccessSrc = + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value }; + + enum { SrcExecCanAccessDst = + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename src_execution_space::memory_space , dst_memory_space >::value }; + + if ( (void *) dst.data() != (void*) src.data() ) { + + // Concern: If overlapping views then a parallel copy will be erroneous. + // ... 
+ + // If same type, equal layout, equal dimensions, equal span, and contiguous memory then can byte-wise copy + + if ( std::is_same< typename ViewTraits<DT,DP...>::value_type , + typename ViewTraits<ST,SP...>::non_const_value_type >::value && + ( + std::is_same< typename ViewTraits<DT,DP...>::array_layout , + typename ViewTraits<ST,SP...>::array_layout >::value + || + ( ViewTraits<DT,DP...>::rank == 1 && + ViewTraits<ST,SP...>::rank == 1 ) + ) && + dst.span_is_contiguous() && + src.span_is_contiguous() && + dst.span() == src.span() && + dst.dimension_0() == src.dimension_0() && + dst.dimension_1() == src.dimension_1() && + dst.dimension_2() == src.dimension_2() && + dst.dimension_3() == src.dimension_3() && + dst.dimension_4() == src.dimension_4() && + dst.dimension_5() == src.dimension_5() && + dst.dimension_6() == src.dimension_6() && + dst.dimension_7() == src.dimension_7() ) { + + const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span(); + + Kokkos::Impl::DeepCopy< dst_memory_space , src_memory_space , ExecSpace > + ( exec_space , dst.data() , src.data() , nbytes ); + } + else if ( DstExecCanAccessSrc ) { + // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape. + Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type >( dst , src ); + } + else if ( SrcExecCanAccessDst ) { + // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape. 
+ Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type , src_execution_space >( dst , src ); + } + else { + Kokkos::Impl::throw_runtime_exception("deep_copy given views that would require a temporary allocation"); + } + } +} + +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +// Deduce Mirror Types +template<class Space, class T, class ... P> +struct MirrorViewType { + // The incoming view_type + typedef typename Kokkos::Experimental::View<T,P...> src_view_type; + // The memory space for the mirror view + typedef typename Space::memory_space memory_space; + // Check whether it is the same memory space + enum { is_same_memspace = std::is_same<memory_space,typename src_view_type::memory_space>::value }; + // The array_layout + typedef typename src_view_type::array_layout array_layout; + // The data type (we probably want it non-const since otherwise we can't even deep_copy to it. + typedef typename src_view_type::non_const_data_type data_type; + // The destination view type if it is not the same memory space + typedef Kokkos::Experimental::View<data_type,array_layout,Space> dest_view_type; + // If it is the same memory_space return the existsing view_type + // This will also keep the unmanaged trait if necessary + typedef typename std::conditional<is_same_memspace,src_view_type,dest_view_type>::type view_type; +}; + +template<class Space, class T, class ... 
P> +struct MirrorType { + // The incoming view_type + typedef typename Kokkos::Experimental::View<T,P...> src_view_type; + // The memory space for the mirror view + typedef typename Space::memory_space memory_space; + // Check whether it is the same memory space + enum { is_same_memspace = std::is_same<memory_space,typename src_view_type::memory_space>::value }; + // The array_layout + typedef typename src_view_type::array_layout array_layout; + // The data type (we probably want it non-const since otherwise we can't even deep_copy to it. + typedef typename src_view_type::non_const_data_type data_type; + // The destination view type if it is not the same memory space + typedef Kokkos::Experimental::View<data_type,array_layout,Space> view_type; +}; + +} + +template< class T , class ... P > +inline +typename Kokkos::Experimental::View<T,P...>::HostMirror +create_mirror( const Kokkos::Experimental::View<T,P...> & src + , typename std::enable_if< + ! std::is_same< typename Kokkos::Experimental::ViewTraits<T,P...>::array_layout + , Kokkos::LayoutStride >::value + >::type * = 0 + ) +{ + typedef View<T,P...> src_type ; + typedef typename src_type::HostMirror dst_type ; + + return dst_type( std::string( src.label() ).append("_mirror") + , src.dimension_0() + , src.dimension_1() + , src.dimension_2() + , src.dimension_3() + , src.dimension_4() + , src.dimension_5() + , src.dimension_6() + , src.dimension_7() ); +} + +template< class T , class ... 
P > +inline +typename Kokkos::Experimental::View<T,P...>::HostMirror +create_mirror( const Kokkos::Experimental::View<T,P...> & src + , typename std::enable_if< + std::is_same< typename Kokkos::Experimental::ViewTraits<T,P...>::array_layout + , Kokkos::LayoutStride >::value + >::type * = 0 + ) +{ + typedef View<T,P...> src_type ; + typedef typename src_type::HostMirror dst_type ; + + Kokkos::LayoutStride layout ; + + layout.dimension[0] = src.dimension_0(); + layout.dimension[1] = src.dimension_1(); + layout.dimension[2] = src.dimension_2(); + layout.dimension[3] = src.dimension_3(); + layout.dimension[4] = src.dimension_4(); + layout.dimension[5] = src.dimension_5(); + layout.dimension[6] = src.dimension_6(); + layout.dimension[7] = src.dimension_7(); + + layout.stride[0] = src.stride_0(); + layout.stride[1] = src.stride_1(); + layout.stride[2] = src.stride_2(); + layout.stride[3] = src.stride_3(); + layout.stride[4] = src.stride_4(); + layout.stride[5] = src.stride_5(); + layout.stride[6] = src.stride_6(); + layout.stride[7] = src.stride_7(); + + return dst_type( std::string( src.label() ).append("_mirror") , layout ); +} + + +// Create a mirror in a new space (specialization for different space) +template<class Space, class T, class ... P> +typename Impl::MirrorType<Space,T,P ...>::view_type create_mirror(const Space& , const Kokkos::Experimental::View<T,P...> & src) { + return typename Impl::MirrorType<Space,T,P ...>::view_type(src.label(),src.layout()); +} + +template< class T , class ... 
P > +inline +typename Kokkos::Experimental::View<T,P...>::HostMirror +create_mirror_view( const Kokkos::Experimental::View<T,P...> & src + , typename std::enable_if<( + std::is_same< typename Kokkos::Experimental::View<T,P...>::memory_space + , typename Kokkos::Experimental::View<T,P...>::HostMirror::memory_space + >::value + && + std::is_same< typename Kokkos::Experimental::View<T,P...>::data_type + , typename Kokkos::Experimental::View<T,P...>::HostMirror::data_type + >::value + )>::type * = 0 + ) +{ + return src ; +} + +template< class T , class ... P > +inline +typename Kokkos::Experimental::View<T,P...>::HostMirror +create_mirror_view( const Kokkos::Experimental::View<T,P...> & src + , typename std::enable_if< ! ( + std::is_same< typename Kokkos::Experimental::View<T,P...>::memory_space + , typename Kokkos::Experimental::View<T,P...>::HostMirror::memory_space + >::value + && + std::is_same< typename Kokkos::Experimental::View<T,P...>::data_type + , typename Kokkos::Experimental::View<T,P...>::HostMirror::data_type + >::value + )>::type * = 0 + ) +{ + return Kokkos::Experimental::create_mirror( src ); +} + +// Create a mirror view in a new space (specialization for same space) +template<class Space, class T, class ... P> +typename Impl::MirrorViewType<Space,T,P ...>::view_type +create_mirror_view(const Space& , const Kokkos::Experimental::View<T,P...> & src + , typename std::enable_if<Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { + return src; +} + +// Create a mirror view in a new space (specialization for different space) +template<class Space, class T, class ... 
P> +typename Impl::MirrorViewType<Space,T,P ...>::view_type +create_mirror_view(const Space& , const Kokkos::Experimental::View<T,P...> & src + , typename std::enable_if<!Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { + return typename Impl::MirrorViewType<Space,T,P ...>::view_type(src.label(),src.layout()); +} + +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { + +/** \brief Resize a view with copying old data to new data at the corresponding indices. */ +template< class T , class ... P > +inline +void resize( Kokkos::Experimental::View<T,P...> & v , + const size_t n0 = 0 , + const size_t n1 = 0 , + const size_t n2 = 0 , + const size_t n3 = 0 , + const size_t n4 = 0 , + const size_t n5 = 0 , + const size_t n6 = 0 , + const size_t n7 = 0 ) +{ + typedef Kokkos::Experimental::View<T,P...> view_type ; + + static_assert( Kokkos::Experimental::ViewTraits<T,P...>::is_managed , "Can only resize managed views" ); + + view_type v_resized( v.label(), n0, n1, n2, n3, n4, n5, n6, n7 ); + + Kokkos::Experimental::Impl::ViewRemap< view_type , view_type >( v_resized , v ); + + v = v_resized ; +} + +/** \brief Resize a view with copying old data to new data at the corresponding indices. */ +template< class T , class ... 
P > +inline +void realloc( Kokkos::Experimental::View<T,P...> & v , + const size_t n0 = 0 , + const size_t n1 = 0 , + const size_t n2 = 0 , + const size_t n3 = 0 , + const size_t n4 = 0 , + const size_t n5 = 0 , + const size_t n6 = 0 , + const size_t n7 = 0 ) +{ + typedef Kokkos::Experimental::View<T,P...> view_type ; + + static_assert( Kokkos::Experimental::ViewTraits<T,P...>::is_managed , "Can only realloc managed views" ); + + const std::string label = v.label(); + + v = view_type(); // Deallocate first, if the only view to allocation + v = view_type( label, n0, n1, n2, n3, n4, n5, n6, n7 ); +} + +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +template< class D , class ... P > +using ViewTraits = Kokkos::Experimental::ViewTraits<D,P...> ; + +using Experimental::View ; //modified due to gcc parser bug +//template< class D , class ... P > +//using View = Kokkos::Experimental::View<D,P...> ; + +using Kokkos::Experimental::ALL ; +using Kokkos::Experimental::WithoutInitializing ; +using Kokkos::Experimental::AllowPadding ; +using Kokkos::Experimental::view_alloc ; +using Kokkos::Experimental::view_wrap ; + +using Kokkos::Experimental::deep_copy ; +using Kokkos::Experimental::create_mirror ; +using Kokkos::Experimental::create_mirror_view ; +using Kokkos::Experimental::subview ; +using Kokkos::Experimental::resize ; +using Kokkos::Experimental::realloc ; +using Kokkos::Experimental::is_view ; + +namespace Impl { + +using Kokkos::Experimental::is_view ; + +class ViewDefault {}; + +template< class SrcViewType + , class Arg0Type + , class Arg1Type + , class Arg2Type + , class Arg3Type + , class Arg4Type + , class Arg5Type + , class Arg6Type + , class Arg7Type + > +struct ViewSubview /* { typedef ... 
type ; } */ ; + +} + +} /* namespace Kokkos */ + +#include <impl/Kokkos_Atomic_View.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #ifndef KOKKOS_VIEW_HPP */ + diff --git a/lib/kokkos/core/src/Kokkos_hwloc.hpp b/lib/kokkos/core/src/Kokkos_hwloc.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ff713c95239197e57b51fafe51d9a6b69bb1472e --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_hwloc.hpp @@ -0,0 +1,144 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HWLOC_HPP +#define KOKKOS_HWLOC_HPP + +#include <utility> + +namespace Kokkos { + +/** \brief Minimal subset of logical 'hwloc' functionality available + * from http://www.open-mpi.org/projects/hwloc/. + * + * The calls are NOT thread safe in order to avoid mutexes, + * memory allocations, or other actions which could give the + * runtime system an opportunity to migrate the threads or + * touch allocated memory during the function calls. + * + * All calls to these functions should be performed by a thread + * when it has guaranteed exclusive access; e.g., for OpenMP + * within a 'critical' region. + */ +namespace hwloc { + +/** \brief Query if hwloc is available */ +bool available(); + +/** \brief Query number of available NUMA regions. + * This will be less than the hardware capacity + * if the MPI process is pinned to a NUMA region. + */ +unsigned get_available_numa_count(); + +/** \brief Query number of available cores per NUMA regions. + * This will be less than the hardware capacity + * if the MPI process is pinned to a set of cores. 
+ */ +unsigned get_available_cores_per_numa(); + +/** \brief Query number of available "hard" threads per core; i.e., hyperthreads */ +unsigned get_available_threads_per_core(); + +} /* namespace hwloc */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +// Internal functions for binding persistent spawned threads. + +namespace Kokkos { +namespace hwloc { + +/** \brief Recommend mapping of threads onto cores. + * + * If thread_count == 0 then choose and set a value. + * If use_numa_count == 0 then choose and set a value. + * If use_cores_per_numa == 0 then choose and set a value. + * + * Return 0 if asynchronous, + * Return 1 if synchronous and threads_coord[0] is process core + */ +unsigned thread_mapping( const char * const label , + const bool allow_async , + unsigned & thread_count , + unsigned & use_numa_count , + unsigned & use_cores_per_numa , + std::pair<unsigned,unsigned> threads_coord[] ); + +/** \brief Query core-coordinate of the current thread + * with respect to the core_topology. + * + * As long as the thread is running within the + * process binding the following condition holds. + * + * core_coordinate.first < core_topology.first + * core_coordinate.second < core_topology.second + */ +std::pair<unsigned,unsigned> get_this_thread_coordinate(); + +/** \brief Bind the current thread to a core. */ +bool bind_this_thread( const std::pair<unsigned,unsigned> ); + + +/** \brief Can hwloc bind threads? */ +bool can_bind_threads(); + +/** \brief Bind the current thread to one of the cores in the list. + * Set that entry to (~0,~0) and return the index. + * If binding fails return ~0. 
+ */ +unsigned bind_this_thread( const unsigned coordinate_count , + std::pair<unsigned,unsigned> coordinate[] ); + +/** \brief Unbind the current thread back to the original process binding */ +bool unbind_this_thread(); + +} /* namespace hwloc */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #define KOKKOS_HWLOC_HPP */ + diff --git a/lib/kokkos/core/src/Makefile b/lib/kokkos/core/src/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..dc27d341ac8ee4a40150bc93476b994666189739 --- /dev/null +++ b/lib/kokkos/core/src/Makefile @@ -0,0 +1,124 @@ +KOKKOS_PATH = ../.. + +PREFIX ?= /usr/local/lib/kokkos + +default: messages build-lib + echo "End Build" + +include $(KOKKOS_PATH)/Makefile.kokkos + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + CXX = $(NVCC_WRAPPER) + CXXFLAGS ?= -O3 + LINK = $(NVCC_WRAPPER) + LINKFLAGS ?= +else + CXX ?= g++ + CXXFLAGS ?= -O3 + LINK ?= g++ + LINKFLAGS ?= +endif + +PWD = $(shell pwd) + +KOKKOS_HEADERS_INCLUDE = $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) +KOKKOS_HEADERS_INCLUDE_IMPL = $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp) +KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp) +KOKKOS_HEADERS_INCLUDE_IMPL += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp) +KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp) + +CONDITIONAL_COPIES = + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + KOKKOS_HEADERS_CUDA += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) + CONDITIONAL_COPIES += copy-cuda +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + KOKKOS_HEADERS_THREADS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) + CONDITIONAL_COPIES += copy-threads +endif + +ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) + KOKKOS_HEADERS_QTHREAD += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.hpp) + CONDITIONAL_COPIES += copy-qthread +endif + +ifeq 
($(KOKKOS_INTERNAL_USE_OPENMP), 1) + KOKKOS_HEADERS_OPENMP += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) + CONDITIONAL_COPIES += copy-openmp +endif + +messages: + echo "Start Build" + +build-makefile-kokkos: + rm -f Makefile.kokkos + echo "#Global Settings used to generate this library" >> Makefile.kokkos + echo "KOKKOS_PATH = $(PREFIX)" >> Makefile.kokkos + echo "KOKKOS_DEVICES = $(KOKKOS_DEVICES)" >> Makefile.kokkos + echo "KOKKOS_ARCH = $(KOKKOS_ARCH)" >> Makefile.kokkos + echo "KOKKOS_DEBUG = $(KOKKOS_DEBUG)" >> Makefile.kokkos + echo "KOKKOS_USE_TPLS = $(KOKKOS_USE_TPLS)" >> Makefile.kokkos + echo "KOKKOS_CXX_STANDARD = $(KOKKOS_CXX_STANDARD)" >> Makefile.kokkos + echo "KOKKOS_OPTIONS = $(KOKKOS_OPTIONS)" >> Makefile.kokkos + echo "KOKKOS_CUDA_OPTIONS = $(KOKKOS_CUDA_OPTIONS)" >> Makefile.kokkos + echo "CXX ?= $(CXX)" >> Makefile.kokkos + echo "NVCC_WRAPPER ?= $(PREFIX)/bin/nvcc_wrapper" >> Makefile.kokkos + echo "" >> Makefile.kokkos + echo "#Source and Header files of Kokkos relative to KOKKOS_PATH" >> Makefile.kokkos + echo "KOKKOS_HEADERS = $(KOKKOS_HEADERS)" >> Makefile.kokkos + echo "KOKKOS_SRC = $(KOKKOS_SRC)" >> Makefile.kokkos + echo "" >> Makefile.kokkos + echo "#Variables used in application Makefiles" >> Makefile.kokkos + echo "KOKKOS_CPP_DEPENDS = $(KOKKOS_CPP_DEPENDS)" >> Makefile.kokkos + echo "KOKKOS_CXXFLAGS = $(KOKKOS_CXXFLAGS)" >> Makefile.kokkos + echo "KOKKOS_CPPFLAGS = $(KOKKOS_CPPFLAGS)" >> Makefile.kokkos + echo "KOKKOS_LINK_DEPENDS = $(KOKKOS_LINK_DEPENDS)" >> Makefile.kokkos + echo "KOKKOS_LIBS = $(KOKKOS_LIBS)" >> Makefile.kokkos + echo "KOKKOS_LDFLAGS = $(KOKKOS_LDFLAGS)" >> Makefile.kokkos + sed \ + -e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \ + -e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \ + -e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \ + -e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \ + -e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \ + -e 's|= KokkosCore_config.h|= 
$(PREFIX)/include/KokkosCore_config.h|g' Makefile.kokkos \ + > Makefile.kokkos.tmp + mv -f Makefile.kokkos.tmp Makefile.kokkos + +build-lib: build-makefile-kokkos $(KOKKOS_LINK_DEPENDS) + +mkdir: + mkdir -p $(PREFIX) + mkdir -p $(PREFIX)/bin + mkdir -p $(PREFIX)/include + mkdir -p $(PREFIX)/lib + mkdir -p $(PREFIX)/include/impl + +copy-cuda: mkdir + mkdir -p $(PREFIX)/include/Cuda + cp $(KOKKOS_HEADERS_CUDA) $(PREFIX)/include/Cuda + +copy-threads: mkdir + mkdir -p $(PREFIX)/include/Threads + cp $(KOKKOS_HEADERS_THREADS) $(PREFIX)/include/Threads + +copy-qthread: mkdir + mkdir -p $(PREFIX)/include/Qthread + cp $(KOKKOS_HEADERS_QTHREAD) $(PREFIX)/include/Qthread + +copy-openmp: mkdir + mkdir -p $(PREFIX)/include/OpenMP + cp $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP + +install: mkdir $(CONDITIONAL_COPIES) build-lib + cp $(NVCC_WRAPPER) $(PREFIX)/bin + cp $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include + cp $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl + cp Makefile.kokkos $(PREFIX) + cp libkokkos.a $(PREFIX)/lib + cp KokkosCore_config.h $(PREFIX)/include + +clean: kokkos-clean + rm -f Makefile.kokkos diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp new file mode 100644 index 0000000000000000000000000000000000000000..27ae5803cebef27646b16ef360d896ee919a9692 --- /dev/null +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp @@ -0,0 +1,750 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_OPENMP_PARALLEL_HPP +#define KOKKOS_OPENMP_PARALLEL_HPP + +#include <omp.h> +#include <iostream> +#include <Kokkos_Parallel.hpp> +#include <OpenMP/Kokkos_OpenMPexec.hpp> +#include <impl/Kokkos_FunctorAdapter.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class FunctorType , class ... 
Traits > +class ParallelFor< FunctorType + , Kokkos::RangePolicy< Traits ... > + , Kokkos::OpenMP + > +{ +private: + + typedef Kokkos::RangePolicy< Traits ... > Policy ; + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::WorkRange WorkRange ; + typedef typename Policy::member_type Member ; + + const FunctorType m_functor ; + const Policy m_policy ; + + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member ibeg , const Member iend ) + { + #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION + #ifdef KOKKOS_HAVE_PRAGMA_IVDEP + #pragma ivdep + #endif + #endif + for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) { + functor( iwork ); + } + } + + template< class TagType > + inline static + typename std::enable_if< ! std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member ibeg , const Member iend ) + { + const TagType t{} ; + #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION + #ifdef KOKKOS_HAVE_PRAGMA_IVDEP + #pragma ivdep + #endif + #endif + for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) { + functor( t , iwork ); + } + } + +public: + + inline void execute() const { + this->template execute_schedule<typename Policy::schedule_type::type>(); + } + + template<class Schedule> + inline + typename std::enable_if< std::is_same<Schedule,Kokkos::Static>::value >::type + execute_schedule() const + { + OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for"); + OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for"); + +#pragma omp parallel + { + OpenMPexec & exec = * OpenMPexec::get_thread_omp(); + + const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); + + ParallelFor::template exec_range< WorkTag >( m_functor , range.begin() , range.end() ); + } +/* END #pragma omp parallel */ + } + + template<class Schedule> + inline + typename std::enable_if< 
std::is_same<Schedule,Kokkos::Dynamic>::value >::type + execute_schedule() const + { + OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for"); + OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for"); + +#pragma omp parallel + { + OpenMPexec & exec = * OpenMPexec::get_thread_omp(); + + const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); + + exec.set_work_range(range.begin(),range.end(),m_policy.chunk_size()); + exec.reset_steal_target(); + #pragma omp barrier + + long work_index = exec.get_work_index(); + + while(work_index != -1) { + const Member begin = static_cast<Member>(work_index) * m_policy.chunk_size(); + const Member end = begin + m_policy.chunk_size() < m_policy.end()?begin+m_policy.chunk_size():m_policy.end(); + ParallelFor::template exec_range< WorkTag >( m_functor , begin, end ); + work_index = exec.get_work_index(); + } + + } +/* END #pragma omp parallel */ + } + + inline + ParallelFor( const FunctorType & arg_functor + , Policy arg_policy ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + {} +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class FunctorType , class ReducerType, class ... Traits > +class ParallelReduce< FunctorType + , Kokkos::RangePolicy< Traits ...> + , ReducerType + , Kokkos::OpenMP + > +{ +private: + + typedef Kokkos::RangePolicy< Traits ... 
> Policy ; + + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::WorkRange WorkRange ; + typedef typename Policy::member_type Member ; + + typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + + // Static Assert WorkTag void if ReducerType not InvalidType + + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ; + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + + const FunctorType m_functor ; + const Policy m_policy ; + const ReducerType m_reducer ; + const pointer_type m_result_ptr ; + + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member ibeg , const Member iend + , reference_type update ) + { + #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION + #ifdef KOKKOS_HAVE_PRAGMA_IVDEP + #pragma ivdep + #endif + #endif + for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) { + functor( iwork , update ); + } + } + + template< class TagType > + inline static + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member ibeg , const Member iend + , reference_type update ) + { + const TagType t{} ; + #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION + #ifdef KOKKOS_HAVE_PRAGMA_IVDEP + #pragma ivdep + #endif + #endif + for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) { + functor( t , iwork , update ); + } + } + +public: + + inline void execute() const { + this->template execute_schedule<typename Policy::schedule_type::type>(); + } + + template<class Schedule> + inline + typename std::enable_if< std::is_same<Schedule,Kokkos::Static>::value >::type + execute_schedule() const + { + OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce"); + OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_reduce"); + + OpenMPexec::resize_scratch( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , 0 ); + +#pragma omp parallel + { + OpenMPexec & exec = * OpenMPexec::get_thread_omp(); + const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); + ParallelReduce::template exec_range< WorkTag > + ( m_functor , range.begin() , range.end() + , ValueInit::init( ReducerConditional::select(m_functor , m_reducer), exec.scratch_reduce() ) ); + } +/* END #pragma omp parallel */ + + // Reduction: + + const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() ); + + for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) { + ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() ); + } + + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); + + if ( m_result_ptr ) { + const int n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); + + for ( int j = 0 ; j < n ; ++j ) { m_result_ptr[j] = ptr[j] ; } + } + } + + template<class Schedule> + inline + typename std::enable_if< 
std::is_same<Schedule,Kokkos::Dynamic>::value >::type + execute_schedule() const + { + OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce"); + OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_reduce"); + + OpenMPexec::resize_scratch( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , 0 ); + +#pragma omp parallel + { + OpenMPexec & exec = * OpenMPexec::get_thread_omp(); + const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); + + exec.set_work_range(range.begin(),range.end(),m_policy.chunk_size()); + exec.reset_steal_target(); + #pragma omp barrier + + long work_index = exec.get_work_index(); + + reference_type update = ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , exec.scratch_reduce() ); + while(work_index != -1) { + const Member begin = static_cast<Member>(work_index) * m_policy.chunk_size(); + const Member end = begin + m_policy.chunk_size() < m_policy.end()?begin+m_policy.chunk_size():m_policy.end(); + ParallelReduce::template exec_range< WorkTag > + ( m_functor , begin,end + , update ); + work_index = exec.get_work_index(); + } + } +/* END #pragma omp parallel */ + + // Reduction: + + const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() ); + + for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) { + ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() ); + } + + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); + + if ( m_result_ptr ) { + const int n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); + + for ( int j = 0 ; j < n ; ++j ) { m_result_ptr[j] = ptr[j] ; } + } + } + + //---------------------------------------- + + template< class ViewType > + inline + ParallelReduce( const FunctorType & arg_functor + , Policy arg_policy + , const ViewType & arg_result_view + , typename 
std::enable_if< + Kokkos::is_view< ViewType >::value && + !Kokkos::is_reducer_type<ReducerType>::value + ,void*>::type = NULL) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( InvalidType() ) + , m_result_ptr( arg_result_view.data() ) + { + /*static_assert( std::is_same< typename ViewType::memory_space + , Kokkos::HostSpace >::value + , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" );*/ + } + + inline + ParallelReduce( const FunctorType & arg_functor + , Policy arg_policy + , const ReducerType& reducer ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( reducer ) + , m_result_ptr( reducer.result_view().data() ) + { + /*static_assert( std::is_same< typename ViewType::memory_space + , Kokkos::HostSpace >::value + , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" );*/ + } + +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class FunctorType , class ... Traits > +class ParallelScan< FunctorType + , Kokkos::RangePolicy< Traits ... > + , Kokkos::OpenMP + > +{ +private: + + typedef Kokkos::RangePolicy< Traits ... 
> Policy ; + + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::WorkRange WorkRange ; + typedef typename Policy::member_type Member ; + + typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< FunctorType, WorkTag > ValueJoin ; + typedef Kokkos::Impl::FunctorValueOps< FunctorType, WorkTag > ValueOps ; + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + + const FunctorType m_functor ; + const Policy m_policy ; + + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member ibeg , const Member iend + , reference_type update , const bool final ) + { + #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION + #ifdef KOKKOS_HAVE_PRAGMA_IVDEP + #pragma ivdep + #endif + #endif + for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) { + functor( iwork , update , final ); + } + } + + template< class TagType > + inline static + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member ibeg , const Member iend + , reference_type update , const bool final ) + { + const TagType t{} ; + #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION + #ifdef KOKKOS_HAVE_PRAGMA_IVDEP + #pragma ivdep + #endif + #endif + for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) { + functor( t , iwork , update , final ); + } + } + +public: + + inline + void execute() const + { + OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_scan"); + OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_scan"); + + OpenMPexec::resize_scratch( 2 * ValueTraits::value_size( m_functor ) , 0 ); + +#pragma omp parallel + { + OpenMPexec & exec = * OpenMPexec::get_thread_omp(); + const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); + const pointer_type ptr = + pointer_type( exec.scratch_reduce() ) + + ValueTraits::value_count( m_functor ); + ParallelScan::template exec_range< WorkTag > + ( m_functor , range.begin() , range.end() + , ValueInit::init( m_functor , ptr ) , false ); + } +/* END #pragma omp parallel */ + + { + const unsigned thread_count = OpenMPexec::pool_size(); + const unsigned value_count = ValueTraits::value_count( m_functor ); + + pointer_type ptr_prev = 0 ; + + for ( unsigned rank_rev = thread_count ; rank_rev-- ; ) { + + pointer_type ptr = pointer_type( OpenMPexec::pool_rev(rank_rev)->scratch_reduce() ); + + if ( ptr_prev ) { + for ( unsigned i = 0 ; i < value_count ; ++i ) { ptr[i] = ptr_prev[ i + value_count ] ; } + ValueJoin::join( m_functor , ptr + value_count , ptr ); + } + else { + ValueInit::init( m_functor , ptr ); + } + + ptr_prev = ptr ; + } + } + +#pragma omp parallel + { + OpenMPexec & exec = * OpenMPexec::get_thread_omp(); + const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); + const pointer_type ptr = pointer_type( exec.scratch_reduce() ); + ParallelScan::template exec_range< WorkTag > + ( m_functor , range.begin() 
, range.end() + , ValueOps::reference( ptr ) , true ); + } +/* END #pragma omp parallel */ + } + + //---------------------------------------- + + inline + ParallelScan( const FunctorType & arg_functor + , const Policy & arg_policy ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + {} + + //---------------------------------------- +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class FunctorType , class ... Properties > +class ParallelFor< FunctorType + , Kokkos::TeamPolicy< Properties ... > + , Kokkos::OpenMP + > +{ +private: + + typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::OpenMP, Properties ... > Policy ; + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::member_type Member ; + + const FunctorType m_functor ; + const Policy m_policy ; + const int m_shmem_size ; + + template< class TagType, class Schedule > + inline static + typename std::enable_if< std::is_same< TagType , void >::value && std::is_same<Schedule,Kokkos::Static>::value>::type + exec_team( const FunctorType & functor , Member member ) + { + for ( ; member.valid_static() ; member.next_static() ) { + functor( member ); + } + } + + template< class TagType, class Schedule > + inline static + typename std::enable_if< (! 
std::is_same< TagType , void >::value) && std::is_same<Schedule,Kokkos::Static>::value >::type + exec_team( const FunctorType & functor , Member member ) + { + const TagType t{} ; + for ( ; member.valid_static() ; member.next_static() ) { + functor( t , member ); + } + } + + template< class TagType, class Schedule > + inline static + typename std::enable_if< std::is_same< TagType , void >::value && std::is_same<Schedule,Kokkos::Dynamic>::value>::type + exec_team( const FunctorType & functor , Member member ) + { + #pragma omp barrier + for ( ; member.valid_dynamic() ; member.next_dynamic() ) { + functor( member ); + } + } + + template< class TagType, class Schedule > + inline static + typename std::enable_if< (! std::is_same< TagType , void >::value) && std::is_same<Schedule,Kokkos::Dynamic>::value >::type + exec_team( const FunctorType & functor , Member member ) + { + #pragma omp barrier + const TagType t{} ; + for ( ; member.valid_dynamic() ; member.next_dynamic() ) { + functor( t , member ); + } + } + +public: + + inline + void execute() const + { + OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for"); + OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for"); + + const size_t team_reduce_size = Policy::member_type::team_reduce_size(); + + OpenMPexec::resize_scratch( 0 , team_reduce_size + m_shmem_size + m_policy.scratch_size(1)); + +#pragma omp parallel + { + ParallelFor::template exec_team< WorkTag, typename Policy::schedule_type::type> + ( m_functor + , Member( * OpenMPexec::get_thread_omp(), m_policy, m_shmem_size, 0) ); + } +/* END #pragma omp parallel */ + } + + inline + ParallelFor( const FunctorType & arg_functor , + const Policy & arg_policy ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_shmem_size( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + {} +}; + + +template< class FunctorType , class ReducerType, class ... 
Properties > +class ParallelReduce< FunctorType + , Kokkos::TeamPolicy< Properties ... > + , ReducerType + , Kokkos::OpenMP + > +{ +private: + + typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::OpenMP, Properties ... > Policy ; + + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::member_type Member ; + + typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTag > ValueJoin ; + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + + const FunctorType m_functor ; + const Policy m_policy ; + const ReducerType m_reducer ; + const pointer_type m_result_ptr ; + const int m_shmem_size ; + + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_team( const FunctorType & functor , Member member , reference_type update ) + { + for ( ; member.valid_static() ; member.next_static() ) { + functor( member , update ); + } + } + + template< class TagType > + inline static + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + exec_team( const FunctorType & functor , Member member , reference_type update ) + { + const TagType t{} ; + for ( ; member.valid_static() ; member.next_static() ) { + functor( t , member , update ); + } + } + +public: + + inline + void execute() const + { + OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce"); + + const size_t team_reduce_size = Policy::member_type::team_reduce_size(); + + OpenMPexec::resize_scratch( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , team_reduce_size + m_shmem_size ); + +#pragma omp parallel + { + OpenMPexec & exec = * OpenMPexec::get_thread_omp(); + + ParallelReduce::template exec_team< WorkTag > + ( m_functor + , Member( exec , m_policy , m_shmem_size, 0 ) + , ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , exec.scratch_reduce() ) ); + } +/* END #pragma omp parallel */ + + { + const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() ); + + int max_active_threads = OpenMPexec::pool_size(); + if( max_active_threads > m_policy.league_size()* m_policy.team_size() ) + max_active_threads = m_policy.league_size()* m_policy.team_size(); + + for ( int i = 1 ; i < max_active_threads ; ++i ) { + ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() ); + } + + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); + + if ( m_result_ptr ) { + const int n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); + + for ( int j = 0 ; j < n ; ++j ) { m_result_ptr[j] = ptr[j] ; } + } + } + } + + template< class ViewType > + inline + ParallelReduce( const FunctorType & arg_functor , + const Policy & arg_policy , + const ViewType & arg_result , + typename std::enable_if< + Kokkos::is_view< ViewType >::value && + !Kokkos::is_reducer_type<ReducerType>::value + 
,void*>::type = NULL) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( InvalidType() ) + , m_result_ptr( arg_result.ptr_on_device() ) + , m_shmem_size( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + {} + + inline + ParallelReduce( const FunctorType & arg_functor + , Policy arg_policy + , const ReducerType& reducer ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( reducer ) + , m_result_ptr( reducer.result_view().data() ) + , m_shmem_size( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + { + /*static_assert( std::is_same< typename ViewType::memory_space + , Kokkos::HostSpace >::value + , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" );*/ + } + +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* KOKKOS_OPENMP_PARALLEL_HPP */ + diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3e22033f7c058dc6c084c445685c80beb8620da8 --- /dev/null +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp @@ -0,0 +1,329 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )

/// Spin barrier across the members of one task team.
///
/// The team's reduce scratch provides two 64-bit synchronization words that
/// are used alternately on successive calls.  Each member stores the low two
/// bits of m_sync_value into the byte at index m_team_rank of the current
/// word and then spins until the whole word equals m_sync_value.  After
/// every second call m_sync_value is toggled with m_sync_mask, and the step
/// counter wraps to zero once it exceeds 1000.
///
/// NOTE(review): byte index m_team_rank coincides with bits
/// [8*rank, 8*rank+8) of the word only on a little-endian host -- confirm.
void TaskExec< Kokkos::OpenMP >::team_barrier_impl() const
{
  const int required_bytes = int( 2 * sizeof(int64_t) );

  if ( m_team_exec->scratch_reduce_size() < required_bytes ) {
    Kokkos::abort("TaskQueue<OpenMP> scratch_reduce memory too small");
  }

  // Synchronize through team shared memory, alternating between the two
  // words so that back-to-back barriers cannot overtake one another.
  int64_t * const words = (int64_t *) m_team_exec->scratch_reduce();

  int64_t volatile * const slot = words + ( m_sync_step & 0x01 );

  // The single byte of the current word owned by this team member.
  int8_t volatile * const my_byte = ((int8_t *) slot) + m_team_rank ;

#if 0
fprintf( stdout
       , "barrier group(%d) member(%d) step(%d) wait(%lx) : before(%lx)\n"
       , m_group_rank
       , m_team_rank
       , m_sync_step
       , m_sync_value
       , *slot
       );
fflush(stdout);
#endif

  // Signal arrival ...
  *my_byte = int8_t( m_sync_value & 0x03 );

  // ... and wait until every team member has done the same.
  while ( *slot != m_sync_value ) { }

#if 0
fprintf( stdout
       , "barrier group(%d) member(%d) step(%d) wait(%lx) : after(%lx)\n"
       , m_group_rank
       , m_team_rank
       , m_sync_step
       , m_sync_value
       , *slot
       );
fflush(stdout);
#endif

  ++m_sync_step ;

  // Both words have been used once: flip the expected value for the next pair.
  const bool finished_pair = ( 0 == ( m_sync_step & 0x01 ) );

  if ( finished_pair ) {
    m_sync_value ^= m_sync_mask ;
    if ( m_sync_step > 1000 ) { m_sync_step = 0 ; }
  }
}

#endif
self , team_size ); + + // Team shared memory + task_root_type * volatile * const task_shared = + (task_root_type **) team_exec.m_team_exec->scratch_thread(); + +// Barrier across entire OpenMP thread pool to insure initialization +#pragma omp barrier + + // Loop until all queues are empty and no tasks in flight + + do { + + task_root_type * task = 0 ; + + // Each team lead attempts to acquire either a thread team task + // or a single thread task for the team. + + if ( 0 == team_exec.team_rank() ) { + + task = 0 < *((volatile int *) & queue->m_ready_count) ? end : 0 ; + + // Loop by priority and then type + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_task( & queue->m_ready[i][j] ); + } + } + } + + // Team lead broadcast acquired task to team members: + + if ( 1 < team_exec.team_size() ) { + + if ( 0 == team_exec.team_rank() ) *task_shared = task ; + + // Fence to be sure task_shared is stored before the barrier + Kokkos::memory_fence(); + + // Whole team waits for every team member to reach this statement + team_exec.team_barrier(); + + // Fence to be sure task_shared is stored + Kokkos::memory_fence(); + + task = *task_shared ; + } + +#if 0 +fprintf( stdout + , "\nexecute group(%d) member(%d) task_shared(0x%lx) task(0x%lx)\n" + , team_exec.m_group_rank + , team_exec.m_team_rank + , uintptr_t(task_shared) + , uintptr_t(task) + ); +fflush(stdout); +#endif + + if ( 0 == task ) break ; // 0 == m_ready_count + + if ( end == task ) { + // All team members wait for whole team to reach this statement. + // Is necessary to prevent task_shared from being updated + // before it is read by all threads. 
+ team_exec.team_barrier(); + } + else if ( task_root_type::TaskTeam == task->m_task_type ) { + // Thread Team Task + (*task->m_apply)( task , & team_exec ); + + // The m_apply function performs a barrier + + if ( 0 == team_exec.team_rank() ) { + // team member #0 completes the task, which may delete the task + queue->complete( task ); + } + } + else { + // Single Thread Task + + if ( 0 == team_exec.team_rank() ) { + + (*task->m_apply)( task , & single_exec ); + + queue->complete( task ); + } + + // All team members wait for whole team to reach this statement. + // Not necessary to complete the task. + // Is necessary to prevent task_shared from being updated + // before it is read by all threads. + team_exec.team_barrier(); + } + } while(1); + } +// END #pragma omp parallel + +} + +void TaskQueueSpecialization< Kokkos::OpenMP >:: + iff_single_thread_recursive_execute + ( TaskQueue< Kokkos::OpenMP > * const queue ) +{ + using execution_space = Kokkos::OpenMP ; + using queue_type = TaskQueue< execution_space > ; + using task_root_type = TaskBase< execution_space , void , void > ; + using Member = TaskExec< execution_space > ; + + if ( 1 == omp_get_num_threads() ) { + + task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + + Member single_exec ; + + task_root_type * task = end ; + + do { + + task = end ; + + // Loop by priority and then type + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_task( & queue->m_ready[i][j] ); + } + } + + if ( end == task ) break ; + + (*task->m_apply)( task , & single_exec ); + + queue->complete( task ); + + } while(1); + } +} + +}} /* namespace Kokkos::Impl */ + +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */ + + diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp 
b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2761247c40c930d1b454acfc373be2c8d8aaf4a3 --- /dev/null +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp @@ -0,0 +1,356 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_OPENMP_TASK_HPP +#define KOKKOS_IMPL_OPENMP_TASK_HPP + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template<> +class TaskQueueSpecialization< Kokkos::OpenMP > +{ +public: + + using execution_space = Kokkos::OpenMP ; + using queue_type = Kokkos::Impl::TaskQueue< execution_space > ; + using task_base_type = Kokkos::Impl::TaskBase< execution_space , void , void > ; + + // Must specify memory space + using memory_space = Kokkos::HostSpace ; + + static + void iff_single_thread_recursive_execute( queue_type * const ); + + // Must provide task queue execution function + static void execute( queue_type * const ); + + // Must provide mechanism to set function pointer in + // execution space from the host process. 
+ template< typename FunctorType > + static + void proc_set_apply( task_base_type::function_type * ptr ) + { + using TaskType = TaskBase< Kokkos::OpenMP + , typename FunctorType::value_type + , FunctorType + > ; + *ptr = TaskType::apply ; + } +}; + +extern template class TaskQueue< Kokkos::OpenMP > ; + +//---------------------------------------------------------------------------- + +template<> +class TaskExec< Kokkos::OpenMP > +{ +private: + + TaskExec( TaskExec && ) = delete ; + TaskExec( TaskExec const & ) = delete ; + TaskExec & operator = ( TaskExec && ) = delete ; + TaskExec & operator = ( TaskExec const & ) = delete ; + + + using PoolExec = Kokkos::Impl::OpenMPexec ; + + friend class Kokkos::Impl::TaskQueue< Kokkos::OpenMP > ; + friend class Kokkos::Impl::TaskQueueSpecialization< Kokkos::OpenMP > ; + + PoolExec * const m_self_exec ; ///< This thread's thread pool data structure + PoolExec * const m_team_exec ; ///< Team thread's thread pool data structure + int64_t m_sync_mask ; + int64_t mutable m_sync_value ; + int mutable m_sync_step ; + int m_group_rank ; ///< Which "team" subset of thread pool + int m_team_rank ; ///< Which thread within a team + int m_team_size ; + + TaskExec(); + TaskExec( PoolExec & arg_exec , int arg_team_size ); + + void team_barrier_impl() const ; + +public: + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + void * team_shared() const + { return m_team_exec ? m_team_exec->scratch_thread() : (void*) 0 ; } + + int team_shared_size() const + { return m_team_exec ? m_team_exec->scratch_thread_size() : 0 ; } + + /**\brief Whole team enters this function call + * before any team member returns from + * this function call. 
+ */ + void team_barrier() const { if ( 1 < m_team_size ) team_barrier_impl(); } +#else + KOKKOS_INLINE_FUNCTION void team_barrier() const {} + KOKKOS_INLINE_FUNCTION void * team_shared() const { return 0 ; } + KOKKOS_INLINE_FUNCTION int team_shared_size() const { return 0 ; } +#endif + + KOKKOS_INLINE_FUNCTION + int team_rank() const { return m_team_rank ; } + + KOKKOS_INLINE_FUNCTION + int team_size() const { return m_team_size ; } +}; + +}} /* namespace Kokkos::Impl */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +/** \brief Wraps (thread,count) in the loop-boundaries struct that the team-level + * parallel_for / parallel_reduce / parallel_scan overloads of this header take. + */ +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > > +TeamThreadRange + ( Impl::TaskExec< Kokkos::OpenMP > & thread + , const iType & count ) +{ + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >(thread,count); +} + +/** \brief Same as above, but the boundaries struct is built from an explicit + * (start,end) pair instead of a count. + */ +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::OpenMP > > +TeamThreadRange + ( Impl:: TaskExec< Kokkos::OpenMP > & thread + , const iType & start + , const iType & end ) +{ + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::OpenMP > >(thread,start,end); +} + +/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the calling thread team. + * This functionality requires C++11 support. 
+*/ +template<typename iType, class Lambda> +KOKKOS_INLINE_FUNCTION +void parallel_for + ( const Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::OpenMP > >& loop_boundaries + , const Lambda& lambda + ) +{ + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i); + } +} + +template<typename iType, class Lambda, typename ValueType> +KOKKOS_INLINE_FUNCTION +void parallel_reduce + ( const Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::OpenMP > >& loop_boundaries + , const Lambda& lambda + , ValueType& initialized_result) +{ + int team_rank = loop_boundaries.thread.team_rank(); // member num within the team + ValueType result = initialized_result; + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i, result); + } + + if ( 1 < loop_boundaries.thread.team_size() ) { + + ValueType *shared = (ValueType*) loop_boundaries.thread.team_shared(); + + loop_boundaries.thread.team_barrier(); + shared[team_rank] = result; + + loop_boundaries.thread.team_barrier(); + + // reduce across threads to thread 0 + if (team_rank == 0) { + for (int i = 1; i < loop_boundaries.thread.team_size(); i++) { + shared[0] += shared[i]; + } + } + + loop_boundaries.thread.team_barrier(); + + // broadcast result + initialized_result = shared[0]; + } + else { + initialized_result = result ; + } +} + +template< typename iType, class Lambda, typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >& loop_boundaries, + const Lambda & lambda, + const JoinType & join, + ValueType& initialized_result) +{ + int team_rank = loop_boundaries.thread.team_rank(); // member num within the team + ValueType result = initialized_result; + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i, result); + } + 
+ if ( 1 < loop_boundaries.thread.team_size() ) { + ValueType *shared = (ValueType*) loop_boundaries.thread.team_shared(); + + loop_boundaries.thread.team_barrier(); + shared[team_rank] = result; + + loop_boundaries.thread.team_barrier(); + + // reduce across threads to thread 0 + if (team_rank == 0) { + for (int i = 1; i < loop_boundaries.thread.team_size(); i++) { + join(shared[0], shared[i]); + } + } + + loop_boundaries.thread.team_barrier(); + + // broadcast result + initialized_result = shared[0]; + } + else { + initialized_result = result ; + } +} + +// placeholder for future function +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >& loop_boundaries, + const Lambda & lambda, + ValueType& initialized_result) +{ +} + +// placeholder for future function +template< typename iType, class Lambda, typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >& loop_boundaries, + const Lambda & lambda, + const JoinType & join, + ValueType& initialized_result) +{ +} + +/** \brief Team-level scan over the range described by loop_boundaries. + * + * The lambda is called as lambda(i,value,final). It is assumed to add the + * contribution of element i to value and to write its output only when + * final is true -- confirm against callers. + * A first pass computes each member's total, member 0 scans the totals in the + * team's shared scratch, and a second pass repeats the loop with the member's + * starting offset and final==true. + */ +template< typename ValueType, typename iType, class Lambda > +KOKKOS_INLINE_FUNCTION +void parallel_scan + (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >& loop_boundaries, + const Lambda & lambda) +{ + ValueType accum = 0 ; + ValueType val, local_total; + ValueType *shared = (ValueType*) loop_boundaries.thread.team_shared(); + int team_size = loop_boundaries.thread.team_size(); + int team_rank = loop_boundaries.thread.team_rank(); // member num within the team + + // Intra-member scan: only this member's total is needed here, so the lambda + // is never called with final==true in this pass. The offsets contributed by + // lower-ranked members are not known yet; the final writes happen in the + // second pass. + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + local_total = 0; + lambda(i,local_total,false); + val = accum; + lambda(i,val,false); + accum += local_total; + } + 
shared[team_rank] = accum; + loop_boundaries.thread.team_barrier(); + + // Member 0 do scan on accumulated totals + if (team_rank == 0) { + for( iType i = 1; i < team_size; i+=1) { + shared[i] += shared[i-1]; + } + accum = 0; // Member 0 set accum to 0 in preparation for inter-member scan + } + + loop_boundaries.thread.team_barrier(); + + // Inter-member scan adding in accumulated totals + if (team_rank != 0) { accum = shared[team_rank-1]; } + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + local_total = 0; + lambda(i,local_total,false); + val = accum; + lambda(i,val,true); + accum += local_total; + } +} + +// placeholder for future function +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_scan + (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >& loop_boundaries, + const Lambda & lambda) +{ +} + + +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #ifndef KOKKOS_IMPL_OPENMP_TASK_HPP */ + diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7d06a2f66149f93bd43d6a4976ae9060b8833997 --- /dev/null +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp @@ -0,0 +1,408 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <stdio.h> +#include <limits> +#include <iostream> +#include <vector> +#include <Kokkos_Core.hpp> +#include <impl/Kokkos_Error.hpp> +#include <iostream> +#include <impl/Kokkos_CPUDiscovery.hpp> +#include <impl/Kokkos_Profiling_Interface.hpp> + +#ifdef KOKKOS_HAVE_OPENMP + +namespace Kokkos { +namespace Impl { +namespace { + +KOKKOS_INLINE_FUNCTION +int kokkos_omp_in_parallel(); + +int kokkos_omp_in_critical_region = ( Kokkos::HostSpace::register_in_parallel( kokkos_omp_in_parallel ) , 0 ); + +KOKKOS_INLINE_FUNCTION +int kokkos_omp_in_parallel() +{ +#ifndef __CUDA_ARCH__ + return omp_in_parallel() && ! kokkos_omp_in_critical_region ; +#else + return 0; +#endif +} + +bool s_using_hwloc = false; + +} // namespace +} // namespace Impl +} // namespace Kokkos + + +namespace Kokkos { +namespace Impl { + +int OpenMPexec::m_map_rank[ OpenMPexec::MAX_THREAD_COUNT ] = { 0 }; + +int OpenMPexec::m_pool_topo[ 4 ] = { 0 }; + +OpenMPexec * OpenMPexec::m_pool[ OpenMPexec::MAX_THREAD_COUNT ] = { 0 }; + +void OpenMPexec::verify_is_process( const char * const label ) +{ + if ( omp_in_parallel() ) { + std::string msg( label ); + msg.append( " ERROR: in parallel" ); + Kokkos::Impl::throw_runtime_exception( msg ); + } +} + +void OpenMPexec::verify_initialized( const char * const label ) +{ + if ( 0 == m_pool[0] ) { + std::string msg( label ); + msg.append( " ERROR: not initialized" ); + Kokkos::Impl::throw_runtime_exception( msg ); + } + + if ( omp_get_max_threads() != Kokkos::OpenMP::thread_pool_size(0) ) { + std::string msg( label ); + msg.append( " ERROR: Initialized but threads modified inappropriately" ); + Kokkos::Impl::throw_runtime_exception( msg ); + } + +} + +void OpenMPexec::clear_scratch() +{ +#pragma omp parallel + { + const int rank_rev = m_map_rank[ omp_get_thread_num() ]; + typedef 
Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ; + if ( m_pool[ rank_rev ] ) { + Record * const r = Record::get_record( m_pool[ rank_rev ] ); + m_pool[ rank_rev ] = 0 ; + Record::decrement( r ); + } + } +/* END #pragma omp parallel */ +} + +void OpenMPexec::resize_scratch( size_t reduce_size , size_t thread_size ) +{ + enum { ALIGN_MASK = Kokkos::Impl::MEMORY_ALIGNMENT - 1 }; + enum { ALLOC_EXEC = ( sizeof(OpenMPexec) + ALIGN_MASK ) & ~ALIGN_MASK }; + + const size_t old_reduce_size = m_pool[0] ? m_pool[0]->m_scratch_reduce_end : 0 ; + const size_t old_thread_size = m_pool[0] ? m_pool[0]->m_scratch_thread_end - m_pool[0]->m_scratch_reduce_end : 0 ; + + reduce_size = ( reduce_size + ALIGN_MASK ) & ~ALIGN_MASK ; + thread_size = ( thread_size + ALIGN_MASK ) & ~ALIGN_MASK ; + + // Requesting allocation and old allocation is too small: + + const bool allocate = ( old_reduce_size < reduce_size ) || + ( old_thread_size < thread_size ); + + if ( allocate ) { + if ( reduce_size < old_reduce_size ) { reduce_size = old_reduce_size ; } + if ( thread_size < old_thread_size ) { thread_size = old_thread_size ; } + } + + const size_t alloc_size = allocate ? 
ALLOC_EXEC + reduce_size + thread_size : 0 ; + const int pool_size = m_pool_topo[0] ; + + if ( allocate ) { + + clear_scratch(); + +#pragma omp parallel + { + const int rank_rev = m_map_rank[ omp_get_thread_num() ]; + const int rank = pool_size - ( rank_rev + 1 ); + + typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ; + + Record * const r = Record::allocate( Kokkos::HostSpace() + , "openmp_scratch" + , alloc_size ); + + Record::increment( r ); + + m_pool[ rank_rev ] = reinterpret_cast<OpenMPexec*>( r->data() ); + + new ( m_pool[ rank_rev ] ) OpenMPexec( rank , ALLOC_EXEC , reduce_size , thread_size ); + } +/* END #pragma omp parallel */ + } +} + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +//---------------------------------------------------------------------------- + +int OpenMP::is_initialized() +{ return 0 != Impl::OpenMPexec::m_pool[0]; } + +void OpenMP::initialize( unsigned thread_count , + unsigned use_numa_count , + unsigned use_cores_per_numa ) +{ + // Before any other call to OMP query the maximum number of threads + // and save the value for re-initialization unit testing. + + //Using omp_get_max_threads(); is problematic in conjunction with + //Hwloc on Intel (essentially an initial call to the OpenMP runtime + //without a parallel region before will set a process mask for a single core + //The runtime will than bind threads for a parallel region to other cores on the + //entering the first parallel region and make the process mask the aggregate of + //the thread masks. 
The intent seems to be to make serial code run fast, if you + //compile with OpenMP enabled but don't actually use parallel regions or so + //static int omp_max_threads = omp_get_max_threads(); + // Instead, count the threads of a default parallel region: every thread + // atomically increments nthreads once. + int nthreads = 0; + #pragma omp parallel + { + #pragma omp atomic + nthreads++; + } + + // Function-local static: only the count measured by the first call is stored. + static int omp_max_threads = nthreads; + + const bool is_initialized = 0 != Impl::OpenMPexec::m_pool[0] ; + + bool thread_spawn_failed = false ; + + if ( ! is_initialized ) { + + // Use hwloc thread pinning if concerned with locality. + // If spreading threads across multiple NUMA regions. + // If hyperthreading is enabled. + Impl::s_using_hwloc = hwloc::available() && ( + ( 1 < Kokkos::hwloc::get_available_numa_count() ) || + ( 1 < Kokkos::hwloc::get_available_threads_per_core() ) ); + + // NOTE(review): thread_count is not checked against MAX_THREAD_COUNT, the + // capacity of threads_coord -- confirm that callers never exceed it. + std::pair<unsigned,unsigned> threads_coord[ Impl::OpenMPexec::MAX_THREAD_COUNT ]; + + // If hwloc available then use its maximum value. + + if ( thread_count == 0 ) { + thread_count = Impl::s_using_hwloc + ? Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core() + : omp_max_threads ; + } + + if(Impl::s_using_hwloc) + hwloc::thread_mapping( "Kokkos::OpenMP::initialize" , + false /* do not allow asynchronous */ , + thread_count , + use_numa_count , + use_cores_per_numa , + threads_coord ); + + // Spawn threads: + + omp_set_num_threads( thread_count ); + + // Verify OMP interaction: + if ( int(thread_count) != omp_get_max_threads() ) { + thread_spawn_failed = true ; + } + + // Verify spawning and bind threads: +#pragma omp parallel + { +#pragma omp critical + { + if ( int(thread_count) != omp_get_num_threads() ) { + thread_spawn_failed = true ; + } + + // Call to 'bind_this_thread' is not thread safe so place this whole block in a critical region. + // Call to 'new' may not be thread safe as well. + + // Reverse the rank for threads so that the scan operation reduces to the highest rank thread. 
+ + const unsigned omp_rank = omp_get_thread_num(); + const unsigned thread_r = Impl::s_using_hwloc && Kokkos::hwloc::can_bind_threads() + ? Kokkos::hwloc::bind_this_thread( thread_count , threads_coord ) + : omp_rank ; + + Impl::OpenMPexec::m_map_rank[ omp_rank ] = thread_r ; + } +/* END #pragma omp critical */ + } +/* END #pragma omp parallel */ + + if ( ! thread_spawn_failed ) { + Impl::OpenMPexec::m_pool_topo[0] = thread_count ; + Impl::OpenMPexec::m_pool_topo[1] = Impl::s_using_hwloc ? thread_count / use_numa_count : thread_count; + Impl::OpenMPexec::m_pool_topo[2] = Impl::s_using_hwloc ? thread_count / ( use_numa_count * use_cores_per_numa ) : 1; + + Impl::OpenMPexec::resize_scratch( 1024 , 1024 ); + } + } + + if ( is_initialized || thread_spawn_failed ) { + std::string msg("Kokkos::OpenMP::initialize ERROR"); + + if ( is_initialized ) { msg.append(" : already initialized"); } + if ( thread_spawn_failed ) { msg.append(" : failed spawning threads"); } + + Kokkos::Impl::throw_runtime_exception(msg); + } + + // Check for over-subscription + if( Impl::mpi_ranks_per_node() * long(thread_count) > Impl::processors_per_node() ) { + std::cout << "Kokkos::OpenMP::initialize WARNING: You are likely oversubscribing your CPU cores." << std::endl; + std::cout << " Detected: " << Impl::processors_per_node() << " cores per node." << std::endl; + std::cout << " Detected: " << Impl::mpi_ranks_per_node() << " MPI_ranks per node." << std::endl; + std::cout << " Requested: " << thread_count << " threads per process." 
<< std::endl; + } + // Init the array for used for arbitrarily sized atomics + Impl::init_lock_array_host_space(); + + #if (KOKKOS_ENABLE_PROFILING) + Kokkos::Profiling::initialize(); + #endif +} + +//---------------------------------------------------------------------------- + +void OpenMP::finalize() +{ + Impl::OpenMPexec::verify_initialized( "OpenMP::finalize" ); + Impl::OpenMPexec::verify_is_process( "OpenMP::finalize" ); + + Impl::OpenMPexec::clear_scratch(); + + Impl::OpenMPexec::m_pool_topo[0] = 0 ; + Impl::OpenMPexec::m_pool_topo[1] = 0 ; + Impl::OpenMPexec::m_pool_topo[2] = 0 ; + + omp_set_num_threads(1); + + if ( Impl::s_using_hwloc && Kokkos::hwloc::can_bind_threads() ) { + hwloc::unbind_this_thread(); + } + + #if (KOKKOS_ENABLE_PROFILING) + Kokkos::Profiling::finalize(); + #endif +} + +//---------------------------------------------------------------------------- + +void OpenMP::print_configuration( std::ostream & s , const bool detail ) +{ + Impl::OpenMPexec::verify_is_process( "OpenMP::print_configuration" ); + + s << "Kokkos::OpenMP" ; + +#if defined( KOKKOS_HAVE_OPENMP ) + s << " KOKKOS_HAVE_OPENMP" ; +#endif +#if defined( KOKKOS_HAVE_HWLOC ) + + const unsigned numa_count_ = Kokkos::hwloc::get_available_numa_count(); + const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); + const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); + + s << " hwloc[" << numa_count_ << "x" << cores_per_numa << "x" << threads_per_core << "]" + << " hwloc_binding_" << ( Impl::s_using_hwloc ? 
"enabled" : "disabled" ) + ; +#endif + + const bool is_initialized = 0 != Impl::OpenMPexec::m_pool[0] ; + + if ( is_initialized ) { + const int numa_count = Kokkos::Impl::OpenMPexec::m_pool_topo[0] / Kokkos::Impl::OpenMPexec::m_pool_topo[1] ; + const int core_per_numa = Kokkos::Impl::OpenMPexec::m_pool_topo[1] / Kokkos::Impl::OpenMPexec::m_pool_topo[2] ; + const int thread_per_core = Kokkos::Impl::OpenMPexec::m_pool_topo[2] ; + + s << " thread_pool_topology[ " << numa_count + << " x " << core_per_numa + << " x " << thread_per_core + << " ]" + << std::endl ; + + if ( detail ) { + std::vector< std::pair<unsigned,unsigned> > coord( Kokkos::Impl::OpenMPexec::m_pool_topo[0] ); + +#pragma omp parallel + { +#pragma omp critical + { + coord[ omp_get_thread_num() ] = hwloc::get_this_thread_coordinate(); + } +/* END #pragma omp critical */ + } +/* END #pragma omp parallel */ + + for ( unsigned i = 0 ; i < coord.size() ; ++i ) { + s << " thread omp_rank[" << i << "]" + << " kokkos_rank[" << Impl::OpenMPexec::m_map_rank[ i ] << "]" + << " hwloc_coord[" << coord[i].first << "." << coord[i].second << "]" + << std::endl ; + } + } + } + else { + s << " not initialized" << std::endl ; + } +} + +int OpenMP::concurrency() { + return thread_pool_size(0); +} + +} // namespace Kokkos + +#endif //KOKKOS_HAVE_OPENMP diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a01c9cb644e86f423409f1eeb56a014b68f87968 --- /dev/null +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp @@ -0,0 +1,1083 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_OPENMPEXEC_HPP +#define KOKKOS_OPENMPEXEC_HPP + +#include <impl/Kokkos_Traits.hpp> +#include <impl/Kokkos_spinwait.hpp> + +#include <Kokkos_Atomic.hpp> +#include <iostream> +#include <sstream> +#include <fstream> +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- +/** \brief Data for OpenMP thread execution */ + +class OpenMPexec { +public: + + enum { MAX_THREAD_COUNT = 4096 }; + +private: + + static OpenMPexec * m_pool[ MAX_THREAD_COUNT ]; // Indexed by: m_pool_rank_rev + + static int m_pool_topo[ 4 ]; + static int m_map_rank[ MAX_THREAD_COUNT ]; + + friend class Kokkos::OpenMP ; + + int const m_pool_rank ; + int const m_pool_rank_rev ; + int const m_scratch_exec_end ; + int const m_scratch_reduce_end ; + int const m_scratch_thread_end ; + + int volatile m_barrier_state ; + + // Members for dynamic scheduling + // Which thread am I stealing from currently + int m_current_steal_target; + // This thread's owned work_range + Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN_16; + // Team Offset if one thread determines work_range for others + long m_team_work_index; + + // Is this thread stealing (i.e. 
its owned work_range is exhausted) + bool m_stealing; + + OpenMPexec(); + OpenMPexec( const OpenMPexec & ); + OpenMPexec & operator = ( const OpenMPexec & ); + + static void clear_scratch(); + +public: + + // Topology of a cache coherent thread pool: + // TOTAL = NUMA x GRAIN + // pool_size( depth = 0 ) + // pool_size(0) = total number of threads + // pool_size(1) = number of threads per NUMA + // pool_size(2) = number of threads sharing finest grain memory hierarchy + + inline static + int pool_size( int depth = 0 ) { return m_pool_topo[ depth ]; } + + inline static + OpenMPexec * pool_rev( int pool_rank_rev ) { return m_pool[ pool_rank_rev ]; } + + inline int pool_rank() const { return m_pool_rank ; } + inline int pool_rank_rev() const { return m_pool_rank_rev ; } + + inline long team_work_index() const { return m_team_work_index ; } + + inline int scratch_reduce_size() const + { return m_scratch_reduce_end - m_scratch_exec_end ; } + + inline int scratch_thread_size() const + { return m_scratch_thread_end - m_scratch_reduce_end ; } + + inline void * scratch_reduce() const { return ((char *) this) + m_scratch_exec_end ; } + inline void * scratch_thread() const { return ((char *) this) + m_scratch_reduce_end ; } + + inline + void state_wait( int state ) + { Impl::spinwait( m_barrier_state , state ); } + + inline + void state_set( int state ) { m_barrier_state = state ; } + + ~OpenMPexec() {} + + // Builds the per-thread record. The three scratch arguments are sizes; they + // are accumulated into end offsets measured from 'this'. + // The dynamic-scheduling members are given a defined starting state (steal + // target 0, empty work range, no claimed index, not stealing) so that they + // are never read while indeterminate. + OpenMPexec( const int arg_poolRank + , const int arg_scratch_exec_size + , const int arg_scratch_reduce_size + , const int arg_scratch_thread_size ) + : m_pool_rank( arg_poolRank ) + , m_pool_rank_rev( pool_size() - ( arg_poolRank + 1 ) ) + , m_scratch_exec_end( arg_scratch_exec_size ) + , m_scratch_reduce_end( m_scratch_exec_end + arg_scratch_reduce_size ) + , m_scratch_thread_end( m_scratch_reduce_end + arg_scratch_thread_size ) + , m_barrier_state(0) + , m_current_steal_target(0) + , m_work_range(0L,0L) + , m_team_work_index(-1) + , m_stealing(false) + {} + + static void finalize(); + + static void initialize( const unsigned team_count , + const unsigned threads_per_team 
, + const unsigned numa_count , + const unsigned cores_per_numa ); + + static void verify_is_process( const char * const ); + static void verify_initialized( const char * const ); + + static void resize_scratch( size_t reduce_size , size_t thread_size ); + + inline static + OpenMPexec * get_thread_omp() { return m_pool[ m_map_rank[ omp_get_thread_num() ] ]; } + + /* Dynamic Scheduling related functionality */ + // Initialize the work range for this thread + inline void set_work_range(const long& begin, const long& end, const long& chunk_size) { + m_work_range.first = (begin+chunk_size-1)/chunk_size; + m_work_range.second = end>0?(end+chunk_size-1)/chunk_size:m_work_range.first; + } + + // Claim and index from this thread's range from the beginning + inline long get_work_index_begin () { + Kokkos::pair<long,long> work_range_new = m_work_range; + Kokkos::pair<long,long> work_range_old = work_range_new; + if(work_range_old.first>=work_range_old.second) + return -1; + + work_range_new.first+=1; + + bool success = false; + while(!success) { + work_range_new = Kokkos::atomic_compare_exchange(&m_work_range,work_range_old,work_range_new); + success = ( (work_range_new == work_range_old) || + (work_range_new.first>=work_range_new.second)); + work_range_old = work_range_new; + work_range_new.first+=1; + } + if(work_range_old.first<work_range_old.second) + return work_range_old.first; + else + return -1; + } + + // Claim and index from this thread's range from the end + inline long get_work_index_end () { + Kokkos::pair<long,long> work_range_new = m_work_range; + Kokkos::pair<long,long> work_range_old = work_range_new; + if(work_range_old.first>=work_range_old.second) + return -1; + work_range_new.second-=1; + bool success = false; + while(!success) { + work_range_new = Kokkos::atomic_compare_exchange(&m_work_range,work_range_old,work_range_new); + success = ( (work_range_new == work_range_old) || + (work_range_new.first>=work_range_new.second) ); + work_range_old = 
work_range_new; + work_range_new.second-=1; + } + if(work_range_old.first<work_range_old.second) + return work_range_old.second-1; + else + return -1; + } + + // Reset the steal target to the next pool rank (wrapping at the pool size) + // and leave stealing mode + inline void reset_steal_target() { + m_current_steal_target = (m_pool_rank+1)%m_pool_topo[0]; + m_stealing = false; + } + + // Reset the steal target for team scheduling: team_size slots past this + // thread's reversed pool rank, or slot 0 when that runs off the pool. + // Also leaves stealing mode. + inline void reset_steal_target(int team_size) { + m_current_steal_target = (m_pool_rank_rev+team_size); + if(m_current_steal_target>=m_pool_topo[0]) + m_current_steal_target = 0;//m_pool_topo[0]-1; + m_stealing = false; + } + + // Get a steal target; start with my-rank + 1 and go round robin, until arriving at this thread's rank + // Returns -1 if no active steal target available + inline int get_steal_target() { + while(( m_pool[m_current_steal_target]->m_work_range.second <= + m_pool[m_current_steal_target]->m_work_range.first ) && + (m_current_steal_target!=m_pool_rank) ) { + m_current_steal_target = (m_current_steal_target+1)%m_pool_topo[0]; + } + if(m_current_steal_target == m_pool_rank) + return -1; + else + return m_current_steal_target; + } + + // Team variant: advance in strides of team_size (restarting at slot 0 once + // the stride would leave the pool) until a slot with remaining work is found. + // Returns -1 when the search arrives at this thread's reversed pool rank. + inline int get_steal_target(int team_size) { + + while(( m_pool[m_current_steal_target]->m_work_range.second <= + m_pool[m_current_steal_target]->m_work_range.first ) && + (m_current_steal_target!=m_pool_rank_rev) ) { + if(m_current_steal_target + team_size < m_pool_topo[0]) + m_current_steal_target = (m_current_steal_target+team_size); + else + m_current_steal_target = 0; + } + + if(m_current_steal_target == m_pool_rank_rev) + return -1; + else + return m_current_steal_target; + } + + // Claim an index from the end of a steal target's range, moving on to the + // next target whenever the current one yields -1. + // team_size > 0 selects the team variant of get_steal_target. + // Returns -1 once no steal target is left. + inline long steal_work_index (int team_size = 0) { + long index = -1; + int steal_target = team_size>0?get_steal_target(team_size):get_steal_target(); + while ( (steal_target != -1) && (index == -1)) { + index = m_pool[steal_target]->get_work_index_end(); + if(index == -1) + steal_target = team_size>0?get_steal_target(team_size):get_steal_target(); + } + return index; + } + + // Get a work index. 
// Claim from the owned range until it is exhausted, then steal from another thread.
  // Once a claim from the owned range fails, m_stealing stays true, so later
  // calls go straight to steal_work_index().  The result (possibly -1) is also
  // stored in m_team_work_index before it is returned.
  inline long get_work_index (int team_size = 0) {
    long work_index = -1;
    if(!m_stealing) work_index = get_work_index_begin();

    if( work_index == -1) {
      memory_fence();
      m_stealing = true;
      work_index = steal_work_index(team_size);
    }
    m_team_work_index = work_index;
    memory_fence();
    return work_index;
  }

};

} // namespace Impl
} // namespace Kokkos

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Impl {

// Handle representing one thread's membership in a team for the OpenMP
// execution space.
class OpenMPexecTeamMember {
public:

  // Number of bytes at the start of a thread's scratch area that the team
  // collectives below use as their staging slot.
  enum { TEAM_REDUCE_SIZE = 512 };

  /** \brief  Thread states for team synchronization */
  enum { Active = 0 , Rendezvous = 1 };

  typedef Kokkos::OpenMP                         execution_space ;
  typedef execution_space::scratch_memory_space  scratch_memory_space ;

  Impl::OpenMPexec    & m_exec ;
  scratch_memory_space  m_team_shared ;
  int                   m_team_scratch_size[2] ;
  int                   m_team_base_rev ;
  int                   m_team_rank_rev ;
  int                   m_team_rank ;
  int                   m_team_size ;
  int                   m_league_rank ;
  int                   m_league_end ;
  int                   m_league_size ;

  int                   m_chunk_size;
  int                   m_league_chunk_end;
  Impl::OpenMPexec    & m_team_lead_exec ;
  int                   m_invalid_thread;
  int                   m_team_alloc;

  // Fan-in team threads, root of the fan-in which does not block returns true.
  // Each thread calls state_wait( Active ) on the team members at reverse
  // rank m_team_rank_rev + n for n = 1,2,4,..., for as long as that rank is
  // inside the team and bit n of m_team_rank_rev is clear.  Every thread
  // except reverse rank 0 then sets its own state to Rendezvous and calls
  // state_wait( Rendezvous ).
  // NOTE(review): state_wait(s) presumably blocks while the state equals s -
  // confirm against OpenMPexec::state_wait.
  inline
  bool team_fan_in() const
    {
      memory_fence();
      for ( int n = 1 , j ; ( ( j = m_team_rank_rev + n ) < m_team_size ) && ! ( m_team_rank_rev & n ) ; n <<= 1 ) {

        m_exec.pool_rev( m_team_base_rev + j )->state_wait( Active );
      }

      if ( m_team_rank_rev ) {
        m_exec.state_set( Rendezvous );
        memory_fence();
        m_exec.state_wait( Rendezvous );
      }

      return 0 == m_team_rank_rev ;
    }

  // Counterpart of team_fan_in(): visits the same set of team members and
  // sets each one's state back to Active, with a fence after every store.
  inline
  void team_fan_out() const
    {
      memory_fence();
      for ( int n = 1 , j ; ( ( j = m_team_rank_rev + n ) < m_team_size ) && ! ( m_team_rank_rev & n ) ; n <<= 1 ) {
        m_exec.pool_rev( m_team_base_rev + j )->state_set( Active );
        memory_fence();
      }
    }

public:

  // Scratch-space accessors.  All three return m_team_shared after calling
  // set_team_thread_mode(); the 'int' level argument is ignored.
  KOKKOS_INLINE_FUNCTION
  const execution_space::scratch_memory_space& team_shmem() const
    { return m_team_shared.set_team_thread_mode(0,1,0) ; }

  KOKKOS_INLINE_FUNCTION
  const execution_space::scratch_memory_space& team_scratch(int) const
    { return m_team_shared.set_team_thread_mode(0,1,0) ; }

  KOKKOS_INLINE_FUNCTION
  const execution_space::scratch_memory_space& thread_scratch(int) const
    { return m_team_shared.set_team_thread_mode(0,team_size(),team_rank()) ; }

  KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; }
  KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; }
  KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank ; }
  KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size ; }

  // Team-wide barrier: a fan-in followed by a fan-out.  It is a no-op for
  // teams of size 1, for threads flagged m_invalid_thread, and when the
  // active memory space is not the host.
  KOKKOS_INLINE_FUNCTION void team_barrier() const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
    {}
#else
    {
      if ( 1 < m_team_size && !m_invalid_thread) {
        team_fan_in();
        team_fan_out();
      }
    }
#endif

  // Broadcast 'value' from the thread whose team_rank() equals thread_id.
  // The source thread writes to its m_exec.scratch_thread() slot; after the
  // barrier every thread reads 'value' back from its own
  // m_exec.scratch_thread() pointer.
  // NOTE(review): this relies on the reader's scratch_thread() slot holding
  // the broadcast value - confirm how scratch_thread() is laid out per team.
  template<class ValueType>
  KOKKOS_INLINE_FUNCTION
  void team_broadcast(ValueType& value, const int& thread_id) const
  {
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  { }
#else
    // Make sure there is enough scratch space:
    // 'type' becomes void when ValueType does not fit in TEAM_REDUCE_SIZE,
    // which makes the dereferences below fail to compile.
    typedef typename if_c< sizeof(ValueType) < TEAM_REDUCE_SIZE
                         , ValueType , void >::type type ;

    type * const local_value = ((type*) m_exec.scratch_thread());
    if(team_rank() == thread_id)
      *local_value = value;
    memory_fence();
    team_barrier();
    value = *local_value;
#endif
  }

  // Team-wide reduction.  The signature and the opening of the body are
  // selected by the preprocessor (C++11 vs. pre-C++11, host vs. non-host);
  // the shared host-side body follows the last #endif of the selection.
#ifdef KOKKOS_HAVE_CXX11
  template< class ValueType, class JoinOp >
  KOKKOS_INLINE_FUNCTION ValueType
    team_reduce( const ValueType & value
               , const JoinOp & op_in ) const
  #if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
    { return ValueType(); }
  #else
    {
      memory_fence();
      typedef ValueType value_type;
      const JoinLambdaAdapter<value_type,JoinOp> op(op_in);
  #endif
#else // KOKKOS_HAVE_CXX11
  template< class JoinOp >
  KOKKOS_INLINE_FUNCTION typename JoinOp::value_type
    team_reduce( const typename JoinOp::value_type & value
               , const JoinOp & op ) const
  #if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
    { return typename JoinOp::value_type(); }
  #else
    {
      typedef typename JoinOp::value_type value_type;
  #endif
#endif // KOKKOS_HAVE_CXX11
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
      // Make sure there is enough scratch space:
      typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE
                           , value_type , void >::type type ;

      type * const local_value = ((type*) m_exec.scratch_thread());

      // Set this thread's contribution
      *local_value = value ;

      // Fence to make sure the base team member has access:
      memory_fence();

      if ( team_fan_in() ) {
        // The last thread to synchronize returns true, all other threads wait for team_fan_out()
        type * const team_value = ((type*) m_exec.pool_rev( m_team_base_rev )->scratch_thread());

        // Join to the team value:
        for ( int i = 1 ; i < m_team_size ; ++i ) {
          op.join( *team_value , *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()) );
        }
        memory_fence();

        // The base team member may "lap" the other team members,
        // copy to their local value before proceeding.
        for ( int i = 1 ; i < m_team_size ; ++i ) {
          *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()) = *team_value ;
        }

        // Fence to make sure all team members have access
        memory_fence();
      }

      team_fan_out();

      // Read through a volatile pointer so the value written by the base
      // team member is re-read from memory.
      return *((type volatile const *)local_value);
    }
#endif
  /** \brief  Intra-team exclusive prefix sum with team_rank() ordering
   *          with intra-team non-deterministic ordering accumulation.
*
   *  The global inter-team accumulation value will, at the end of the
   *  league's parallel execution, be the scan's total.
   *  Parallel execution ordering of the league's teams is non-deterministic.
   *  As such the base value for each team's scan operation is similarly
   *  non-deterministic.
   *
   *  \param value         this thread's contribution
   *  \param global_accum  optional (may be null) inter-team accumulator; when
   *                       non-null the team total is added to it with
   *                       atomic_fetch_add and the value returned by that call
   *                       becomes the team's starting offset
   *  \return the value left in this thread's scratch slot after the scan;
   *          a default-constructed ArgType when the active memory space is
   *          not the host
   */
  template< typename ArgType >
  KOKKOS_INLINE_FUNCTION ArgType team_scan( const ArgType & value , ArgType * const global_accum ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
    { return ArgType(); }
#else
    {
      // Make sure there is enough scratch space:
      // 'type' becomes void (and the code below fails to compile) when
      // ArgType does not fit in TEAM_REDUCE_SIZE.
      typedef typename if_c< sizeof(ArgType) < TEAM_REDUCE_SIZE , ArgType , void >::type type ;

      volatile type * const work_value  = ((type*) m_exec.scratch_thread());

      *work_value = value ;

      memory_fence();

      if ( team_fan_in() ) {
        // The last thread to synchronize returns true, all other threads wait for team_fan_out()
        // m_team_base[0]                 == highest ranking team member
        // m_team_base[ m_team_size - 1 ] == lowest ranking team member
        //
        // 1) copy from lower to higher rank, initialize lowest rank to zero
        // 2) prefix sum from lowest to highest rank, skipping lowest rank

        type accum = 0 ;

        if ( global_accum ) {
          // Sum the whole team's contributions, then add that sum to the
          // global accumulator; 'accum' is replaced by the value returned
          // from atomic_fetch_add.
          for ( int i = m_team_size ; i-- ; ) {
            type & val = *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread());
            accum += val ;
          }
          accum = atomic_fetch_add( global_accum , accum );
        }

        // Walk reverse ranks m_team_size-1 down to 0, replacing every slot
        // with the running total that precedes it (exclusive scan).
        for ( int i = m_team_size ; i-- ; ) {
          type & val = *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread());
          const type offset = accum ;
          accum += val ;
          val = offset ;
        }

        memory_fence();
      }

      team_fan_out();

      return *work_value ;
    }
#endif

  /** \brief  Intra-team exclusive prefix sum with team_rank() ordering.
*
   *  The highest rank thread can compute the reduction total as
   *    reduction_total = dev.team_scan( value ) + value ;
   *
   *  Forwards to the two-argument overload with a null global accumulator.
   */
  template< typename Type >
  KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const
    { return this-> template team_scan<Type>( value , 0 ); }

  //----------------------------------------
  // Private for the driver

private:

  typedef execution_space::scratch_memory_space space ;

public:

  // Build the team member for the thread represented by 'exec'.
  //   exec          - per-thread execution state
  //   team          - policy supplying team_size, league_size, team_alloc,
  //                   chunk_size and team_iter
  //   shmem_size_L1 - stored as m_team_scratch_size[0]
  //   shmem_size_L2 - stored as m_team_scratch_size[1]
  // m_chunk_size falls back to team.team_iter() when the policy's chunk_size
  // is not positive.  m_team_lead_exec refers to the pool entry at the first
  // reverse rank of this thread's team_alloc()-sized group.
  template< class ... Properties >
  inline
  OpenMPexecTeamMember( Impl::OpenMPexec & exec
                      , const TeamPolicyInternal< OpenMP, Properties ...> & team
                      , const int shmem_size_L1
                      , const int shmem_size_L2
                      )
    : m_exec( exec )
    , m_team_shared(0,0)
    , m_team_scratch_size{ shmem_size_L1 , shmem_size_L2 }
    , m_team_base_rev(0)
    , m_team_rank_rev(0)
    , m_team_rank(0)
    , m_team_size( team.team_size() )
    , m_league_rank(0)
    , m_league_end(0)
    , m_league_size( team.league_size() )
    , m_chunk_size( team.chunk_size()>0?team.chunk_size():team.team_iter() )
    , m_league_chunk_end(0)
    , m_team_lead_exec( *exec.pool_rev( team.team_alloc() * (m_exec.pool_rank_rev()/team.team_alloc()) ))
    , m_team_alloc( team.team_alloc())
    {
      // Position of this thread inside the pool, split into (team, rank in team).
      const int pool_rank_rev        = m_exec.pool_rank_rev();
      const int pool_team_rank_rev   = pool_rank_rev % team.team_alloc();
      const int pool_league_rank_rev = pool_rank_rev / team.team_alloc();
      const int pool_num_teams       = OpenMP::thread_pool_size(0)/team.team_alloc();
      // Static partition of the league: each pool team gets chunks_per_team
      // chunks, assigned from the end of the league backwards.
      const int chunks_per_team      = ( team.league_size() + m_chunk_size*pool_num_teams-1 ) / (m_chunk_size*pool_num_teams);
            int league_iter_end      = team.league_size() - pool_league_rank_rev * chunks_per_team * m_chunk_size;
            int league_iter_begin    = league_iter_end - chunks_per_team * m_chunk_size;
      if (league_iter_begin < 0)     league_iter_begin = 0;
      if (league_iter_end>team.league_size()) league_iter_end = team.league_size();

      // A thread is invalid when it lies in the padding between team_size and
      // team_alloc, or (when there is no padding) when its pool rank is below
      // the number of threads left over after forming pool_num_teams teams.
      if ((team.team_alloc()>m_team_size)?
          (pool_team_rank_rev >= m_team_size):
          (m_exec.pool_size() - pool_num_teams*m_team_size > m_exec.pool_rank())
         )
        m_invalid_thread = 1;
      else
        m_invalid_thread = 0;

      m_team_rank_rev  = pool_team_rank_rev ;
      if ( pool_team_rank_rev < m_team_size && !m_invalid_thread ) {
        m_team_base_rev  = team.team_alloc() * pool_league_rank_rev ;
        m_team_rank_rev  = pool_team_rank_rev ;
        m_team_rank      = m_team_size - ( m_team_rank_rev + 1 );
        m_league_end     = league_iter_end ;
        m_league_rank    = league_iter_begin ;
        // Re-construct the team scratch handle in place.  Both pointers are
        // offsets into the base member's scratch_thread() area: the first
        // skips TEAM_REDUCE_SIZE bytes, the second additionally skips
        // m_team_scratch_size[0] bytes.
        new( (void*) &m_team_shared ) space( ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE , m_team_scratch_size[0] ,
                                             ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE + m_team_scratch_size[0],
                                               0 );
      }

      // The thread at reverse rank 0 publishes the team's work range and
      // resets its steal target for dynamic scheduling.
      if ( (m_team_rank_rev == 0) && (m_invalid_thread == 0) ) {
        m_exec.set_work_range(m_league_rank,m_league_end,m_chunk_size);
        m_exec.reset_steal_target(m_team_size);
      }
    }

  // Static scheduling: true while league iterations remain for this team.
  bool valid_static() const
    {
      return m_league_rank < m_league_end ;
    }

  // Static scheduling: advance to the next league iteration.  While work
  // remains, the team synchronizes and the scratch handle is re-constructed
  // before the rank is incremented.
  void next_static()
    {
      if ( m_league_rank < m_league_end ) {
        team_barrier();
        new( (void*) &m_team_shared ) space( ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE , m_team_scratch_size[0] ,
                                             ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE + m_team_scratch_size[0],
                                               0);
      }
      m_league_rank++;
    }

  // Dynamic scheduling: returns true while the current chunk still has
  // iterations; otherwise the thread at reverse rank 0 fetches a new chunk
  // index through the team lead, the team synchronizes, and every thread
  // reads the index from the team lead.  An index of -1 produces a negative
  // m_league_rank and therefore 'false'.
  // NOTE(review): work_index is a long while m_league_rank and
  // m_league_chunk_end are ints, so the products below are narrowed.
  bool valid_dynamic() {
    if(m_invalid_thread)
      return false;
    if ((m_league_rank < m_league_chunk_end) && (m_league_rank < m_league_size)) {
      return true;
    }

    if (  m_team_rank_rev == 0 ) {
      m_team_lead_exec.get_work_index(m_team_alloc);
    }
    team_barrier();

    long work_index = m_team_lead_exec.team_work_index();

    m_league_rank = work_index * m_chunk_size;
    m_league_chunk_end = (work_index +1 ) * m_chunk_size;

    if(m_league_chunk_end > m_league_size) m_league_chunk_end = m_league_size;

    if(m_league_rank>=0)
      return true;
    return false;
  }

  // Dynamic scheduling: advance within the current chunk; mirrors
  // next_static() but is a no-op for invalid threads.
  void next_dynamic() {
    if(m_invalid_thread)
      return;

    if ( m_league_rank < m_league_chunk_end ) {
      team_barrier();
      new( (void*) &m_team_shared ) space( ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE , m_team_scratch_size[0] ,
                                             ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE + m_team_scratch_size[0],
                                               0);
    }
    m_league_rank++;
  }

  // Size in bytes of the per-thread slot used by the team collectives.
  static inline int team_reduce_size() { return TEAM_REDUCE_SIZE ; }
};



// Team policy specialization for the OpenMP execution space.
template< class ... Properties >
class TeamPolicyInternal< Kokkos::OpenMP, Properties ... >: public PolicyTraits<Properties ...>
{
public:

  //! Tag this class as a kokkos execution policy
  typedef TeamPolicyInternal      execution_policy ;

  typedef PolicyTraits<Properties ... > traits;

  // Member-wise copy assignment of every field of the policy.
  TeamPolicyInternal& operator = (const TeamPolicyInternal& p) {
    m_league_size = p.m_league_size;
    m_team_size = p.m_team_size;
    m_team_alloc = p.m_team_alloc;
    m_team_iter = p.m_team_iter;
    m_team_scratch_size[0] = p.m_team_scratch_size[0];
    m_thread_scratch_size[0] = p.m_thread_scratch_size[0];
    m_team_scratch_size[1] = p.m_team_scratch_size[1];
    m_thread_scratch_size[1] = p.m_thread_scratch_size[1];
    m_chunk_size = p.m_chunk_size;
    return *this;
  }

  //----------------------------------------

  // Team size limits come straight from the execution space's thread pool
  // topology (depth 1 for the maximum, depth 2 for the recommendation); the
  // functor argument is ignored.
  template< class FunctorType >
  inline static
  int team_size_max( const FunctorType & )
    { return traits::execution_space::thread_pool_size(1); }

  template< class FunctorType >
  inline static
  int team_size_recommended( const FunctorType & )
    { return traits::execution_space::thread_pool_size(2); }

  template< class FunctorType >
  inline static
  int team_size_recommended( const FunctorType &, const int& )
    { return traits::execution_space::thread_pool_size(2); }

  //----------------------------------------

private:

  int m_league_size ;
  int m_team_size ;
  int m_team_alloc ;
  int m_team_iter ;

  // Per-team and per-thread scratch requests, indexed by scratch level.
  size_t m_team_scratch_size[2];
  size_t m_thread_scratch_size[2];

int m_chunk_size; + + inline void init( const int league_size_request + , const int team_size_request ) + { + const int pool_size = traits::execution_space::thread_pool_size(0); + const int team_max = traits::execution_space::thread_pool_size(1); + const int team_grain = traits::execution_space::thread_pool_size(2); + + m_league_size = league_size_request ; + + m_team_size = team_size_request < team_max ? + team_size_request : team_max ; + + // Round team size up to a multiple of 'team_gain' + const int team_size_grain = team_grain * ( ( m_team_size + team_grain - 1 ) / team_grain ); + const int team_count = pool_size / team_size_grain ; + + // Constraint : pool_size = m_team_alloc * team_count + m_team_alloc = pool_size / team_count ; + + // Maxumum number of iterations each team will take: + m_team_iter = ( m_league_size + team_count - 1 ) / team_count ; + + set_auto_chunk_size(); + } + +public: + + inline int team_size() const { return m_team_size ; } + inline int league_size() const { return m_league_size ; } + inline size_t scratch_size(const int& level, int team_size_ = -1) const { + if(team_size_ < 0) + team_size_ = m_team_size; + return m_team_scratch_size[level] + team_size_*m_thread_scratch_size[level] ; + } + + /** \brief Specify league size, request team size */ + TeamPolicyInternal( typename traits::execution_space & + , int league_size_request + , int team_size_request + , int /* vector_length_request */ = 1 ) + : m_team_scratch_size { 0 , 0 } + , m_thread_scratch_size { 0 , 0 } + , m_chunk_size(0) + { init( league_size_request , team_size_request ); } + + TeamPolicyInternal( typename traits::execution_space & + , int league_size_request + , const Kokkos::AUTO_t & /* team_size_request */ + , int /* vector_length_request */ = 1) + : m_team_scratch_size { 0 , 0 } + , m_thread_scratch_size { 0 , 0 } + , m_chunk_size(0) + { init( league_size_request , traits::execution_space::thread_pool_size(2) ); } + + TeamPolicyInternal( int league_size_request + , int 
team_size_request + , int /* vector_length_request */ = 1 ) + : m_team_scratch_size { 0 , 0 } + , m_thread_scratch_size { 0 , 0 } + , m_chunk_size(0) + { init( league_size_request , team_size_request ); } + + TeamPolicyInternal( int league_size_request + , const Kokkos::AUTO_t & /* team_size_request */ + , int /* vector_length_request */ = 1 ) + : m_team_scratch_size { 0 , 0 } + , m_thread_scratch_size { 0 , 0 } + , m_chunk_size(0) + { init( league_size_request , traits::execution_space::thread_pool_size(2) ); } + + inline int team_alloc() const { return m_team_alloc ; } + inline int team_iter() const { return m_team_iter ; } + + inline int chunk_size() const { return m_chunk_size ; } + + /** \brief set chunk_size to a discrete value*/ + inline TeamPolicyInternal set_chunk_size(typename traits::index_type chunk_size_) const { + TeamPolicyInternal p = *this; + p.m_chunk_size = chunk_size_; + return p; + } + + inline TeamPolicyInternal set_scratch_size(const int& level, const PerTeamValue& per_team) const { + TeamPolicyInternal p = *this; + p.m_team_scratch_size[level] = per_team.value; + return p; + }; + + inline TeamPolicyInternal set_scratch_size(const int& level, const PerThreadValue& per_thread) const { + TeamPolicyInternal p = *this; + p.m_thread_scratch_size[level] = per_thread.value; + return p; + }; + + inline TeamPolicyInternal set_scratch_size(const int& level, const PerTeamValue& per_team, const PerThreadValue& per_thread) const { + TeamPolicyInternal p = *this; + p.m_team_scratch_size[level] = per_team.value; + p.m_thread_scratch_size[level] = per_thread.value; + return p; + }; + +private: + /** \brief finalize chunk_size if it was set to AUTO*/ + inline void set_auto_chunk_size() { + + int concurrency = traits::execution_space::thread_pool_size(0)/m_team_alloc; + if( concurrency==0 ) concurrency=1; + + if(m_chunk_size > 0) { + if(!Impl::is_integral_power_of_two( m_chunk_size )) + Kokkos::abort("TeamPolicy blocking granularity must be power of two" ); + 
} + + int new_chunk_size = 1; + while(new_chunk_size*100*concurrency < m_league_size) + new_chunk_size *= 2; + if(new_chunk_size < 128) { + new_chunk_size = 1; + while( (new_chunk_size*40*concurrency < m_league_size ) && (new_chunk_size<128) ) + new_chunk_size*=2; + } + m_chunk_size = new_chunk_size; + } + +public: + typedef Impl::OpenMPexecTeamMember member_type ; +}; +} // namespace Impl + + +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +inline +int OpenMP::thread_pool_size( int depth ) +{ + return Impl::OpenMPexec::pool_size(depth); +} + +KOKKOS_INLINE_FUNCTION +int OpenMP::thread_pool_rank() +{ +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + return Impl::OpenMPexec::m_map_rank[ omp_get_thread_num() ]; +#else + return -1 ; +#endif +} + +} // namespace Kokkos + + +namespace Kokkos { + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember> + TeamThreadRange(const Impl::OpenMPexecTeamMember& thread, const iType& count) { + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>(thread,count); +} + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember> + TeamThreadRange(const Impl::OpenMPexecTeamMember& thread, const iType& begin, const iType& end) { + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>(thread,begin,end); +} + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember > + ThreadVectorRange(const Impl::OpenMPexecTeamMember& thread, const iType& count) { + return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >(thread,count); +} + +KOKKOS_INLINE_FUNCTION +Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember> 
PerTeam(const Impl::OpenMPexecTeamMember& thread) { + return Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember>(thread); +} + +KOKKOS_INLINE_FUNCTION +Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember> PerThread(const Impl::OpenMPexecTeamMember& thread) { + return Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember>(thread); +} +} // namespace Kokkos + +namespace Kokkos { + + /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the the calling thread team. + * This functionality requires C++11 support.*/ +template<typename iType, class Lambda> +KOKKOS_INLINE_FUNCTION +void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>& loop_boundaries, const Lambda& lambda) { + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) + lambda(i); +} + +/** \brief Inter-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the the calling thread team and a summation of + * val is performed and put into result. This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>& loop_boundaries, + const Lambda & lambda, ValueType& result) { + + result = ValueType(); + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + result+=tmp; + } + + result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>()); +} + +/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. 
+ * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of + * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. + * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore + * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or + * '1 for *'). This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>& loop_boundaries, + const Lambda & lambda, const JoinType& join, ValueType& init_result) { + + ValueType result = init_result; + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + join(result,tmp); + } + + init_result = loop_boundaries.thread.team_reduce(result,join); +} + +} //namespace Kokkos + + +namespace Kokkos { +/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread. + * This functionality requires C++11 support.*/ +template<typename iType, class Lambda> +KOKKOS_INLINE_FUNCTION +void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >& + loop_boundaries, const Lambda& lambda) { + #ifdef KOKKOS_HAVE_PRAGMA_IVDEP + #pragma ivdep + #endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) + lambda(i); +} + +/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a summation of + * val is performed and put into result. 
This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >& + loop_boundaries, const Lambda & lambda, ValueType& result) { + result = ValueType(); +#ifdef KOKKOS_HAVE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + result+=tmp; + } +} + +/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of + * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. + * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore + * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or + * '1 for *'). This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >& + loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) { + + ValueType result = init_result; +#ifdef KOKKOS_HAVE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + join(result,tmp); + } + init_result = result; +} + +/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final) + * for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed. 
+ * Depending on the target execution space the operator might be called twice: once with final=false + * and once with final=true. When final==true val contains the prefix sum value. The contribution of this + * "i" needs to be added to val no matter whether final==true or not. In a serial execution + * (i.e. team_size==1) the operator is only called once with final==true. Scan_val will be set + * to the final sum value over all vector lanes. + * This functionality requires C++11 support.*/ +template< typename iType, class FunctorType > +KOKKOS_INLINE_FUNCTION +void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >& + loop_boundaries, const FunctorType & lambda) { + + typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; + typedef typename ValueTraits::value_type value_type ; + + value_type scan_val = value_type(); + +#ifdef KOKKOS_HAVE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i,scan_val,true); + } +} + +} // namespace Kokkos + +namespace Kokkos { + +template<class FunctorType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda) { + lambda(); +} + +template<class FunctorType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda) { + if(single_struct.team_member.team_rank()==0) lambda(); +} + +template<class FunctorType, class ValueType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) { + lambda(val); +} + +template<class FunctorType, class ValueType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) { + 
if(single_struct.team_member.team_rank()==0) { + lambda(val); + } + single_struct.team_member.team_broadcast(val,0); +} +} + +#endif /* #ifndef KOKKOS_OPENMPEXEC_HPP */ + diff --git a/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.cpp b/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3123a297c4478a3ec1f48525048945055311f032 --- /dev/null +++ b/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.cpp @@ -0,0 +1,511 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core_fwd.hpp> + +#if defined( KOKKOS_HAVE_QTHREAD ) + +#include <stdio.h> +#include <stdlib.h> +#include <iostream> +#include <sstream> +#include <utility> +#include <Kokkos_Qthread.hpp> +#include <Kokkos_Atomic.hpp> +#include <impl/Kokkos_Error.hpp> + +// Defines to enable experimental Qthread functionality + +#define QTHREAD_LOCAL_PRIORITY +#define CLONED_TASKS + +#include <qthread/qthread.h> + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { +namespace { + +enum { MAXIMUM_QTHREAD_WORKERS = 1024 }; + +/** s_exec is indexed by the reverse rank of the workers + * for faster fan-in / fan-out lookups + * [ n - 1 , n - 2 , ... 
, 0 ] + */ +QthreadExec * s_exec[ MAXIMUM_QTHREAD_WORKERS ]; + +int s_number_shepherds = 0 ; +int s_number_workers_per_shepherd = 0 ; +int s_number_workers = 0 ; + +inline +QthreadExec ** worker_exec() +{ + return s_exec + s_number_workers - ( qthread_shep() * s_number_workers_per_shepherd + qthread_worker_local(NULL) + 1 ); +} + +const int s_base_size = QthreadExec::align_alloc( sizeof(QthreadExec) ); + +int s_worker_reduce_end = 0 ; /* End of worker reduction memory */ +int s_worker_shared_end = 0 ; /* Total of worker scratch memory */ +int s_worker_shared_begin = 0 ; /* Beginning of worker shared memory */ + +QthreadExecFunctionPointer volatile s_active_function = 0 ; +const void * volatile s_active_function_arg = 0 ; + +} /* namespace */ +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +int Qthread::is_initialized() +{ + return Impl::s_number_workers != 0 ; +} + +int Qthread::concurrency() +{ + return Impl::s_number_workers_per_shepherd ; +} + +int Qthread::in_parallel() +{ + return Impl::s_active_function != 0 ; +} + +void Qthread::initialize( int thread_count ) +{ + // Environment variable: QTHREAD_NUM_SHEPHERDS + // Environment variable: QTHREAD_NUM_WORKERS_PER_SHEP + // Environment variable: QTHREAD_HWPAR + + { + char buffer[256]; + snprintf(buffer,sizeof(buffer),"QTHREAD_HWPAR=%d",thread_count); + putenv(buffer); + } + + const bool ok_init = ( QTHREAD_SUCCESS == qthread_initialize() ) && + ( thread_count == qthread_num_shepherds() * qthread_num_workers_local(NO_SHEPHERD) ) && + ( thread_count == qthread_num_workers() ); + + bool ok_symmetry = true ; + + if ( ok_init ) { + Impl::s_number_shepherds = qthread_num_shepherds(); + Impl::s_number_workers_per_shepherd = qthread_num_workers_local(NO_SHEPHERD); + Impl::s_number_workers = Impl::s_number_shepherds * Impl::s_number_workers_per_shepherd ; + + for ( int i = 0 ; ok_symmetry && i < 
Impl::s_number_shepherds ; ++i ) { + ok_symmetry = ( Impl::s_number_workers_per_shepherd == qthread_num_workers_local(i) ); + } + } + + if ( ! ok_init || ! ok_symmetry ) { + std::ostringstream msg ; + + msg << "Kokkos::Qthread::initialize(" << thread_count << ") FAILED" ; + msg << " : qthread_num_shepherds = " << qthread_num_shepherds(); + msg << " : qthread_num_workers_per_shepherd = " << qthread_num_workers_local(NO_SHEPHERD); + msg << " : qthread_num_workers = " << qthread_num_workers(); + + if ( ! ok_symmetry ) { + msg << " : qthread_num_workers_local = {" ; + for ( int i = 0 ; i < Impl::s_number_shepherds ; ++i ) { + msg << " " << qthread_num_workers_local(i) ; + } + msg << " }" ; + } + + Impl::s_number_workers = 0 ; + Impl::s_number_shepherds = 0 ; + Impl::s_number_workers_per_shepherd = 0 ; + + if ( ok_init ) { qthread_finalize(); } + + Kokkos::Impl::throw_runtime_exception( msg.str() ); + } + + Impl::QthreadExec::resize_worker_scratch( 256 , 256 ); + + // Init the array for used for arbitrarily sized atomics + Impl::init_lock_array_host_space(); + +} + +void Qthread::finalize() +{ + Impl::QthreadExec::clear_workers(); + + if ( Impl::s_number_workers ) { + qthread_finalize(); + } + + Impl::s_number_workers = 0 ; + Impl::s_number_shepherds = 0 ; + Impl::s_number_workers_per_shepherd = 0 ; +} + +void Qthread::print_configuration( std::ostream & s , const bool detail ) +{ + s << "Kokkos::Qthread {" + << " num_shepherds(" << Impl::s_number_shepherds << ")" + << " num_workers_per_shepherd(" << Impl::s_number_workers_per_shepherd << ")" + << " }" << std::endl ; +} + +Qthread & Qthread::instance( int ) +{ + static Qthread q ; + return q ; +} + +void Qthread::fence() +{ +} + +int Qthread::shepherd_size() const { return Impl::s_number_shepherds ; } +int Qthread::shepherd_worker_size() const { return Impl::s_number_workers_per_shepherd ; } + +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- + +namespace Kokkos 
{ +namespace Impl { +namespace { + +aligned_t driver_exec_all( void * arg ) +{ + QthreadExec & exec = **worker_exec(); + + (*s_active_function)( exec , s_active_function_arg ); + +/* + fprintf( stdout + , "QthreadExec driver worker(%d:%d) shepherd(%d:%d) shepherd_worker(%d:%d) done\n" + , exec.worker_rank() + , exec.worker_size() + , exec.shepherd_rank() + , exec.shepherd_size() + , exec.shepherd_worker_rank() + , exec.shepherd_worker_size() + ); + fflush(stdout); +*/ + + return 0 ; +} + +aligned_t driver_resize_worker_scratch( void * arg ) +{ + static volatile int lock_begin = 0 ; + static volatile int lock_end = 0 ; + + QthreadExec ** const exec = worker_exec(); + + //---------------------------------------- + // Serialize allocation for thread safety + + while ( ! atomic_compare_exchange_strong( & lock_begin , 0 , 1 ) ); // Spin wait to claim lock + + const bool ok = 0 == *exec ; + + if ( ok ) { *exec = (QthreadExec *) malloc( s_base_size + s_worker_shared_end ); } + + lock_begin = 0 ; // release lock + + if ( ok ) { new( *exec ) QthreadExec(); } + + //---------------------------------------- + // Wait for all calls to complete to insure that each worker has executed. + + if ( s_number_workers == 1 + atomic_fetch_add( & lock_end , 1 ) ) { lock_end = 0 ; } + + while ( lock_end ); + +/* + fprintf( stdout + , "QthreadExec resize worker(%d:%d) shepherd(%d:%d) shepherd_worker(%d:%d) done\n" + , (**exec).worker_rank() + , (**exec).worker_size() + , (**exec).shepherd_rank() + , (**exec).shepherd_size() + , (**exec).shepherd_worker_rank() + , (**exec).shepherd_worker_size() + ); + fflush(stdout); +*/ + + //---------------------------------------- + + if ( ! 
ok ) { + fprintf( stderr , "Kokkos::QthreadExec resize failed\n" ); + fflush( stderr ); + } + + return 0 ; +} + +void verify_is_process( const char * const label , bool not_active = false ) +{ + const bool not_process = 0 != qthread_shep() || 0 != qthread_worker_local(NULL); + const bool is_active = not_active && ( s_active_function || s_active_function_arg ); + + if ( not_process || is_active ) { + std::string msg( label ); + msg.append( " : FAILED" ); + if ( not_process ) msg.append(" : not called by main process"); + if ( is_active ) msg.append(" : parallel execution in progress"); + Kokkos::Impl::throw_runtime_exception( msg ); + } +} + +} + +int QthreadExec::worker_per_shepherd() +{ + return s_number_workers_per_shepherd ; +} + +QthreadExec::QthreadExec() +{ + const int shepherd_rank = qthread_shep(); + const int shepherd_worker_rank = qthread_worker_local(NULL); + const int worker_rank = shepherd_rank * s_number_workers_per_shepherd + shepherd_worker_rank ; + + m_worker_base = s_exec ; + m_shepherd_base = s_exec + s_number_workers_per_shepherd * ( ( s_number_shepherds - ( shepherd_rank + 1 ) ) ); + m_scratch_alloc = ( (unsigned char *) this ) + s_base_size ; + m_reduce_end = s_worker_reduce_end ; + m_shepherd_rank = shepherd_rank ; + m_shepherd_size = s_number_shepherds ; + m_shepherd_worker_rank = shepherd_worker_rank ; + m_shepherd_worker_size = s_number_workers_per_shepherd ; + m_worker_rank = worker_rank ; + m_worker_size = s_number_workers ; + m_worker_state = QthreadExec::Active ; +} + +void QthreadExec::clear_workers() +{ + for ( int iwork = 0 ; iwork < s_number_workers ; ++iwork ) { + QthreadExec * const exec = s_exec[iwork] ; + s_exec[iwork] = 0 ; + free( exec ); + } +} + +void QthreadExec::shared_reset( Qthread::scratch_memory_space & space ) +{ + new( & space ) + Qthread::scratch_memory_space( + ((unsigned char *) (**m_shepherd_base).m_scratch_alloc ) + s_worker_shared_begin , + s_worker_shared_end - s_worker_shared_begin + ); +} + +void 
QthreadExec::resize_worker_scratch( const int reduce_size , const int shared_size ) +{ + const int exec_all_reduce_alloc = align_alloc( reduce_size ); + const int shepherd_scan_alloc = align_alloc( 8 ); + const int shepherd_shared_end = exec_all_reduce_alloc + shepherd_scan_alloc + align_alloc( shared_size ); + + if ( s_worker_reduce_end < exec_all_reduce_alloc || + s_worker_shared_end < shepherd_shared_end ) { + +/* + fprintf( stdout , "QthreadExec::resize\n"); + fflush(stdout); +*/ + + // Clear current worker memory before allocating new worker memory + clear_workers(); + + // Increase the buffers to an aligned allocation + s_worker_reduce_end = exec_all_reduce_alloc ; + s_worker_shared_begin = exec_all_reduce_alloc + shepherd_scan_alloc ; + s_worker_shared_end = shepherd_shared_end ; + + // Need to query which shepherd this main 'process' is running... + + const int main_shep = qthread_shep(); + + // Have each worker resize its memory for proper first-touch +#if 0 + for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) { + for ( int i = jshep != main_shep ? 0 : 1 ; i < s_number_workers_per_shepherd ; ++i ) { + qthread_fork_to( driver_resize_worker_scratch , NULL , NULL , jshep ); + }} +#else + // If this function is used before the 'qthread.task_policy' unit test + // the 'qthread.task_policy' unit test fails with a seg-fault within libqthread.so. + for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) { + const int num_clone = jshep != main_shep ? 
/** \brief  Run 'func( exec , arg )' once on every Qthread worker.
 *
 *  \param func  Function executed by each worker with that worker's
 *               QthreadExec as its first argument.
 *  \param arg   Opaque pointer forwarded unchanged to every call of 'func'.
 *
 *  The first (Qthread &) parameter is unnamed and unused.
 *
 *  verify_is_process() throws if the caller is not worker 0 of shepherd 0,
 *  or if a previous exec_all is still recorded as active.
 *
 *  NOTE(review): nothing in this function joins the forked tasks; the
 *  'func' is presumably expected to end with a barrier across all workers
 *  (the ParallelFor driver ends with exec_all_barrier()) -- confirm for
 *  any new caller.
 */
void QthreadExec::exec_all( Qthread & , QthreadExecFunctionPointer func , const void * arg )
{
  verify_is_process("QthreadExec::exec_all(...)",true);

/*
  fprintf( stdout , "QthreadExec::exec_all\n");
  fflush(stdout);
*/

  // Publish the work before any task is forked; driver_exec_all reads
  // these file-scope variables.
  s_active_function     = func ;
  s_active_function_arg = arg ;

  // Need to query which shepherd this main 'process' is running...

  const int main_shep = qthread_shep();

#if 0
  for ( int jshep = 0 , iwork = 0 ; jshep < s_number_shepherds ; ++jshep ) {
  for ( int i = jshep != main_shep ? 0 : 1 ; i < s_number_workers_per_shepherd ; ++i , ++iwork ) {
    qthread_fork_to( driver_exec_all , NULL , NULL , jshep );
  }}
#else
  // If this function is used before the 'qthread.task_policy' unit test
  // the 'qthread.task_policy' unit test fails with a seg-fault within libqthread.so.
  for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) {
    // One task per worker of the shepherd, minus one on the calling
    // shepherd because the caller runs driver_exec_all itself below.
    const int num_clone = jshep != main_shep ? s_number_workers_per_shepherd : s_number_workers_per_shepherd - 1 ;

    if ( num_clone ) {
      const int ret = qthread_fork_clones_to_local_priority
        ( driver_exec_all   /* function */
        , NULL              /* function data block */
        , NULL              /* pointer to return value feb */
        , jshep             /* shepherd number */
        , num_clone - 1     /* number of instances - 1 */
        );

      // NOTE(review): checked only when assertions are enabled.
      assert(ret == QTHREAD_SUCCESS);
    }
  }
#endif

  // The calling worker takes part in the execution as well.
  driver_exec_all( NULL );

  // Mark parallel execution as finished (see Qthread::in_parallel()).
  s_active_function     = 0 ;
  s_active_function_arg = 0 ;
}
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
  /** Barrier across all workers participating in the 'exec_all'.
   *
   *  Workers are addressed by reverse rank
   *  ( rev_rank = m_worker_size - ( m_worker_rank + 1 ) ), which is also
   *  the index used with m_worker_base.  The barrier has three phases:
   *    1. fan-in  : wait on the workers at rev_rank + 1, + 2, + 4, ...
   *                 for as long as the corresponding bit of rev_rank is
   *                 clear and the index is in range;
   *    2. wait    : every worker except rev_rank == 0 marks itself
   *                 Inactive and spins until its state is set back;
   *    3. fan-out : set the same workers as in phase 1 back to Active.
   *
   *  NOTE(review): Impl::spinwait( flag , value ) is defined elsewhere;
   *  its use here presumes it returns once 'flag' no longer equals
   *  'value' -- confirm.
   */
  void exec_all_barrier() const
    {
      const int rev_rank = m_worker_size - ( m_worker_rank + 1 );

      int n , j ;

      // Fan-in: wait while each of this worker's fan-in partners is still Active.
      for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
        Impl::spinwait( m_worker_base[j]->m_worker_state , QthreadExec::Active );
      }

      // Non-root workers signal arrival and wait to be released.
      if ( rev_rank ) {
        m_worker_state = QthreadExec::Inactive ;
        Impl::spinwait( m_worker_state , QthreadExec::Inactive );
      }

      // Fan-out: release the partners waited on above.
      // (m_worker_state is 'mutable volatile', so it is writable through
      //  the pointer-to-const held in m_worker_base.)
      for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
        m_worker_base[j]->m_worker_state = QthreadExec::Active ;
      }
    }
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) { + const QthreadExec & fan = *m_worker_base[j]; + + Impl::spinwait( fan.m_worker_state , QthreadExec::Active ); + + ValueJoin::join( ReducerConditional::select(func , reduce) , m_scratch_alloc , fan.m_scratch_alloc ); + } + + if ( rev_rank ) { + m_worker_state = QthreadExec::Inactive ; + Impl::spinwait( m_worker_state , QthreadExec::Inactive ); + } + + for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) { + m_worker_base[j]->m_worker_state = QthreadExec::Active ; + } + } + + //---------------------------------------- + /** Scall across all workers participating in the 'exec_all' */ + template< class FunctorType , class ArgTag > + inline + void exec_all_scan( const FunctorType & func ) const + { + typedef Kokkos::Impl::FunctorValueInit< FunctorType , ArgTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< FunctorType , ArgTag > ValueJoin ; + typedef Kokkos::Impl::FunctorValueOps< FunctorType , ArgTag > ValueOps ; + + const int rev_rank = m_worker_size - ( m_worker_rank + 1 ); + + int n , j ; + + for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) { + Impl::spinwait( m_worker_base[j]->m_worker_state , QthreadExec::Active ); + } + + if ( rev_rank ) { + m_worker_state = QthreadExec::Inactive ; + Impl::spinwait( m_worker_state , QthreadExec::Inactive ); + } + else { + // Root thread scans across values before releasing threads + // Worker data is in reverse order, so m_worker_base[0] is the + // highest ranking thread. + + // Copy from lower ranking to higher ranking worker. + for ( int i = 1 ; i < m_worker_size ; ++i ) { + ValueOps::copy( func + , m_worker_base[i-1]->m_scratch_alloc + , m_worker_base[i]->m_scratch_alloc + ); + } + + ValueInit::init( func , m_worker_base[m_worker_size-1]->m_scratch_alloc ); + + // Join from lower ranking to higher ranking worker. 
+ // Value at m_worker_base[n-1] is zero so skip adding it to m_worker_base[n-2]. + for ( int i = m_worker_size - 1 ; --i > 0 ; ) { + ValueJoin::join( func , m_worker_base[i-1]->m_scratch_alloc , m_worker_base[i]->m_scratch_alloc ); + } + } + + for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) { + m_worker_base[j]->m_worker_state = QthreadExec::Active ; + } + } + + //---------------------------------------- + + template< class Type> + inline + volatile Type * shepherd_team_scratch_value() const + { return (volatile Type*)(((unsigned char *) m_scratch_alloc) + m_reduce_end); } + + template< class Type > + inline + void shepherd_broadcast( Type & value , const int team_size , const int team_rank ) const + { + if ( m_shepherd_base ) { + Type * const shared_value = m_shepherd_base[0]->shepherd_team_scratch_value<Type>(); + if ( m_shepherd_worker_rank == team_rank ) { *shared_value = value ; } + memory_fence(); + shepherd_barrier( team_size ); + value = *shared_value ; + } + } + + template< class Type > + inline + Type shepherd_reduce( const int team_size , const Type & value ) const + { + *shepherd_team_scratch_value<Type>() = value ; + + memory_fence(); + + const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); + + int n , j ; + + for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { + Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active ); + } + + if ( rev_rank ) { + m_worker_state = QthreadExec::Inactive ; + Impl::spinwait( m_worker_state , QthreadExec::Inactive ); + } + else { + Type & accum = * m_shepherd_base[0]->shepherd_team_scratch_value<Type>(); + for ( int i = 1 ; i < n ; ++i ) { + accum += * m_shepherd_base[i]->shepherd_team_scratch_value<Type>(); + } + for ( int i = 1 ; i < n ; ++i ) { + * m_shepherd_base[i]->shepherd_team_scratch_value<Type>() = accum ; + } + + memory_fence(); + } + + for ( n = 1 ; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { + m_shepherd_base[j]->m_worker_state = QthreadExec::Active ; + } + + return *shepherd_team_scratch_value<Type>(); + } + + template< class JoinOp > + inline + typename JoinOp::value_type + shepherd_reduce( const int team_size + , const typename JoinOp::value_type & value + , const JoinOp & op ) const + { + typedef typename JoinOp::value_type Type ; + + *shepherd_team_scratch_value<Type>() = value ; + + memory_fence(); + + const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); + + int n , j ; + + for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { + Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active ); + } + + if ( rev_rank ) { + m_worker_state = QthreadExec::Inactive ; + Impl::spinwait( m_worker_state , QthreadExec::Inactive ); + } + else { + volatile Type & accum = * m_shepherd_base[0]->shepherd_team_scratch_value<Type>(); + for ( int i = 1 ; i < team_size ; ++i ) { + op.join( accum , * m_shepherd_base[i]->shepherd_team_scratch_value<Type>() ); + } + for ( int i = 1 ; i < team_size ; ++i ) { + * m_shepherd_base[i]->shepherd_team_scratch_value<Type>() = accum ; + } + + memory_fence(); + } + + for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { + m_shepherd_base[j]->m_worker_state = QthreadExec::Active ; + } + + return *shepherd_team_scratch_value<Type>(); + } + + template< class Type > + inline + Type shepherd_scan( const int team_size + , const Type & value + , Type * const global_value = 0 ) const + { + *shepherd_team_scratch_value<Type>() = value ; + + memory_fence(); + + const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); + + int n , j ; + + for ( n = 1 ; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { + Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active ); + } + + if ( rev_rank ) { + m_worker_state = QthreadExec::Inactive ; + Impl::spinwait( m_worker_state , QthreadExec::Inactive ); + } + else { + // Root thread scans across values before releasing threads + // Worker data is in reverse order, so m_shepherd_base[0] is the + // highest ranking thread. + + // Copy from lower ranking to higher ranking worker. + + Type accum = * m_shepherd_base[0]->shepherd_team_scratch_value<Type>(); + for ( int i = 1 ; i < team_size ; ++i ) { + const Type tmp = * m_shepherd_base[i]->shepherd_team_scratch_value<Type>(); + accum += tmp ; + * m_shepherd_base[i-1]->shepherd_team_scratch_value<Type>() = tmp ; + } + + * m_shepherd_base[team_size-1]->shepherd_team_scratch_value<Type>() = + global_value ? atomic_fetch_add( global_value , accum ) : 0 ; + + // Join from lower ranking to higher ranking worker. + for ( int i = team_size ; --i ; ) { + * m_shepherd_base[i-1]->shepherd_team_scratch_value<Type>() += * m_shepherd_base[i]->shepherd_team_scratch_value<Type>(); + } + + memory_fence(); + } + + for ( n = 1 ; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { + m_shepherd_base[j]->m_worker_state = QthreadExec::Active ; + } + + return *shepherd_team_scratch_value<Type>(); + } + + //---------------------------------------- + + static inline + int align_alloc( int size ) + { + enum { ALLOC_GRAIN = 1 << 6 /* power of two, 64bytes */}; + enum { ALLOC_GRAIN_MASK = ALLOC_GRAIN - 1 }; + return ( size + ALLOC_GRAIN_MASK ) & ~ALLOC_GRAIN_MASK ; + } + + void shared_reset( Qthread::scratch_memory_space & ); + + void * exec_all_reduce_value() const { return m_scratch_alloc ; } + + static void * exec_all_reduce_result(); + + static void resize_worker_scratch( const int reduce_size , const int shared_size ); + static void clear_workers(); + + //---------------------------------------- + + inline int worker_rank() const { return m_worker_rank ; } + inline int worker_size() const { return m_worker_size ; } + inline int shepherd_worker_rank() const { return m_shepherd_worker_rank ; } + inline int shepherd_worker_size() const { return m_shepherd_worker_size ; } + inline int shepherd_rank() const { return m_shepherd_rank ; } + inline int shepherd_size() const { return m_shepherd_size ; } + + static int worker_per_shepherd(); +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +class QthreadTeamPolicyMember { +private: + + typedef Kokkos::Qthread execution_space ; + typedef execution_space::scratch_memory_space scratch_memory_space ; + + + Impl::QthreadExec & m_exec ; + scratch_memory_space m_team_shared ; + const int m_team_size ; + const int m_team_rank ; + const int m_league_size ; + const int m_league_end ; + int m_league_rank ; + +public: + + KOKKOS_INLINE_FUNCTION + const scratch_memory_space & team_shmem() const { return m_team_shared ; } + + KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; } + 
KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; } + KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank ; } + KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size ; } + + KOKKOS_INLINE_FUNCTION void team_barrier() const +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + {} +#else + { m_exec.shepherd_barrier( m_team_size ); } +#endif + + template< typename Type > + KOKKOS_INLINE_FUNCTION Type team_broadcast( const Type & value , int rank ) const +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { return Type(); } +#else + { return m_exec.template shepherd_broadcast<Type>( value , m_team_size , rank ); } +#endif + + template< typename Type > + KOKKOS_INLINE_FUNCTION Type team_reduce( const Type & value ) const +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { return Type(); } +#else + { return m_exec.template shepherd_reduce<Type>( m_team_size , value ); } +#endif + + template< typename JoinOp > + KOKKOS_INLINE_FUNCTION typename JoinOp::value_type + team_reduce( const typename JoinOp::value_type & value + , const JoinOp & op ) const +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { return typename JoinOp::value_type(); } +#else + { return m_exec.template shepherd_reduce<JoinOp>( m_team_size , value , op ); } +#endif + + /** \brief Intra-team exclusive prefix sum with team_rank() ordering. + * + * The highest rank thread can compute the reduction total as + * reduction_total = dev.team_scan( value ) + value ; + */ + template< typename Type > + KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { return Type(); } +#else + { return m_exec.template shepherd_scan<Type>( m_team_size , value ); } +#endif + + /** \brief Intra-team exclusive prefix sum with team_rank() ordering + * with intra-team non-deterministic ordering accumulation. 
+ * + * The global inter-team accumulation value will, at the end of the + * league's parallel execution, be the scan's total. + * Parallel execution ordering of the league's teams is non-deterministic. + * As such the base value for each team's scan operation is similarly + * non-deterministic. + */ + template< typename Type > + KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { return Type(); } +#else + { return m_exec.template shepherd_scan<Type>( m_team_size , value , global_accum ); } +#endif + + //---------------------------------------- + // Private driver for task-team parallel + + struct TaskTeam {}; + + QthreadTeamPolicyMember(); + explicit QthreadTeamPolicyMember( const TaskTeam & ); + + //---------------------------------------- + // Private for the driver ( for ( member_type i(exec,team); i ; i.next_team() ) { ... } + + // Initialize + template< class ... Properties > + QthreadTeamPolicyMember( Impl::QthreadExec & exec + , const Kokkos::Impl::TeamPolicyInternal<Qthread,Properties...> & team ) + : m_exec( exec ) + , m_team_shared(0,0) + , m_team_size( team.m_team_size ) + , m_team_rank( exec.shepherd_worker_rank() ) + , m_league_size( team.m_league_size ) + , m_league_end( team.m_league_size - team.m_shepherd_iter * ( exec.shepherd_size() - ( exec.shepherd_rank() + 1 ) ) ) + , m_league_rank( m_league_end > team.m_shepherd_iter ? m_league_end - team.m_shepherd_iter : 0 ) + { + m_exec.shared_reset( m_team_shared ); + } + + // Continue + operator bool () const { return m_league_rank < m_league_end ; } + + // iterate + void next_team() { ++m_league_rank ; m_exec.shared_reset( m_team_shared ); } +}; + + +template< class ... Properties > +class TeamPolicyInternal< Kokkos::Qthread , Properties ... > + : public PolicyTraits< Properties... > +{ +private: + + const int m_league_size ; + const int m_team_size ; + const int m_shepherd_iter ; + +public: + + //! 
Tag this class as a kokkos execution policy + typedef TeamPolicyInternal execution_policy ; + typedef Qthread execution_space ; + typedef PolicyTraits< Properties ... > traits ; + + //---------------------------------------- + + template< class FunctorType > + inline static + int team_size_max( const FunctorType & ) + { return Qthread::instance().shepherd_worker_size(); } + + template< class FunctorType > + static int team_size_recommended( const FunctorType & f ) + { return team_size_max( f ); } + + template< class FunctorType > + inline static + int team_size_recommended( const FunctorType & f , const int& ) + { return team_size_max( f ); } + + //---------------------------------------- + + inline int team_size() const { return m_team_size ; } + inline int league_size() const { return m_league_size ; } + + // One active team per shepherd + TeamPolicyInternal( Kokkos::Qthread & q + , const int league_size + , const int team_size + , const int /* vector_length */ = 0 + ) + : m_league_size( league_size ) + , m_team_size( team_size < q.shepherd_worker_size() + ? team_size : q.shepherd_worker_size() ) + , m_shepherd_iter( ( league_size + q.shepherd_size() - 1 ) / q.shepherd_size() ) + { + } + + // One active team per shepherd + TeamPolicyInternal( const int league_size + , const int team_size + , const int /* vector_length */ = 0 + ) + : m_league_size( league_size ) + , m_team_size( team_size < Qthread::instance().shepherd_worker_size() + ? 
team_size : Qthread::instance().shepherd_worker_size() ) + , m_shepherd_iter( ( league_size + Qthread::instance().shepherd_size() - 1 ) / Qthread::instance().shepherd_size() ) + { + } + + typedef Impl::QthreadTeamPolicyMember member_type ; + + friend class Impl::QthreadTeamPolicyMember ; +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #define KOKKOS_QTHREADEXEC_HPP */ + diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5b6419289fc4874f1d97034aa7decd9be0eca147 --- /dev/null +++ b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp @@ -0,0 +1,745 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_QTHREAD_PARALLEL_HPP +#define KOKKOS_QTHREAD_PARALLEL_HPP + +#include <vector> + +#include <Kokkos_Parallel.hpp> + +#include <impl/Kokkos_StaticAssert.hpp> +#include <impl/Kokkos_FunctorAdapter.hpp> + +#include <Qthread/Kokkos_QthreadExec.hpp> + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- + +template< class FunctorType , class ... Traits > +class ParallelFor< FunctorType + , Kokkos::RangePolicy< Traits ... > + , Kokkos::Qthread + > +{ +private: + + typedef Kokkos::RangePolicy< Traits ... 
> Policy ; + + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::member_type Member ; + typedef typename Policy::WorkRange WorkRange ; + + const FunctorType m_functor ; + const Policy m_policy ; + + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor , const Member ibeg , const Member iend ) + { + for ( Member i = ibeg ; i < iend ; ++i ) { + functor( i ); + } + } + + template< class TagType > + inline static + typename std::enable_if< ! std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor , const Member ibeg , const Member iend ) + { + const TagType t{} ; + for ( Member i = ibeg ; i < iend ; ++i ) { + functor( t , i ); + } + } + + // Function is called once by every concurrent thread. + static void exec( QthreadExec & exec , const void * arg ) + { + const ParallelFor & self = * ((const ParallelFor *) arg ); + + const WorkRange range( self.m_policy, exec.worker_rank(), exec.worker_size() ); + + ParallelFor::template exec_range< WorkTag > ( self.m_functor , range.begin() , range.end() ); + + // All threads wait for completion. + exec.exec_all_barrier(); + } + +public: + + inline + void execute() const + { + Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelFor::exec , this ); + + } + + ParallelFor( const FunctorType & arg_functor + , const Policy & arg_policy + ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + { } +}; + +//---------------------------------------------------------------------------- + +template< class FunctorType , class ReducerType , class ... Traits > +class ParallelReduce< FunctorType + , Kokkos::RangePolicy< Traits ... > + , ReducerType + , Kokkos::Qthread + > +{ +private: + + typedef Kokkos::RangePolicy< Traits ... 
> Policy ; + + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::WorkRange WorkRange ; + typedef typename Policy::member_type Member ; + + typedef Kokkos::Impl::if_c< std::is_same<InvalidType, ReducerType>::value, FunctorType, ReducerType > ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + + // Static Assert WorkTag void if ReducerType not InvalidType + + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + + const FunctorType m_functor ; + const Policy m_policy ; + const ReducerType m_reducer ; + const pointer_type m_result_ptr ; + + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member ibeg , const Member iend + , reference_type update ) + { + for ( Member i = ibeg ; i < iend ; ++i ) { + functor( i , update ); + } + } + + template< class TagType > + inline static + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member ibeg , const Member iend + , reference_type update ) + { + const TagType t{} ; + for ( Member i = ibeg ; i < iend ; ++i ) { + functor( t , i , update ); + } + } + + static void exec( QthreadExec & exec , const void * arg ) + { + const ParallelReduce & self = * ((const ParallelReduce *) arg ); + + const WorkRange range( self.m_policy, exec.worker_rank(), exec.worker_size() ); + + ParallelReduce::template exec_range< WorkTag >( + self.m_functor, range.begin(), range.end(), + ValueInit::init( ReducerConditional::select(self.m_functor , self.m_reducer) + , exec.exec_all_reduce_value() ) ); + + exec.template exec_all_reduce< FunctorType, ReducerType, WorkTag >( self.m_functor, self.m_reducer ); + } + +public: + + inline + void execute() const + { + QthreadExec::resize_worker_scratch( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , 0 ); + Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelReduce::exec , this ); + + const pointer_type data = (pointer_type) QthreadExec::exec_all_reduce_result(); + + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , data ); + + if ( m_result_ptr ) { + const unsigned n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); + for ( unsigned i = 0 ; i < n ; ++i ) { m_result_ptr[i] = data[i]; } + } + } + + template< class ViewType > + ParallelReduce( const FunctorType & arg_functor + , const Policy & arg_policy + , const ViewType & arg_result_view + , typename std::enable_if<Kokkos::is_view< ViewType >::value && + !Kokkos::is_reducer_type< ReducerType >::value + , void*>::type = NULL) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( InvalidType() ) + , m_result_ptr( arg_result_view.data() ) + { } + + ParallelReduce( const FunctorType & arg_functor + , Policy arg_policy + , const 
ReducerType& reducer ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( reducer ) + , m_result_ptr( reducer.result_view().data() ) + { } +}; + +//---------------------------------------------------------------------------- + +template< class FunctorType , class ... Properties > +class ParallelFor< FunctorType + , TeamPolicy< Properties ... > + , Kokkos::Qthread > +{ +private: + + typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::Qthread , Properties ... > Policy ; + typedef typename Policy::member_type Member ; + typedef typename Policy::work_tag WorkTag ; + + const FunctorType m_functor ; + const Policy m_policy ; + + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_team( const FunctorType & functor , Member member ) + { + while ( member ) { + functor( member ); + member.team_barrier(); + member.next_team(); + } + } + + template< class TagType > + inline static + typename std::enable_if< ! std::is_same< TagType , void >::value >::type + exec_team( const FunctorType & functor , Member member ) + { + const TagType t{} ; + while ( member ) { + functor( t , member ); + member.team_barrier(); + member.next_team(); + } + } + + static void exec( QthreadExec & exec , const void * arg ) + { + const ParallelFor & self = * ((const ParallelFor *) arg ); + + ParallelFor::template exec_team< WorkTag > + ( self.m_functor , Member( exec , self.m_policy ) ); + + exec.exec_all_barrier(); + } + +public: + + inline + void execute() const + { + QthreadExec::resize_worker_scratch + ( /* reduction memory */ 0 + , /* team shared memory */ FunctorTeamShmemSize< FunctorType >::value( m_functor , m_policy.team_size() ) ); + Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelFor::exec , this ); + } + + ParallelFor( const FunctorType & arg_functor , + const Policy & arg_policy ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + { } +}; + 
+//---------------------------------------------------------------------------- + +template< class FunctorType , class ReducerType , class ... Properties > +class ParallelReduce< FunctorType + , TeamPolicy< Properties... > + , ReducerType + , Kokkos::Qthread + > +{ +private: + + typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::Qthread , Properties ... > Policy ; + + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::member_type Member ; + + typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ; + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + + const FunctorType m_functor ; + const Policy m_policy ; + const ReducerType m_reducer ; + const pointer_type m_result_ptr ; + + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_team( const FunctorType & functor , Member member , reference_type update ) + { + while ( member ) { + functor( member , update ); + member.team_barrier(); + member.next_team(); + } + } + + template< class TagType > + inline static + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + exec_team( const FunctorType & functor , Member member , reference_type update ) + { + const TagType t{} ; + while ( member ) { + functor( t , member , update ); + member.team_barrier(); + member.next_team(); + } + } + + static void exec( QthreadExec & exec , const void * arg ) + { + const ParallelReduce & self = * ((const ParallelReduce *) arg ); + + ParallelReduce::template exec_team< WorkTag > + ( self.m_functor + , Member( exec , self.m_policy ) + , ValueInit::init( ReducerConditional::select( self.m_functor , self.m_reducer ) + , exec.exec_all_reduce_value() ) ); + + exec.template exec_all_reduce< FunctorType, ReducerType, WorkTag >( self.m_functor, self.m_reducer ); + } + +public: + + inline + void execute() const + { + QthreadExec::resize_worker_scratch + ( /* reduction memory */ ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) + , /* team shared memory */ FunctorTeamShmemSize< FunctorType >::value( m_functor , m_policy.team_size() ) ); + + Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelReduce::exec , this ); + + const pointer_type data = (pointer_type) QthreadExec::exec_all_reduce_result(); + + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer), data ); + + if ( m_result_ptr ) { + const unsigned n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); + for ( unsigned i = 0 ; i < n ; ++i ) { m_result_ptr[i] = data[i]; } + } + } + + template< class ViewType > + ParallelReduce( const FunctorType & arg_functor + , const Policy & arg_policy + , const ViewType & arg_result + , typename std::enable_if<Kokkos::is_view< ViewType >::value && + !Kokkos::is_reducer_type< ReducerType >::value + , void*>::type = NULL) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( InvalidType() ) + , m_result_ptr( arg_result.ptr_on_device() ) + { } + + inline + ParallelReduce( const 
FunctorType & arg_functor + , Policy arg_policy + , const ReducerType& reducer ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( reducer ) + , m_result_ptr( reducer.result_view().data() ) + { } +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template< class FunctorType , class ... Traits > +class ParallelScan< FunctorType + , Kokkos::RangePolicy< Traits ... > + , Kokkos::Qthread + > +{ +private: + + typedef Kokkos::RangePolicy< Traits ... > Policy ; + + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::WorkRange WorkRange ; + typedef typename Policy::member_type Member ; + + typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ; + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + + const FunctorType m_functor ; + const Policy m_policy ; + + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member ibeg , const Member iend + , reference_type update , const bool final ) + { + for ( Member i = ibeg ; i < iend ; ++i ) { + functor( i , update , final ); + } + } + + template< class TagType > + inline static + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member ibeg , const Member iend + , reference_type update , const bool final ) + { + const TagType t{} ; + for ( Member i = ibeg ; i < iend ; ++i ) { + functor( t , i , update , final ); + } + } + + static void exec( QthreadExec & exec , const void * arg ) + { + const ParallelScan & self = * ((const ParallelScan *) arg ); + + const WorkRange range( self.m_policy , exec.worker_rank() , exec.worker_size() ); + + // Initialize thread-local value + reference_type update = ValueInit::init( self.m_functor , exec.exec_all_reduce_value() ); + + ParallelScan::template exec_range< WorkTag >( self.m_functor, range.begin() , range.end() , update , false ); + + exec.template exec_all_scan< FunctorType , typename Policy::work_tag >( self.m_functor ); + + ParallelScan::template exec_range< WorkTag >( self.m_functor , range.begin() , range.end() , update , true ); + + exec.exec_all_barrier(); + } + +public: + + inline + void execute() const + { + QthreadExec::resize_worker_scratch( ValueTraits::value_size( m_functor ) , 0 ); + Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelScan::exec , this ); + } + + ParallelScan( const FunctorType & arg_functor + , const Policy & arg_policy + ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + { + } +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember> +TeamThreadRange(const Impl::QthreadTeamPolicyMember& thread, const iType& count) +{ + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>(thread,count); +} + +template<typename iType> +KOKKOS_INLINE_FUNCTION 
+Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember> +TeamThreadRange( const Impl::QthreadTeamPolicyMember& thread + , const iType & begin + , const iType & end + ) +{ + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>(thread,begin,end); +} + + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember > + ThreadVectorRange(const Impl::QthreadTeamPolicyMember& thread, const iType& count) { + return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >(thread,count); +} + + +KOKKOS_INLINE_FUNCTION +Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember> PerTeam(const Impl::QthreadTeamPolicyMember& thread) { + return Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember>(thread); +} + +KOKKOS_INLINE_FUNCTION +Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember> PerThread(const Impl::QthreadTeamPolicyMember& thread) { + return Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember>(thread); +} + +} // namespace Kokkos + +namespace Kokkos { + + /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the the calling thread team. + * This functionality requires C++11 support.*/ +template<typename iType, class Lambda> +KOKKOS_INLINE_FUNCTION +void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>& loop_boundaries, const Lambda& lambda) { + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) + lambda(i); +} + +/** \brief Inter-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the the calling thread team and a summation of + * val is performed and put into result. 
This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>& loop_boundaries, + const Lambda & lambda, ValueType& result) { + + result = ValueType(); + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + result+=tmp; + } + + result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>()); +} + +#if defined( KOKKOS_HAVE_CXX11 ) + +/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of + * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. + * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore + * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or + * '1 for *'). This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>& loop_boundaries, + const Lambda & lambda, const JoinType& join, ValueType& init_result) { + + ValueType result = init_result; + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + join(result,tmp); + } + + init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join)); +} + +#endif /* #if defined( KOKKOS_HAVE_CXX11 ) */ + +} // namespace Kokkos + +namespace Kokkos { +/** \brief Intra-thread vector parallel_for. 
Executes lambda(iType i) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread. + * This functionality requires C++11 support.*/ +template<typename iType, class Lambda> +KOKKOS_INLINE_FUNCTION +void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >& + loop_boundaries, const Lambda& lambda) { + #ifdef KOKKOS_HAVE_PRAGMA_IVDEP + #pragma ivdep + #endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) + lambda(i); +} + +/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a summation of + * val is performed and put into result. This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >& + loop_boundaries, const Lambda & lambda, ValueType& result) { + result = ValueType(); +#ifdef KOKKOS_HAVE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + result+=tmp; + } +} + +/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of + * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. + * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore + * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or + * '1 for *'). 
This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >& + loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) { + + ValueType result = init_result; +#ifdef KOKKOS_HAVE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + join(result,tmp); + } + init_result = result; +} + +/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final) + * for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed. + * Depending on the target execution space the operator might be called twice: once with final=false + * and once with final=true. When final==true val contains the prefix sum value. The contribution of this + * "i" needs to be added to val no matter whether final==true or not. In a serial execution + * (i.e. team_size==1) the operator is only called once with final==true. Scan_val will be set + * to the final sum value over all vector lanes. 
+ * This functionality requires C++11 support.*/ +template< typename iType, class FunctorType > +KOKKOS_INLINE_FUNCTION +void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >& + loop_boundaries, const FunctorType & lambda) { + + typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; + typedef typename ValueTraits::value_type value_type ; + + value_type scan_val = value_type(); + +#ifdef KOKKOS_HAVE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i,scan_val,true); + } +} + +} // namespace Kokkos + +namespace Kokkos { + +template<class FunctorType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda) { + lambda(); +} + +template<class FunctorType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda) { + if(single_struct.team_member.team_rank()==0) lambda(); +} + +template<class FunctorType, class ValueType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda, ValueType& val) { + lambda(val); +} + +template<class FunctorType, class ValueType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda, ValueType& val) { + if(single_struct.team_member.team_rank()==0) { + lambda(val); + } + single_struct.team_member.team_broadcast(val,0); +} + +} // namespace Kokkos + + +#endif /* #define KOKKOS_QTHREAD_PARALLEL_HPP */ + diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8cc39d277c1949dc7f9587c09b77d5a71ffddeba --- 
/dev/null +++ b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp @@ -0,0 +1,491 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#include <Kokkos_Core_fwd.hpp> + +#if defined( KOKKOS_HAVE_QTHREAD ) + +#include <stdio.h> + +#include <stdlib.h> +#include <stdexcept> +#include <iostream> +#include <sstream> +#include <string> + +#include <Kokkos_Atomic.hpp> +#include <Qthread/Kokkos_Qthread_TaskPolicy.hpp> + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +typedef TaskMember< Kokkos::Qthread , void , void > Task ; + +namespace { + +inline +unsigned padded_sizeof_derived( unsigned sizeof_derived ) +{ + return sizeof_derived + + ( sizeof_derived % sizeof(Task*) ? sizeof(Task*) - sizeof_derived % sizeof(Task*) : 0 ); +} + +// int lock_alloc_dealloc = 0 ; + +} // namespace + +void Task::deallocate( void * ptr ) +{ + // Counting on 'free' thread safety so lock/unlock not required. + // However, isolate calls here to mitigate future need to introduce lock/unlock. + + // lock + + // while ( ! Kokkos::atomic_compare_exchange_strong( & lock_alloc_dealloc , 0 , 1 ) ); + + free( ptr ); + + // unlock + + // Kokkos::atomic_compare_exchange_strong( & lock_alloc_dealloc , 1 , 0 ); +} + +void * Task::allocate( const unsigned arg_sizeof_derived + , const unsigned arg_dependence_capacity ) +{ + // Counting on 'malloc' thread safety so lock/unlock not required. + // However, isolate calls here to mitigate future need to introduce lock/unlock. + + // lock + + // while ( ! 
Kokkos::atomic_compare_exchange_strong( & lock_alloc_dealloc , 0 , 1 ) ); + + void * const ptr = malloc( padded_sizeof_derived( arg_sizeof_derived ) + arg_dependence_capacity * sizeof(Task*) ); + + // unlock + + // Kokkos::atomic_compare_exchange_strong( & lock_alloc_dealloc , 1 , 0 ); + + return ptr ; +} + +Task::~TaskMember() +{ + +} + + +Task::TaskMember( const function_verify_type arg_verify + , const function_dealloc_type arg_dealloc + , const function_single_type arg_apply_single + , const function_team_type arg_apply_team + , volatile int & arg_active_count + , const unsigned arg_sizeof_derived + , const unsigned arg_dependence_capacity + ) + : m_dealloc( arg_dealloc ) + , m_verify( arg_verify ) + , m_apply_single( arg_apply_single ) + , m_apply_team( arg_apply_team ) + , m_active_count( & arg_active_count ) + , m_qfeb(0) + , m_dep( (Task **)( ((unsigned char *) this) + padded_sizeof_derived( arg_sizeof_derived ) ) ) + , m_dep_capacity( arg_dependence_capacity ) + , m_dep_size( 0 ) + , m_ref_count( 0 ) + , m_state( Kokkos::Experimental::TASK_STATE_CONSTRUCTING ) +{ + qthread_empty( & m_qfeb ); // Set to full when complete + for ( unsigned i = 0 ; i < arg_dependence_capacity ; ++i ) m_dep[i] = 0 ; +} + +Task::TaskMember( const function_dealloc_type arg_dealloc + , const function_single_type arg_apply_single + , const function_team_type arg_apply_team + , volatile int & arg_active_count + , const unsigned arg_sizeof_derived + , const unsigned arg_dependence_capacity + ) + : m_dealloc( arg_dealloc ) + , m_verify( & Task::verify_type<void> ) + , m_apply_single( arg_apply_single ) + , m_apply_team( arg_apply_team ) + , m_active_count( & arg_active_count ) + , m_qfeb(0) + , m_dep( (Task **)( ((unsigned char *) this) + padded_sizeof_derived( arg_sizeof_derived ) ) ) + , m_dep_capacity( arg_dependence_capacity ) + , m_dep_size( 0 ) + , m_ref_count( 0 ) + , m_state( Kokkos::Experimental::TASK_STATE_CONSTRUCTING ) +{ + qthread_empty( & m_qfeb ); // Set to full when 
complete + for ( unsigned i = 0 ; i < arg_dependence_capacity ; ++i ) m_dep[i] = 0 ; +} + +//---------------------------------------------------------------------------- + +void Task::throw_error_add_dependence() const +{ + std::cerr << "TaskMember< Qthread >::add_dependence ERROR" + << " state(" << m_state << ")" + << " dep_size(" << m_dep_size << ")" + << std::endl ; + throw std::runtime_error("TaskMember< Qthread >::add_dependence ERROR"); +} + +void Task::throw_error_verify_type() +{ + throw std::runtime_error("TaskMember< Qthread >::verify_type ERROR"); +} + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) +void Task::assign( Task ** const lhs , Task * rhs , const bool no_throw ) +{ + static const char msg_error_header[] = "Kokkos::Impl::TaskManager<Kokkos::Qthread>::assign ERROR" ; + static const char msg_error_count[] = ": negative reference count" ; + static const char msg_error_complete[] = ": destroy task that is not complete" ; + static const char msg_error_dependences[] = ": destroy task that has dependences" ; + static const char msg_error_exception[] = ": caught internal exception" ; + + if ( rhs ) { Kokkos::atomic_fetch_add( & (*rhs).m_ref_count , 1 ); } + + Task * const lhs_val = Kokkos::atomic_exchange( lhs , rhs ); + + if ( lhs_val ) { + + const int count = Kokkos::atomic_fetch_add( & (*lhs_val).m_ref_count , -1 ); + + const char * msg_error = 0 ; + + try { + + if ( 1 == count ) { + + // Reference count at zero, delete it + + // Should only be deallocating a completed task + if ( (*lhs_val).m_state == Kokkos::Experimental::TASK_STATE_COMPLETE ) { + + // A completed task should not have dependences... 
+ for ( int i = 0 ; i < (*lhs_val).m_dep_size && 0 == msg_error ; ++i ) { + if ( (*lhs_val).m_dep[i] ) msg_error = msg_error_dependences ; + } + } + else { + msg_error = msg_error_complete ; + } + + if ( 0 == msg_error ) { + // Get deletion function and apply it + const Task::function_dealloc_type d = (*lhs_val).m_dealloc ; + + (*d)( lhs_val ); + } + } + else if ( count <= 0 ) { + msg_error = msg_error_count ; + } + } + catch( ... ) { + if ( 0 == msg_error ) msg_error = msg_error_exception ; + } + + if ( 0 != msg_error ) { + if ( no_throw ) { + std::cerr << msg_error_header << msg_error << std::endl ; + std::cerr.flush(); + } + else { + std::string msg(msg_error_header); + msg.append(msg_error); + throw std::runtime_error( msg ); + } + } + } +} +#endif + + +//---------------------------------------------------------------------------- + +void Task::closeout() +{ + enum { RESPAWN = int( Kokkos::Experimental::TASK_STATE_WAITING ) | + int( Kokkos::Experimental::TASK_STATE_EXECUTING ) }; + +#if 0 +fprintf( stdout + , "worker(%d.%d) task 0x%.12lx %s\n" + , qthread_shep() + , qthread_worker_local(NULL) + , reinterpret_cast<unsigned long>(this) + , ( m_state == RESPAWN ? "respawn" : "complete" ) + ); +fflush(stdout); +#endif + + // When dependent tasks run there would be a race + // condition between destroying this task and + // querying the active count pointer from this task. + int volatile * const active_count = m_active_count ; + + if ( m_state == RESPAWN ) { + // Task requests respawn, set state to waiting and reschedule the task + m_state = Kokkos::Experimental::TASK_STATE_WAITING ; + schedule(); + } + else { + + // Task did not respawn, is complete + m_state = Kokkos::Experimental::TASK_STATE_COMPLETE ; + + // Release dependences before allowing dependent tasks to run. + // Otherwise there is a thread race condition for removing dependences. 
+ for ( int i = 0 ; i < m_dep_size ; ++i ) { + assign( & m_dep[i] , 0 ); + } + + // Set qthread FEB to full so that dependent tasks are allowed to execute. + // This 'task' may be deleted immediately following this function call. + qthread_fill( & m_qfeb ); + + // The dependent task could now complete and destroy 'this' task + // before the call to 'qthread_fill' returns. Therefore, for + // thread safety assume that 'this' task has now been destroyed. + } + + // Decrement active task count before returning. + Kokkos::atomic_decrement( active_count ); +} + +aligned_t Task::qthread_func( void * arg ) +{ + Task * const task = reinterpret_cast< Task * >(arg); + + // First member of the team change state to executing. + // Use compare-exchange to avoid race condition with a respawn. + Kokkos::atomic_compare_exchange_strong( & task->m_state + , int(Kokkos::Experimental::TASK_STATE_WAITING) + , int(Kokkos::Experimental::TASK_STATE_EXECUTING) + ); + + if ( task->m_apply_team && ! task->m_apply_single ) { + Kokkos::Impl::QthreadTeamPolicyMember::TaskTeam task_team_tag ; + + // Initialize team size and rank with shepherd info + Kokkos::Impl::QthreadTeamPolicyMember member( task_team_tag ); + + (*task->m_apply_team)( task , member ); + +#if 0 +fprintf( stdout + , "worker(%d.%d) task 0x%.12lx executed by member(%d:%d)\n" + , qthread_shep() + , qthread_worker_local(NULL) + , reinterpret_cast<unsigned long>(task) + , member.team_rank() + , member.team_size() + ); +fflush(stdout); +#endif + + member.team_barrier(); + if ( member.team_rank() == 0 ) task->closeout(); + member.team_barrier(); + } + else if ( task->m_apply_team && task->m_apply_single == reinterpret_cast<function_single_type>(1) ) { + // Team hard-wired to one, no cloning + Kokkos::Impl::QthreadTeamPolicyMember member ; + (*task->m_apply_team)( task , member ); + task->closeout(); + } + else { + (*task->m_apply_single)( task ); + task->closeout(); + } + +#if 0 +fprintf( stdout + , "worker(%d.%d) task 0x%.12lx
return\n" + , qthread_shep() + , qthread_worker_local(NULL) + , reinterpret_cast<unsigned long>(task) + ); +fflush(stdout); +#endif + + return 0 ; +} + +void Task::respawn() +{ + // Change state from pure executing to ( waiting | executing ) + // to avoid confusion with simply waiting. + Kokkos::atomic_compare_exchange_strong( & m_state + , int(Kokkos::Experimental::TASK_STATE_EXECUTING) + , int(Kokkos::Experimental::TASK_STATE_WAITING | + Kokkos::Experimental::TASK_STATE_EXECUTING) + ); +} + +void Task::schedule() +{ + // Is waiting for execution + + // Increment active task count before spawning. + Kokkos::atomic_increment( m_active_count ); + + // spawn in qthread. must malloc the precondition array and give to qthread. + // qthread will eventually free this allocation so memory will not be leaked. + + // concern with thread safety of malloc, does this need to be guarded? + aligned_t ** qprecon = (aligned_t **) malloc( ( m_dep_size + 1 ) * sizeof(aligned_t *) ); + + qprecon[0] = reinterpret_cast<aligned_t *>( uintptr_t(m_dep_size) ); + + for ( int i = 0 ; i < m_dep_size ; ++i ) { + qprecon[i+1] = & m_dep[i]->m_qfeb ; // Qthread precondition flag + } + + if ( m_apply_team && ! 
m_apply_single ) { + // If more than one shepherd spawn on a shepherd other than this shepherd + const int num_shepherd = qthread_num_shepherds(); + const int num_worker_per_shepherd = qthread_num_workers_local(NO_SHEPHERD); + const int this_shepherd = qthread_shep(); + + int spawn_shepherd = ( this_shepherd + 1 ) % num_shepherd ; + +#if 0 +fprintf( stdout + , "worker(%d.%d) task 0x%.12lx spawning on shepherd(%d) clone(%d)\n" + , qthread_shep() + , qthread_worker_local(NULL) + , reinterpret_cast<unsigned long>(this) + , spawn_shepherd + , num_worker_per_shepherd - 1 + ); +fflush(stdout); +#endif + + qthread_spawn_cloneable + ( & Task::qthread_func + , this + , 0 + , NULL + , m_dep_size , qprecon /* dependences */ + , spawn_shepherd + , unsigned( QTHREAD_SPAWN_SIMPLE | QTHREAD_SPAWN_LOCAL_PRIORITY ) + , num_worker_per_shepherd - 1 + ); + } + else { + qthread_spawn( & Task::qthread_func /* function */ + , this /* function argument */ + , 0 + , NULL + , m_dep_size , qprecon /* dependences */ + , NO_SHEPHERD + , QTHREAD_SPAWN_SIMPLE /* allows optimization for non-blocking task */ + ); + } +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +namespace Kokkos { +namespace Experimental { + +TaskPolicy< Kokkos::Qthread >:: +TaskPolicy + ( const unsigned /* arg_task_max_count */ + , const unsigned /* arg_task_max_size */ + , const unsigned arg_task_default_dependence_capacity + , const unsigned arg_task_team_size + ) + : m_default_dependence_capacity( arg_task_default_dependence_capacity ) + , m_team_size( arg_task_team_size != 0 ? 
arg_task_team_size : unsigned(qthread_num_workers_local(NO_SHEPHERD)) ) + , m_active_count_root(0) + , m_active_count( m_active_count_root ) +{ + const unsigned num_worker_per_shepherd = unsigned( qthread_num_workers_local(NO_SHEPHERD) ); + + if ( m_team_size != 1 && m_team_size != num_worker_per_shepherd ) { + std::ostringstream msg ; + msg << "Kokkos::Experimental::TaskPolicy< Kokkos::Qthread >( " + << "default_depedence = " << arg_task_default_dependence_capacity + << " , team_size = " << arg_task_team_size + << " ) ERROR, valid team_size arguments are { (omitted) , 1 , " << num_worker_per_shepherd << " }" ; + Kokkos::Impl::throw_runtime_exception(msg.str()); + } +} + +TaskPolicy< Kokkos::Qthread >::member_type & +TaskPolicy< Kokkos::Qthread >::member_single() +{ + static member_type s ; + return s ; +} + +void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Qthread > & policy ) +{ + volatile int * const active_task_count = & policy.m_active_count ; + while ( *active_task_count ) qthread_yield(); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #if defined( KOKKOS_HAVE_QTHREAD ) */ + diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..22a565503dd59626057bae12ef01cb9abdb994f9 --- /dev/null +++ b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp @@ -0,0 +1,664 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_QTHREAD_TASKPOLICY_HPP +#define KOKKOS_QTHREAD_TASKPOLICY_HPP + +#include <string> +#include <typeinfo> +#include <stdexcept> + +//---------------------------------------------------------------------------- +// Defines to enable experimental Qthread functionality + +#define QTHREAD_LOCAL_PRIORITY +#define CLONED_TASKS + +#include <qthread.h> + +#undef QTHREAD_LOCAL_PRIORITY +#undef CLONED_TASKS + +//---------------------------------------------------------------------------- + +#include <Kokkos_Qthread.hpp> +#include <Kokkos_TaskPolicy.hpp> +#include <Kokkos_View.hpp> + +#include <impl/Kokkos_FunctorAdapter.hpp> + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template<> +class TaskMember< Kokkos::Qthread , void , void > +{ +public: + + typedef TaskMember * (* function_verify_type) ( TaskMember * ); + typedef void (* function_single_type) ( TaskMember * ); + typedef void (* function_team_type) ( TaskMember * , Kokkos::Impl::QthreadTeamPolicyMember & ); + typedef void (* function_dealloc_type)( TaskMember * ); + +private: + + const function_dealloc_type m_dealloc ; ///< Deallocation + const function_verify_type m_verify ; ///< Result type verification + const function_single_type m_apply_single ; ///< Apply function + const function_team_type m_apply_team ; ///< Apply function + int volatile * const m_active_count ; ///< Count of active tasks on this policy + aligned_t m_qfeb ; ///< Qthread full/empty bit + TaskMember ** const m_dep ; ///< Dependences + const int m_dep_capacity ; ///< Capacity of dependences + int m_dep_size ; ///< Actual count of dependences + int m_ref_count ; ///< Reference count + int m_state ; 
///< State of the task + + TaskMember() /* = delete */ ; + TaskMember( const TaskMember & ) /* = delete */ ; + TaskMember & operator = ( const TaskMember & ) /* = delete */ ; + + static aligned_t qthread_func( void * arg ); + + static void * allocate( const unsigned arg_sizeof_derived , const unsigned arg_dependence_capacity ); + static void deallocate( void * ); + + void throw_error_add_dependence() const ; + static void throw_error_verify_type(); + + template < class DerivedTaskType > + static + void deallocate( TaskMember * t ) + { + DerivedTaskType * ptr = static_cast< DerivedTaskType * >(t); + ptr->~DerivedTaskType(); + deallocate( (void *) ptr ); + } + + void schedule(); + void closeout(); + +protected : + + ~TaskMember(); + + // Used by TaskMember< Qthread , ResultType , void > + TaskMember( const function_verify_type arg_verify + , const function_dealloc_type arg_dealloc + , const function_single_type arg_apply_single + , const function_team_type arg_apply_team + , volatile int & arg_active_count + , const unsigned arg_sizeof_derived + , const unsigned arg_dependence_capacity + ); + + // Used for TaskMember< Qthread , void , void > + TaskMember( const function_dealloc_type arg_dealloc + , const function_single_type arg_apply_single + , const function_team_type arg_apply_team + , volatile int & arg_active_count + , const unsigned arg_sizeof_derived + , const unsigned arg_dependence_capacity + ); + +public: + + template< typename ResultType > + KOKKOS_FUNCTION static + TaskMember * verify_type( TaskMember * t ) + { + enum { check_type = ! 
Kokkos::Impl::is_same< ResultType , void >::value }; + + if ( check_type && t != 0 ) { + + // Verify that t->m_verify is this function + const function_verify_type self = & TaskMember::template verify_type< ResultType > ; + + if ( t->m_verify != self ) { + t = 0 ; +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + throw_error_verify_type(); +#endif + } + } + return t ; + } + + //---------------------------------------- + /* Inheritance Requirements on task types: + * typedef FunctorType::value_type value_type ; + * class DerivedTaskType + * : public TaskMember< Qthread , value_type , FunctorType > + * { ... }; + * class TaskMember< Qthread , value_type , FunctorType > + * : public TaskMember< Qthread , value_type , void > + * , public FunctorType + * { ... }; + * If value_type != void + * class TaskMember< Qthread , value_type , void > + * : public TaskMember< Qthread , void , void > + * + * Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ] + * + */ + + /** \brief Allocate and construct a single-thread task */ + template< class DerivedTaskType > + static + TaskMember * create_single( const typename DerivedTaskType::functor_type & arg_functor + , volatile int & arg_active_count + , const unsigned arg_dependence_capacity ) + { + typedef typename DerivedTaskType::functor_type functor_type ; + typedef typename functor_type::value_type value_type ; + + DerivedTaskType * const task = + new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) ) + DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType > + , & TaskMember::template apply_single< functor_type , value_type > + , 0 + , arg_active_count + , sizeof(DerivedTaskType) + , arg_dependence_capacity + , arg_functor ); + + return static_cast< TaskMember * >( task ); + } + + /** \brief Allocate and construct a team-thread task */ + template< class DerivedTaskType > + static + TaskMember * create_team( const typename DerivedTaskType::functor_type & arg_functor + ,
volatile int & arg_active_count + , const unsigned arg_dependence_capacity + , const bool arg_is_team ) + { + typedef typename DerivedTaskType::functor_type functor_type ; + typedef typename functor_type::value_type value_type ; + + const function_single_type flag = reinterpret_cast<function_single_type>( arg_is_team ? 0 : 1 ); + + DerivedTaskType * const task = + new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) ) + DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType > + , flag + , & TaskMember::template apply_team< functor_type , value_type > + , arg_active_count + , sizeof(DerivedTaskType) + , arg_dependence_capacity + , arg_functor ); + + return static_cast< TaskMember * >( task ); + } + + void respawn(); + void spawn() + { + m_state = Kokkos::Experimental::TASK_STATE_WAITING ; + schedule(); + } + + //---------------------------------------- + + typedef FutureValueTypeIsVoidError get_result_type ; + + KOKKOS_INLINE_FUNCTION + get_result_type get() const { return get_result_type() ; } + + KOKKOS_INLINE_FUNCTION + Kokkos::Experimental::TaskState get_state() const { return Kokkos::Experimental::TaskState( m_state ); } + + //---------------------------------------- + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + static + void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false ); +#else + KOKKOS_INLINE_FUNCTION static + void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false ) {} +#endif + + KOKKOS_INLINE_FUNCTION + TaskMember * get_dependence( int i ) const + { return ( Kokkos::Experimental::TASK_STATE_EXECUTING == m_state && 0 <= i && i < m_dep_size ) ? 
m_dep[i] : (TaskMember*) 0 ; } + + KOKKOS_INLINE_FUNCTION + int get_dependence() const + { return m_dep_size ; } + + KOKKOS_INLINE_FUNCTION + void clear_dependence() + { + for ( int i = 0 ; i < m_dep_size ; ++i ) assign( m_dep + i , 0 ); + m_dep_size = 0 ; + } + + KOKKOS_INLINE_FUNCTION + void add_dependence( TaskMember * before ) + { + if ( ( Kokkos::Experimental::TASK_STATE_CONSTRUCTING == m_state || + Kokkos::Experimental::TASK_STATE_EXECUTING == m_state ) && + m_dep_size < m_dep_capacity ) { + assign( m_dep + m_dep_size , before ); + ++m_dep_size ; + } + else { + throw_error_add_dependence(); + } + } + + //---------------------------------------- + + template< class FunctorType , class ResultType > + KOKKOS_INLINE_FUNCTION static + void apply_single( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t ) + { + typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ; + + // TaskMember< Kokkos::Qthread , ResultType , FunctorType > + // : public TaskMember< Kokkos::Qthread , ResultType , void > + // , public FunctorType + // { ... }; + + derived_type & m = * static_cast< derived_type * >( t ); + + Kokkos::Impl::FunctorApply< FunctorType , void , ResultType & >::apply( (FunctorType &) m , & m.m_result ); + } + + template< class FunctorType , class ResultType > + KOKKOS_INLINE_FUNCTION static + void apply_single( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t ) + { + typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ; + + // TaskMember< Kokkos::Qthread , ResultType , FunctorType > + // : public TaskMember< Kokkos::Qthread , ResultType , void > + // , public FunctorType + // { ... 
}; + + derived_type & m = * static_cast< derived_type * >( t ); + + Kokkos::Impl::FunctorApply< FunctorType , void , void >::apply( (FunctorType &) m ); + } + + //---------------------------------------- + + template< class FunctorType , class ResultType > + KOKKOS_INLINE_FUNCTION static + void apply_team( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t + , Kokkos::Impl::QthreadTeamPolicyMember & member ) + { + typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ; + + derived_type & m = * static_cast< derived_type * >( t ); + + m.FunctorType::apply( member , m.m_result ); + } + + template< class FunctorType , class ResultType > + KOKKOS_INLINE_FUNCTION static + void apply_team( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t + , Kokkos::Impl::QthreadTeamPolicyMember & member ) + { + typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ; + + derived_type & m = * static_cast< derived_type * >( t ); + + m.FunctorType::apply( member ); + } +}; + +//---------------------------------------------------------------------------- +/** \brief Base class for tasks with a result value in the Qthread execution space. + * + * The FunctorType must be void because this class is accessed by the + * Future class for the task and result value. + * + * Must be derived from TaskMember<S,void,void> 'root class' so the Future class + * can correctly static_cast from the 'root class' to this class. 
+ */ +template < class ResultType > +class TaskMember< Kokkos::Qthread , ResultType , void > + : public TaskMember< Kokkos::Qthread , void , void > +{ +public: + + ResultType m_result ; + + typedef const ResultType & get_result_type ; + + KOKKOS_INLINE_FUNCTION + get_result_type get() const { return m_result ; } + +protected: + + typedef TaskMember< Kokkos::Qthread , void , void > task_root_type ; + typedef task_root_type::function_dealloc_type function_dealloc_type ; + typedef task_root_type::function_single_type function_single_type ; + typedef task_root_type::function_team_type function_team_type ; + + inline + TaskMember( const function_dealloc_type arg_dealloc + , const function_single_type arg_apply_single + , const function_team_type arg_apply_team + , volatile int & arg_active_count + , const unsigned arg_sizeof_derived + , const unsigned arg_dependence_capacity + ) + : task_root_type( & task_root_type::template verify_type< ResultType > + , arg_dealloc + , arg_apply_single + , arg_apply_team + , arg_active_count + , arg_sizeof_derived + , arg_dependence_capacity ) + , m_result() + {} +}; + +template< class ResultType , class FunctorType > +class TaskMember< Kokkos::Qthread , ResultType , FunctorType > + : public TaskMember< Kokkos::Qthread , ResultType , void > + , public FunctorType +{ +public: + + typedef FunctorType functor_type ; + + typedef TaskMember< Kokkos::Qthread , void , void > task_root_type ; + typedef TaskMember< Kokkos::Qthread , ResultType , void > task_base_type ; + typedef task_root_type::function_dealloc_type function_dealloc_type ; + typedef task_root_type::function_single_type function_single_type ; + typedef task_root_type::function_team_type function_team_type ; + + inline + TaskMember( const function_dealloc_type arg_dealloc + , const function_single_type arg_apply_single + , const function_team_type arg_apply_team + , volatile int & arg_active_count + , const unsigned arg_sizeof_derived + , const unsigned arg_dependence_capacity + 
, const functor_type & arg_functor + ) + : task_base_type( arg_dealloc + , arg_apply_single + , arg_apply_team + , arg_active_count + , arg_sizeof_derived + , arg_dependence_capacity ) + , functor_type( arg_functor ) + {} +}; + +} /* namespace Impl */ +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { + +void wait( TaskPolicy< Kokkos::Qthread > & ); + +template<> +class TaskPolicy< Kokkos::Qthread > +{ +public: + + typedef Kokkos::Qthread execution_space ; + typedef TaskPolicy execution_policy ; + typedef Kokkos::Impl::QthreadTeamPolicyMember member_type ; + +private: + + typedef Impl::TaskMember< execution_space , void , void > task_root_type ; + + template< class FunctorType > + static inline + const task_root_type * get_task_root( const FunctorType * f ) + { + typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ; + return static_cast< const task_root_type * >( static_cast< const task_type * >(f) ); + } + + template< class FunctorType > + static inline + task_root_type * get_task_root( FunctorType * f ) + { + typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ; + return static_cast< task_root_type * >( static_cast< task_type * >(f) ); + } + + unsigned m_default_dependence_capacity ; + unsigned m_team_size ; + volatile int m_active_count_root ; + volatile int & m_active_count ; + +public: + + TaskPolicy + ( const unsigned arg_task_max_count + , const unsigned arg_task_max_size + , const unsigned arg_task_default_dependence_capacity = 4 + , const unsigned arg_task_team_size = 0 /* choose default */ + ); + + KOKKOS_FUNCTION TaskPolicy() = default ; + KOKKOS_FUNCTION TaskPolicy( TaskPolicy && rhs ) = default ; + KOKKOS_FUNCTION TaskPolicy( const 
TaskPolicy & rhs ) = default ; + KOKKOS_FUNCTION TaskPolicy & operator = ( TaskPolicy && rhs ) = default ; + KOKKOS_FUNCTION TaskPolicy & operator = ( const TaskPolicy & rhs ) = default ; + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + int allocated_task_count() const { return m_active_count ; } + + template< class ValueType > + const Future< ValueType , execution_space > & + spawn( const Future< ValueType , execution_space > & f + , const bool priority = false ) const + { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + f.m_task->spawn(); +#endif + return f ; + } + + // Create single-thread task + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION + Future< typename FunctorType::value_type , execution_space > + task_create( const FunctorType & functor + , const unsigned dependence_capacity = ~0u ) const + { + typedef typename FunctorType::value_type value_type ; + typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ; + return Future< value_type , execution_space >( +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + task_root_type::create_single< task_type > + ( functor + , m_active_count + , ( ~0u == dependence_capacity ? 
m_default_dependence_capacity : dependence_capacity ) + ) +#endif + ); + } + + template< class FunctorType > + Future< typename FunctorType::value_type , execution_space > + proc_create( const FunctorType & functor + , const unsigned dependence_capacity = ~0u ) const + { return task_create( functor , dependence_capacity ); } + + // Create thread-team task + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION + Future< typename FunctorType::value_type , execution_space > + task_create_team( const FunctorType & functor + , const unsigned dependence_capacity = ~0u ) const + { + typedef typename FunctorType::value_type value_type ; + typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ; + + return Future< value_type , execution_space >( +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + task_root_type::create_team< task_type > + ( functor + , m_active_count + , ( ~0u == dependence_capacity ? m_default_dependence_capacity : dependence_capacity ) + , 1 < m_team_size + ) +#endif + ); + } + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION + Future< typename FunctorType::value_type , execution_space > + proc_create_team( const FunctorType & functor + , const unsigned dependence_capacity = ~0u ) const + { return task_create_team( functor , dependence_capacity ); } + + // Add dependence + template< class A1 , class A2 , class A3 , class A4 > + void add_dependence( const Future<A1,A2> & after + , const Future<A3,A4> & before + , typename Kokkos::Impl::enable_if + < Kokkos::Impl::is_same< typename Future<A1,A2>::execution_space , execution_space >::value + && + Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value + >::type * = 0 + ) + { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + after.m_task->add_dependence( before.m_task ); +#endif + } + + //---------------------------------------- + // Functions for an executing task functor to query dependences, + // set new dependences, 
and respawn itself. + + template< class FunctorType > + Future< void , execution_space > + get_dependence( const FunctorType * task_functor , int i ) const + { + return Future<void,execution_space>( +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + get_task_root(task_functor)->get_dependence(i) +#endif + ); + } + + template< class FunctorType > + int get_dependence( const FunctorType * task_functor ) const +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { return get_task_root(task_functor)->get_dependence(); } +#else + { return 0 ; } +#endif + + template< class FunctorType > + void clear_dependence( FunctorType * task_functor ) const + { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + get_task_root(task_functor)->clear_dependence(); +#endif + } + + template< class FunctorType , class A3 , class A4 > + void add_dependence( FunctorType * task_functor + , const Future<A3,A4> & before + , typename Kokkos::Impl::enable_if + < Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value + >::type * = 0 + ) + { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + get_task_root(task_functor)->add_dependence( before.m_task ); +#endif + } + + template< class FunctorType > + void respawn( FunctorType * task_functor + , const bool priority = false ) const + { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + get_task_root(task_functor)->respawn(); +#endif + } + + template< class FunctorType > + void respawn_needing_memory( FunctorType * task_functor ) const + { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + get_task_root(task_functor)->respawn(); +#endif + } + + static member_type & member_single(); + + friend void wait( TaskPolicy< Kokkos::Qthread > & ); +}; + +} /* namespace Experimental */ +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif 
/* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #ifndef KOKKOS_QTHREAD_TASKPOLICY_HPP */ + diff --git a/lib/kokkos/core/src/Qthread/README b/lib/kokkos/core/src/Qthread/README new file mode 100644 index 0000000000000000000000000000000000000000..6e6c86a9efc2680916e2556bda28914833e6749d --- /dev/null +++ b/lib/kokkos/core/src/Qthread/README @@ -0,0 +1,25 @@ + +# This Qthreads back-end uses an experimental branch of the Qthreads repository with special #define options. + +# Cloning repository and branch: + +git clone git@github.com:Qthreads/qthreads.git qthreads + +cd qthreads + +# checkout branch with "cloned tasks" + +git checkout dev-kokkos + +# Configure/autogen + +sh autogen.sh + +# configure with 'hwloc' installation: + +./configure CFLAGS="-DCLONED_TASKS -DQTHREAD_LOCAL_PRIORITY" --with-hwloc=${HWLOCDIR} --prefix=${INSTALLDIR} + +# install + +make install + diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5f0b8f70cd8ef36dd153b7bcbb84c42300f4fa6e --- /dev/null +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp @@ -0,0 +1,826 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2.
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core_fwd.hpp> + +#if defined( KOKKOS_HAVE_PTHREAD ) || defined( KOKKOS_HAVE_WINTHREAD ) + +#include <stdint.h> +#include <limits> +#include <utility> +#include <iostream> +#include <sstream> +#include <Kokkos_Core.hpp> +#include <impl/Kokkos_Error.hpp> +#include <impl/Kokkos_CPUDiscovery.hpp> +#include <impl/Kokkos_Profiling_Interface.hpp> + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { +namespace { + +ThreadsExec s_threads_process ; +ThreadsExec * s_threads_exec[ ThreadsExec::MAX_THREAD_COUNT ] = { 0 }; +pthread_t s_threads_pid[ ThreadsExec::MAX_THREAD_COUNT ] = { 0 }; +std::pair<unsigned,unsigned> s_threads_coord[ ThreadsExec::MAX_THREAD_COUNT ]; + +int s_thread_pool_size[3] = { 0 , 0 , 0 }; + +unsigned s_current_reduce_size = 0 ; +unsigned s_current_shared_size = 0 ; + +void (* volatile s_current_function)( ThreadsExec & , const void * ); +const void * volatile s_current_function_arg = 0 ; + +struct Sentinel { + Sentinel() + { + HostSpace::register_in_parallel( ThreadsExec::in_parallel ); + } + + ~Sentinel() + { + if ( s_thread_pool_size[0] || + s_thread_pool_size[1] || + s_thread_pool_size[2] || + s_current_reduce_size || + s_current_shared_size || + s_current_function || + s_current_function_arg || + s_threads_exec[0] ) { + std::cerr << "ERROR : Process exiting without calling Kokkos::Threads::terminate()" << std::endl ; + } + } +}; + +inline +unsigned fan_size( const unsigned rank , const unsigned size ) +{ + const unsigned rank_rev = size - ( rank + 1 ); + unsigned count = 0 ; + for ( unsigned n = 1 ; ( rank_rev + n < size ) && ! 
( rank_rev & n ) ; n <<= 1 ) { ++count ; } + return count ; +} + +} // namespace +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +void execute_function_noop( ThreadsExec & , const void * ) {} + +void ThreadsExec::driver(void) +{ + ThreadsExec this_thread ; + + while ( ThreadsExec::Active == this_thread.m_pool_state ) { + + (*s_current_function)( this_thread , s_current_function_arg ); + + // Deactivate thread and wait for reactivation + this_thread.m_pool_state = ThreadsExec::Inactive ; + + wait_yield( this_thread.m_pool_state , ThreadsExec::Inactive ); + } +} + +ThreadsExec::ThreadsExec() + : m_pool_base(0) + , m_scratch(0) + , m_scratch_reduce_end(0) + , m_scratch_thread_end(0) + , m_numa_rank(0) + , m_numa_core_rank(0) + , m_pool_rank(0) + , m_pool_size(0) + , m_pool_fan_size(0) + , m_pool_state( ThreadsExec::Terminating ) +{ + if ( & s_threads_process != this ) { + + // A spawned thread + + ThreadsExec * const nil = 0 ; + + // Which entry in 's_threads_exec', possibly determined from hwloc binding + const int entry = ((size_t)s_current_function_arg) < size_t(s_thread_pool_size[0]) + ? 
((size_t)s_current_function_arg) + : size_t(Kokkos::hwloc::bind_this_thread( s_thread_pool_size[0] , s_threads_coord )); + + // Given a good entry set this thread in the 's_threads_exec' array + if ( entry < s_thread_pool_size[0] && + nil == atomic_compare_exchange( s_threads_exec + entry , nil , this ) ) { + + const std::pair<unsigned,unsigned> coord = Kokkos::hwloc::get_this_thread_coordinate(); + + m_numa_rank = coord.first ; + m_numa_core_rank = coord.second ; + m_pool_base = s_threads_exec ; + m_pool_rank = s_thread_pool_size[0] - ( entry + 1 ); + m_pool_rank_rev = s_thread_pool_size[0] - ( pool_rank() + 1 ); + m_pool_size = s_thread_pool_size[0] ; + m_pool_fan_size = fan_size( m_pool_rank , m_pool_size ); + m_pool_state = ThreadsExec::Active ; + + s_threads_pid[ m_pool_rank ] = pthread_self(); + + // Inform spawning process that the threads_exec entry has been set. + s_threads_process.m_pool_state = ThreadsExec::Active ; + } + else { + // Inform spawning process that the threads_exec entry could not be set. 
+ s_threads_process.m_pool_state = ThreadsExec::Terminating ; + } + } + else { + // Enables 'parallel_for' to execute on uninitialized Threads device + m_pool_rank = 0 ; + m_pool_size = 1 ; + m_pool_state = ThreadsExec::Inactive ; + + s_threads_pid[ m_pool_rank ] = pthread_self(); + } +} + +// Destructor: releases the tracked scratch allocation (if any), zeroes the +// pool bookkeeping members and, for any object other than the process +// instance, clears its own slot in 's_threads_exec' and sets the process +// state to Terminating. +ThreadsExec::~ThreadsExec() +{ + const unsigned entry = m_pool_size - ( m_pool_rank + 1 ); + + typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ; + + if ( m_scratch ) { + Record * const r = Record::get_record( m_scratch ); + + m_scratch = 0 ; + + Record::decrement( r ); + } + + m_pool_base = 0 ; + m_scratch_reduce_end = 0 ; + m_scratch_thread_end = 0 ; + m_numa_rank = 0 ; + m_numa_core_rank = 0 ; + m_pool_rank = 0 ; + m_pool_size = 0 ; + m_pool_fan_size = 0 ; + + m_pool_state = ThreadsExec::Terminating ; + + if ( & s_threads_process != this && entry < MAX_THREAD_COUNT ) { + ThreadsExec * const nil = 0 ; + + atomic_compare_exchange( s_threads_exec + entry , this , nil ); + + s_threads_process.m_pool_state = ThreadsExec::Terminating ; + } +} + + +// Returns the total thread pool size, s_thread_pool_size[0]. +int ThreadsExec::get_thread_count() +{ + return s_thread_pool_size[0] ; +} + +ThreadsExec * ThreadsExec::get_thread( const int init_thread_rank ) +{ + ThreadsExec * const th = + init_thread_rank < s_thread_pool_size[0] + ? 
s_threads_exec[ s_thread_pool_size[0] - ( init_thread_rank + 1 ) ] : 0 ; + + if ( 0 == th || th->m_pool_rank != init_thread_rank ) { + std::ostringstream msg ; + msg << "Kokkos::Impl::ThreadsExec::get_thread ERROR : " + << "thread " << init_thread_rank << " of " << s_thread_pool_size[0] ; + if ( 0 == th ) { + msg << " does not exist" ; + } + else { + msg << " has wrong thread_rank " << th->m_pool_rank ; + } + Kokkos::Impl::throw_runtime_exception( msg.str() ); + } + + return th ; +} + +//---------------------------------------------------------------------------- + +void ThreadsExec::execute_sleep( ThreadsExec & exec , const void * ) +{ + ThreadsExec::global_lock(); + ThreadsExec::global_unlock(); + + const int n = exec.m_pool_fan_size ; + const int rank_rev = exec.m_pool_size - ( exec.m_pool_rank + 1 ); + + for ( int i = 0 ; i < n ; ++i ) { + Impl::spinwait( exec.m_pool_base[ rank_rev + (1<<i) ]->m_pool_state , ThreadsExec::Active ); + } + + exec.m_pool_state = ThreadsExec::Inactive ; +} + +} +} + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +void ThreadsExec::verify_is_process( const std::string & name , const bool initialized ) +{ + if ( ! is_process() ) { + std::string msg( name ); + msg.append( " FAILED : Called by a worker thread, can only be called by the master process." ); + Kokkos::Impl::throw_runtime_exception( msg ); + } + + if ( initialized && 0 == s_thread_pool_size[0] ) { + std::string msg( name ); + msg.append( " FAILED : Threads not initialized." ); + Kokkos::Impl::throw_runtime_exception( msg ); + } +} + +int ThreadsExec::in_parallel() +{ + // A thread function is in execution and + // the function argument is not the special threads process argument and + // the master process is a worker or is not the master process. + return s_current_function && + ( & s_threads_process != s_current_function_arg ) && + ( s_threads_process.m_pool_base || ! 
is_process() ); +} + +// Wait for root thread to become inactive +void ThreadsExec::fence() +{ + if ( s_thread_pool_size[0] ) { + // Wait for the root thread to complete: + Impl::spinwait( s_threads_exec[0]->m_pool_state , ThreadsExec::Active ); + } + + s_current_function = 0 ; + s_current_function_arg = 0 ; + + // Make sure function and arguments are cleared before + // potentially re-activating threads with a subsequent launch. + memory_fence(); +} + +/** \brief Begin execution of the asynchronous functor */ +void ThreadsExec::start( void (*func)( ThreadsExec & , const void * ) , const void * arg ) +{ + verify_is_process("ThreadsExec::start" , true ); + + if ( s_current_function || s_current_function_arg ) { + Kokkos::Impl::throw_runtime_exception( std::string( "ThreadsExec::start() FAILED : already executing" ) ); + } + + s_current_function = func ; + s_current_function_arg = arg ; + + // Make sure function and arguments are written before activating threads. + memory_fence(); + + // Activate threads: + for ( int i = s_thread_pool_size[0] ; 0 < i-- ; ) { + s_threads_exec[i]->m_pool_state = ThreadsExec::Active ; + } + + if ( s_threads_process.m_pool_size ) { + // Master process is the root thread, run it: + (*func)( s_threads_process , arg ); + s_threads_process.m_pool_state = ThreadsExec::Inactive ; + } +} + +//---------------------------------------------------------------------------- + +/** \brief Put the pool to sleep: after a fence, take the global lock and + * activate every pool thread on 'execute_sleep'. + * Returns false if 'execute_sleep' is already the current function, + * true otherwise. The lock is released by wake(). */ +bool ThreadsExec::sleep() +{ + verify_is_process("ThreadsExec::sleep", true ); + + if ( & execute_sleep == s_current_function ) return false ; + + fence(); + + ThreadsExec::global_lock(); + + s_current_function = & execute_sleep ; + + // Activate threads: + for ( unsigned i = s_thread_pool_size[0] ; 0 < i ; ) { + s_threads_exec[--i]->m_pool_state = ThreadsExec::Active ; + } + + return true ; +} + +/** \brief Counterpart of sleep(): releases the global lock. + * Returns false if 'execute_sleep' is not the current function. */ +bool ThreadsExec::wake() +{ + verify_is_process("ThreadsExec::wake", true ); + + if ( & execute_sleep != s_current_function ) return false ; + + ThreadsExec::global_unlock(); + + if 
( s_threads_process.m_pool_base ) { + execute_sleep( s_threads_process , 0 ); + s_threads_process.m_pool_state = ThreadsExec::Inactive ; + } + + fence(); + + return true ; +} + +//---------------------------------------------------------------------------- + +void ThreadsExec::execute_serial( void (*func)( ThreadsExec & , const void * ) ) +{ + s_current_function = func ; + s_current_function_arg = & s_threads_process ; + + // Make sure function and arguments are written before activating threads. + memory_fence(); + + const unsigned begin = s_threads_process.m_pool_base ? 1 : 0 ; + + for ( unsigned i = s_thread_pool_size[0] ; begin < i ; ) { + ThreadsExec & th = * s_threads_exec[ --i ]; + + th.m_pool_state = ThreadsExec::Active ; + + wait_yield( th.m_pool_state , ThreadsExec::Active ); + } + + if ( s_threads_process.m_pool_base ) { + s_threads_process.m_pool_state = ThreadsExec::Active ; + (*func)( s_threads_process , 0 ); + s_threads_process.m_pool_state = ThreadsExec::Inactive ; + } + + s_current_function_arg = 0 ; + s_current_function = 0 ; + + // Make sure function and arguments are cleared before proceeding. 
+ memory_fence(); +} + +//---------------------------------------------------------------------------- + +void * ThreadsExec::root_reduce_scratch() +{ + return s_threads_process.reduce_memory(); +} + +void ThreadsExec::execute_resize_scratch( ThreadsExec & exec , const void * ) +{ + typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ; + + if ( exec.m_scratch ) { + Record * const r = Record::get_record( exec.m_scratch ); + + exec.m_scratch = 0 ; + + Record::decrement( r ); + } + + exec.m_scratch_reduce_end = s_threads_process.m_scratch_reduce_end ; + exec.m_scratch_thread_end = s_threads_process.m_scratch_thread_end ; + + if ( s_threads_process.m_scratch_thread_end ) { + + // Allocate tracked memory: + { + Record * const r = Record::allocate( Kokkos::HostSpace() , "thread_scratch" , s_threads_process.m_scratch_thread_end ); + + Record::increment( r ); + + exec.m_scratch = r->data(); + } + + unsigned * ptr = reinterpret_cast<unsigned *>( exec.m_scratch ); + + unsigned * const end = ptr + s_threads_process.m_scratch_thread_end / sizeof(unsigned); + + // touch on this thread + while ( ptr < end ) *ptr++ = 0 ; + } +} + +void * ThreadsExec::resize_scratch( size_t reduce_size , size_t thread_size ) +{ + enum { ALIGN_MASK = Kokkos::Impl::MEMORY_ALIGNMENT - 1 }; + + fence(); + + const size_t old_reduce_size = s_threads_process.m_scratch_reduce_end ; + const size_t old_thread_size = s_threads_process.m_scratch_thread_end - s_threads_process.m_scratch_reduce_end ; + + reduce_size = ( reduce_size + ALIGN_MASK ) & ~ALIGN_MASK ; + thread_size = ( thread_size + ALIGN_MASK ) & ~ALIGN_MASK ; + + // Increase size or deallocate completely. 
+ + if ( ( old_reduce_size < reduce_size ) || + ( old_thread_size < thread_size ) || + ( ( reduce_size == 0 && thread_size == 0 ) && + ( old_reduce_size != 0 || old_thread_size != 0 ) ) ) { + + verify_is_process( "ThreadsExec::resize_scratch" , true ); + + s_threads_process.m_scratch_reduce_end = reduce_size ; + s_threads_process.m_scratch_thread_end = reduce_size + thread_size ; + + execute_serial( & execute_resize_scratch ); + + s_threads_process.m_scratch = s_threads_exec[0]->m_scratch ; + } + + return s_threads_process.m_scratch ; +} + +//---------------------------------------------------------------------------- + +void ThreadsExec::print_configuration( std::ostream & s , const bool detail ) +{ + verify_is_process("ThreadsExec::print_configuration",false); + + fence(); + + const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); + const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); + const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); + + // Forestall compiler warnings for unused variables. 
+ (void) numa_count; + (void) cores_per_numa; + (void) threads_per_core; + + s << "Kokkos::Threads" ; + +#if defined( KOKKOS_HAVE_PTHREAD ) + s << " KOKKOS_HAVE_PTHREAD" ; +#endif +#if defined( KOKKOS_HAVE_HWLOC ) + s << " hwloc[" << numa_count << "x" << cores_per_numa << "x" << threads_per_core << "]" ; +#endif + + if ( s_thread_pool_size[0] ) { + s << " threads[" << s_thread_pool_size[0] << "]" + << " threads_per_numa[" << s_thread_pool_size[1] << "]" + << " threads_per_core[" << s_thread_pool_size[2] << "]" + ; + if ( 0 == s_threads_process.m_pool_base ) { s << " Asynchronous" ; } + s << " ReduceScratch[" << s_current_reduce_size << "]" + << " SharedScratch[" << s_current_shared_size << "]" ; + s << std::endl ; + + if ( detail ) { + + for ( int i = 0 ; i < s_thread_pool_size[0] ; ++i ) { + + ThreadsExec * const th = s_threads_exec[i] ; + + if ( th ) { + + const int rank_rev = th->m_pool_size - ( th->m_pool_rank + 1 ); + + s << " Thread[ " << th->m_pool_rank << " : " + << th->m_numa_rank << "." << th->m_numa_core_rank << " ]" ; + + s << " Fan{" ; + for ( int j = 0 ; j < th->m_pool_fan_size ; ++j ) { + ThreadsExec * const thfan = th->m_pool_base[rank_rev+(1<<j)] ; + s << " [ " << thfan->m_pool_rank << " : " + << thfan->m_numa_rank << "." 
<< thfan->m_numa_core_rank << " ]" ; + } + s << " }" ; + + if ( th == & s_threads_process ) { + s << " is_process" ; + } + } + s << std::endl ; + } + } + } + else { + s << " not initialized" << std::endl ; + } +} + +//---------------------------------------------------------------------------- + +int ThreadsExec::is_initialized() +{ return 0 != s_threads_exec[0] ; } + +void ThreadsExec::initialize( unsigned thread_count , + unsigned use_numa_count , + unsigned use_cores_per_numa , + bool allow_asynchronous_threadpool ) +{ + static const Sentinel sentinel ; + + const bool is_initialized = 0 != s_thread_pool_size[0] ; + + unsigned thread_spawn_failed = 0 ; + + for ( int i = 0; i < ThreadsExec::MAX_THREAD_COUNT ; i++) + s_threads_exec[i] = NULL; + + if ( ! is_initialized ) { + + // If thread_count, use_numa_count, or use_cores_per_numa are zero + // then they will be given default values based upon hwloc detection + // and allowed asynchronous execution. + + const bool hwloc_avail = Kokkos::hwloc::available(); + const bool hwloc_can_bind = hwloc_avail && Kokkos::hwloc::can_bind_threads(); + + if ( thread_count == 0 ) { + thread_count = hwloc_avail + ? Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core() + : 1 ; + } + + const unsigned thread_spawn_begin = + hwloc::thread_mapping( "Kokkos::Threads::initialize" , + allow_asynchronous_threadpool , + thread_count , + use_numa_count , + use_cores_per_numa , + s_threads_coord ); + + const std::pair<unsigned,unsigned> proc_coord = s_threads_coord[0] ; + + if ( thread_spawn_begin ) { + // Synchronous with s_threads_coord[0] as the process core + // Claim entry #0 for binding the process core. 
+ s_threads_coord[0] = std::pair<unsigned,unsigned>(~0u,~0u); + } + + s_thread_pool_size[0] = thread_count ; + s_thread_pool_size[1] = s_thread_pool_size[0] / use_numa_count ; + s_thread_pool_size[2] = s_thread_pool_size[1] / use_cores_per_numa ; + s_current_function = & execute_function_noop ; // Initialization work function + + for ( unsigned ith = thread_spawn_begin ; ith < thread_count ; ++ith ) { + + s_threads_process.m_pool_state = ThreadsExec::Inactive ; + + // If hwloc available then spawned thread will + // choose its own entry in 's_threads_coord' + // otherwise specify the entry. + s_current_function_arg = (void*)static_cast<uintptr_t>( hwloc_can_bind ? ~0u : ith ); + + // Make sure all outstanding memory writes are complete + // before spawning the new thread. + memory_fence(); + + // Spawn thread executing the 'driver()' function. + // Wait until spawned thread has attempted to initialize. + // If spawning and initialization is successfull then + // an entry in 's_threads_exec' will be assigned. + if ( ThreadsExec::spawn() ) { + wait_yield( s_threads_process.m_pool_state , ThreadsExec::Inactive ); + } + if ( s_threads_process.m_pool_state == ThreadsExec::Terminating ) break ; + } + + // Wait for all spawned threads to deactivate before zeroing the function. + + for ( unsigned ith = thread_spawn_begin ; ith < thread_count ; ++ith ) { + // Try to protect against cache coherency failure by casting to volatile. + ThreadsExec * const th = ((ThreadsExec * volatile *)s_threads_exec)[ith] ; + if ( th ) { + wait_yield( th->m_pool_state , ThreadsExec::Active ); + } + else { + ++thread_spawn_failed ; + } + } + + s_current_function = 0 ; + s_current_function_arg = 0 ; + s_threads_process.m_pool_state = ThreadsExec::Inactive ; + + memory_fence(); + + if ( ! 
thread_spawn_failed ) { + // Bind process to the core on which it was located before spawning occured + if (hwloc_can_bind) { + Kokkos::hwloc::bind_this_thread( proc_coord ); + } + + if ( thread_spawn_begin ) { // Include process in pool. + const std::pair<unsigned,unsigned> coord = Kokkos::hwloc::get_this_thread_coordinate(); + + s_threads_exec[0] = & s_threads_process ; + s_threads_process.m_numa_rank = coord.first ; + s_threads_process.m_numa_core_rank = coord.second ; + s_threads_process.m_pool_base = s_threads_exec ; + s_threads_process.m_pool_rank = thread_count - 1 ; // Reversed for scan-compatible reductions + s_threads_process.m_pool_size = thread_count ; + s_threads_process.m_pool_fan_size = fan_size( s_threads_process.m_pool_rank , s_threads_process.m_pool_size ); + s_threads_pid[ s_threads_process.m_pool_rank ] = pthread_self(); + } + else { + s_threads_process.m_pool_base = 0 ; + s_threads_process.m_pool_rank = 0 ; + s_threads_process.m_pool_size = 0 ; + s_threads_process.m_pool_fan_size = 0 ; + } + + // Initial allocations: + ThreadsExec::resize_scratch( 1024 , 1024 ); + } + else { + s_thread_pool_size[0] = 0 ; + s_thread_pool_size[1] = 0 ; + s_thread_pool_size[2] = 0 ; + } + } + + if ( is_initialized || thread_spawn_failed ) { + + std::ostringstream msg ; + + msg << "Kokkos::Threads::initialize ERROR" ; + + if ( is_initialized ) { + msg << " : already initialized" ; + } + if ( thread_spawn_failed ) { + msg << " : failed to spawn " << thread_spawn_failed << " threads" ; + } + + Kokkos::Impl::throw_runtime_exception( msg.str() ); + } + + // Check for over-subscription + if( Impl::mpi_ranks_per_node() * long(thread_count) > Impl::processors_per_node() ) { + std::cout << "Kokkos::Threads::initialize WARNING: You are likely oversubscribing your CPU cores." << std::endl; + std::cout << " Detected: " << Impl::processors_per_node() << " cores per node." << std::endl; + std::cout << " Detected: " << Impl::mpi_ranks_per_node() << " MPI_ranks per node." 
<< std::endl; + std::cout << " Requested: " << thread_count << " threads per process." << std::endl; + } + + // Init the array for used for arbitrarily sized atomics + Impl::init_lock_array_host_space(); + + #if (KOKKOS_ENABLE_PROFILING) + Kokkos::Profiling::initialize(); + #endif +} + +//---------------------------------------------------------------------------- + +void ThreadsExec::finalize() +{ + verify_is_process("ThreadsExec::finalize",false); + + fence(); + + resize_scratch(0,0); + + const unsigned begin = s_threads_process.m_pool_base ? 1 : 0 ; + + for ( unsigned i = s_thread_pool_size[0] ; begin < i-- ; ) { + + if ( s_threads_exec[i] ) { + + s_threads_exec[i]->m_pool_state = ThreadsExec::Terminating ; + + wait_yield( s_threads_process.m_pool_state , ThreadsExec::Inactive ); + + s_threads_process.m_pool_state = ThreadsExec::Inactive ; + } + + s_threads_pid[i] = 0 ; + } + + if ( s_threads_process.m_pool_base ) { + ( & s_threads_process )->~ThreadsExec(); + s_threads_exec[0] = 0 ; + } + + if (Kokkos::hwloc::can_bind_threads() ) { + Kokkos::hwloc::unbind_this_thread(); + } + + s_thread_pool_size[0] = 0 ; + s_thread_pool_size[1] = 0 ; + s_thread_pool_size[2] = 0 ; + + // Reset master thread to run solo. 
+ s_threads_process.m_numa_rank = 0 ; + s_threads_process.m_numa_core_rank = 0 ; + s_threads_process.m_pool_base = 0 ; + s_threads_process.m_pool_rank = 0 ; + s_threads_process.m_pool_size = 1 ; + s_threads_process.m_pool_fan_size = 0 ; + s_threads_process.m_pool_state = ThreadsExec::Inactive ; + + #if (KOKKOS_ENABLE_PROFILING) + Kokkos::Profiling::finalize(); + #endif +} + +//---------------------------------------------------------------------------- + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +int Threads::concurrency() { + return thread_pool_size(0); +} + +Threads & Threads::instance(int) +{ + static Threads t ; + return t ; +} + +int Threads::thread_pool_size( int depth ) +{ + return Impl::s_thread_pool_size[depth]; +} + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) +int Threads::thread_pool_rank() +{ + const pthread_t pid = pthread_self(); + int i = 0; + while ( ( i < Impl::s_thread_pool_size[0] ) && ( pid != Impl::s_threads_pid[i] ) ) { ++i ; } + return i ; +} +#endif + +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) || defined( KOKKOS_HAVE_WINTHREAD ) */ + diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4ec1450d0f8270c735c83e194d6d6243aa2aa415 --- /dev/null +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp @@ -0,0 +1,631 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_THREADSEXEC_HPP +#define KOKKOS_THREADSEXEC_HPP + +#include <stdio.h> + +#include <utility> +#include <impl/Kokkos_spinwait.hpp> +#include <impl/Kokkos_FunctorAdapter.hpp> + +#include <Kokkos_Atomic.hpp> + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +class ThreadsExec { +public: + + // Fan array has log_2(NT) reduction threads plus 2 scan threads + // Currently limited to 16k threads. + enum { MAX_FAN_COUNT = 16 }; + enum { MAX_THREAD_COUNT = 1 << ( MAX_FAN_COUNT - 2 ) }; + enum { VECTOR_LENGTH = 8 }; + + /** \brief States of a worker thread */ + enum { Terminating ///< Termination in progress + , Inactive ///< Exists, waiting for work + , Active ///< Exists, performing work + , Rendezvous ///< Exists, waiting in a barrier or reduce + + , ScanCompleted + , ScanAvailable + , ReductionAvailable + }; + +private: + + friend class Kokkos::Threads ; + + // Fan-in operations' root is the highest ranking thread + // to place the 'scan' reduction intermediate values on + // the threads that need them. + // For a simple reduction the thread location is arbitrary. + + ThreadsExec * const * m_pool_base ; ///< Base for pool fan-in + + void * m_scratch ; + int m_scratch_reduce_end ; + int m_scratch_thread_end ; + int m_numa_rank ; + int m_numa_core_rank ; + int m_pool_rank ; + int m_pool_rank_rev ; + int m_pool_size ; + int m_pool_fan_size ; + int volatile m_pool_state ; ///< State for global synchronizations + + // Members for dynamic scheduling + // Which thread am I stealing from currently + int m_current_steal_target; + // This thread's owned work_range + Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN_16; + // Team Offset if one thread determines work_range for others + long m_team_work_index; + + // Is this thread stealing (i.e. 
its owned work_range is exhausted + bool m_stealing; + + static void global_lock(); + static void global_unlock(); + static bool spawn(); + + static void execute_resize_scratch( ThreadsExec & , const void * ); + static void execute_sleep( ThreadsExec & , const void * ); + + ThreadsExec( const ThreadsExec & ); + ThreadsExec & operator = ( const ThreadsExec & ); + + static void execute_serial( void (*)( ThreadsExec & , const void * ) ); + +public: + + KOKKOS_INLINE_FUNCTION int pool_size() const { return m_pool_size ; } + KOKKOS_INLINE_FUNCTION int pool_rank() const { return m_pool_rank ; } + KOKKOS_INLINE_FUNCTION int numa_rank() const { return m_numa_rank ; } + KOKKOS_INLINE_FUNCTION int numa_core_rank() const { return m_numa_core_rank ; } + inline long team_work_index() const { return m_team_work_index ; } + + static int get_thread_count(); + static ThreadsExec * get_thread( const int init_thread_rank ); + + inline void * reduce_memory() const { return m_scratch ; } + KOKKOS_INLINE_FUNCTION void * scratch_memory() const + { return reinterpret_cast<unsigned char *>(m_scratch) + m_scratch_reduce_end ; } + + KOKKOS_INLINE_FUNCTION int volatile & state() { return m_pool_state ; } + KOKKOS_INLINE_FUNCTION ThreadsExec * const * pool_base() const { return m_pool_base ; } + + static void driver(void); + + ~ThreadsExec(); + ThreadsExec(); + + static void * resize_scratch( size_t reduce_size , size_t thread_size ); + + static void * root_reduce_scratch(); + + static bool is_process(); + + static void verify_is_process( const std::string & , const bool initialized ); + + static int is_initialized(); + + static void initialize( unsigned thread_count , + unsigned use_numa_count , + unsigned use_cores_per_numa , + bool allow_asynchronous_threadpool ); + + static void finalize(); + + /* Given a requested team size, return valid team size */ + static unsigned team_size_valid( unsigned ); + + static void print_configuration( std::ostream & , const bool detail = false ); + + 
//------------------------------------ + + static void wait_yield( volatile int & , const int ); + + //------------------------------------ + // All-thread functions: + + inline + int all_reduce( const int value ) + { + // Make sure there is enough scratch space: + const int rev_rank = m_pool_size - ( m_pool_rank + 1 ); + + *((volatile int*) reduce_memory()) = value ; + + memory_fence(); + + // Fan-in reduction with highest ranking thread as the root + for ( int i = 0 ; i < m_pool_fan_size ; ++i ) { + // Wait: Active -> Rendezvous + Impl::spinwait( m_pool_base[ rev_rank + (1<<i) ]->m_pool_state , ThreadsExec::Active ); + } + + if ( rev_rank ) { + m_pool_state = ThreadsExec::Rendezvous ; + // Wait: Rendezvous -> Active + Impl::spinwait( m_pool_state , ThreadsExec::Rendezvous ); + } + else { + // Root thread does the reduction and broadcast + + int accum = 0 ; + + for ( int rank = 0 ; rank < m_pool_size ; ++rank ) { + accum += *((volatile int *) get_thread( rank )->reduce_memory()); + } + + for ( int rank = 0 ; rank < m_pool_size ; ++rank ) { + *((volatile int *) get_thread( rank )->reduce_memory()) = accum ; + } + + memory_fence(); + + for ( int rank = 0 ; rank < m_pool_size ; ++rank ) { + get_thread( rank )->m_pool_state = ThreadsExec::Active ; + } + } + + return *((volatile int*) reduce_memory()); + } + + inline + void barrier( ) + { + // Make sure there is enough scratch space: + const int rev_rank = m_pool_size - ( m_pool_rank + 1 ); + + memory_fence(); + + // Fan-in reduction with highest ranking thread as the root + for ( int i = 0 ; i < m_pool_fan_size ; ++i ) { + // Wait: Active -> Rendezvous + Impl::spinwait( m_pool_base[ rev_rank + (1<<i) ]->m_pool_state , ThreadsExec::Active ); + } + + if ( rev_rank ) { + m_pool_state = ThreadsExec::Rendezvous ; + // Wait: Rendezvous -> Active + Impl::spinwait( m_pool_state , ThreadsExec::Rendezvous ); + } + else { + // Root thread does the reduction and broadcast + + memory_fence(); + + for ( int rank = 0 ; rank < 
m_pool_size ; ++rank ) {
+        get_thread( rank )->m_pool_state = ThreadsExec::Active ;
+      }
+    }
+  }
+
+  //------------------------------------
+  // All-thread functions:
+
+  // Tree fan-in with reduction.
+  // Each thread waits on its m_pool_fan_size fan-in partners
+  // (m_pool_base[ rev_rank + 2^i ]) and joins each partner's reduce_memory()
+  // into its own.  The thread with rev_rank == 0 then applies Final::final.
+  // NOTE(review): Impl::spinwait( flag , value ) is declared elsewhere; the
+  // "Wait: X -> Y" comments in this class indicate it returns once 'flag' no
+  // longer equals 'value' -- confirm against Kokkos_spinwait.hpp.
+  template< class FunctorType , class ArgTag >
+  inline
+  void fan_in_reduce( const FunctorType & f ) const
+    {
+      typedef Kokkos::Impl::FunctorValueJoin< FunctorType , ArgTag > Join ;
+      typedef Kokkos::Impl::FunctorFinal< FunctorType , ArgTag > Final ;
+
+      // Reverse rank: the highest pool rank has rev_rank == 0.
+      const int rev_rank = m_pool_size - ( m_pool_rank + 1 );
+
+      for ( int i = 0 ; i < m_pool_fan_size ; ++i ) {
+
+        ThreadsExec & fan = *m_pool_base[ rev_rank + ( 1 << i ) ] ;
+
+        Impl::spinwait( fan.m_pool_state , ThreadsExec::Active );
+
+        Join::join( f , reduce_memory() , fan.reduce_memory() );
+      }
+
+      if ( ! rev_rank ) {
+        Final::final( f , reduce_memory() );
+      }
+    }
+
+  // Tree fan-in without a reduction: only waits on the fan-in partners' state.
+  inline
+  void fan_in() const
+    {
+      const int rev_rank = m_pool_size - ( m_pool_rank + 1 );
+
+      for ( int i = 0 ; i < m_pool_fan_size ; ++i ) {
+        Impl::spinwait( m_pool_base[rev_rank+(1<<i)]->m_pool_state , ThreadsExec::Active );
+      }
+    }
+
+  // Inter-thread scan driven by per-thread state transitions.
+  // reduce_memory() is used as two consecutive arrays of 'count' scalars:
+  //   work_value[ 0 .. count )        reduction value, later the exclusive scan value
+  //   work_value[ count .. 2*count )  inclusive scan value
+  template< class FunctorType , class ArgTag >
+  inline
+  void scan_large( const FunctorType & f )
+    {
+      // Sequence of states:
+      //  0) Active             : entry and exit state
+      //  1) ReductionAvailable : reduction value available
+      //  2) ScanAvailable      : inclusive scan value available
+      //  3) Rendezvous         : All threads inclusive scan value are available
+      //  4) ScanCompleted      : exclusive scan value copied
+
+      typedef Kokkos::Impl::FunctorValueTraits< FunctorType , ArgTag > Traits ;
+      typedef Kokkos::Impl::FunctorValueJoin< FunctorType , ArgTag > Join ;
+      typedef Kokkos::Impl::FunctorValueInit< FunctorType , ArgTag > Init ;
+
+      typedef typename Traits::value_type scalar_type ;
+
+      const int rev_rank = m_pool_size - ( m_pool_rank + 1 );
+      const unsigned count = Traits::value_count( f );
+
+      scalar_type * const work_value = (scalar_type *) reduce_memory();
+
+      //--------------------------------
+      // Fan-in reduction with highest ranking thread as the root
+      for (
int i = 0 ; i < m_pool_fan_size ; ++i ) {
+        ThreadsExec & fan = *m_pool_base[ rev_rank + (1<<i) ];
+
+        // Wait: Active -> ReductionAvailable (or ScanAvailable)
+        Impl::spinwait( fan.m_pool_state , ThreadsExec::Active );
+        Join::join( f , work_value , fan.reduce_memory() );
+      }
+
+      // Copy reduction value to scan value before releasing from this phase.
+      for ( unsigned i = 0 ; i < count ; ++i ) { work_value[i+count] = work_value[i] ; }
+
+      if ( rev_rank ) {
+
+        // Set: Active -> ReductionAvailable
+        m_pool_state = ThreadsExec::ReductionAvailable ;
+
+        // Wait for contributing threads' scan value to be available.
+        if ( ( 1 << m_pool_fan_size ) < ( m_pool_rank + 1 ) ) {
+          ThreadsExec & th = *m_pool_base[ rev_rank + ( 1 << m_pool_fan_size ) ] ;
+
+          // Wait: Active -> ReductionAvailable
+          // Wait: ReductionAvailable -> ScanAvailable
+          Impl::spinwait( th.m_pool_state , ThreadsExec::Active );
+          Impl::spinwait( th.m_pool_state , ThreadsExec::ReductionAvailable );
+
+          // Fold the contributing thread's inclusive scan value into ours.
+          Join::join( f , work_value + count , ((scalar_type *)th.reduce_memory()) + count );
+        }
+
+        // This thread has completed inclusive scan
+        // Set: ReductionAvailable -> ScanAvailable
+        m_pool_state = ThreadsExec::ScanAvailable ;
+
+        // Wait for all threads to complete inclusive scan
+        // Wait: ScanAvailable -> Rendezvous
+        Impl::spinwait( m_pool_state , ThreadsExec::ScanAvailable );
+      }
+
+      //--------------------------------
+
+      for ( int i = 0 ; i < m_pool_fan_size ; ++i ) {
+        ThreadsExec & fan = *m_pool_base[ rev_rank + (1<<i) ];
+        // Wait: ReductionAvailable -> ScanAvailable
+        Impl::spinwait( fan.m_pool_state , ThreadsExec::ReductionAvailable );
+        // Set: ScanAvailable -> Rendezvous
+        fan.m_pool_state = ThreadsExec::Rendezvous ;
+      }
+
+      // All threads have completed the inclusive scan.
+      // All non-root threads are in the Rendezvous state.
+      // Threads are free to overwrite their reduction value.
+      //--------------------------------
+
+      if ( ( rev_rank + 1 ) < m_pool_size ) {
+        // Exclusive scan: copy the previous thread's inclusive scan value
+
+        ThreadsExec & th = *m_pool_base[ rev_rank + 1 ] ; // Not the root thread
+
+        const scalar_type * const src_value = ((scalar_type *)th.reduce_memory()) + count ;
+
+        for ( unsigned j = 0 ; j < count ; ++j ) { work_value[j] = src_value[j]; }
+      }
+      else {
+        // No previous thread: the exclusive scan value is the functor's initial value.
+        (void) Init::init( f , work_value );
+      }
+
+      //--------------------------------
+      // Wait for all threads to copy previous thread's inclusive scan value
+      // Wait for all threads: Rendezvous -> ScanCompleted
+      for ( int i = 0 ; i < m_pool_fan_size ; ++i ) {
+        Impl::spinwait( m_pool_base[ rev_rank + (1<<i) ]->m_pool_state , ThreadsExec::Rendezvous );
+      }
+      if ( rev_rank ) {
+        // Set: Rendezvous -> ScanCompleted
+        // (this thread was moved to Rendezvous by the thread it fans in to)
+        m_pool_state = ThreadsExec::ScanCompleted ;
+        // Wait: ScanCompleted -> Active
+        Impl::spinwait( m_pool_state , ThreadsExec::ScanCompleted );
+      }
+      // Set: ScanCompleted -> Active
+      for ( int i = 0 ; i < m_pool_fan_size ; ++i ) {
+        m_pool_base[ rev_rank + (1<<i) ]->m_pool_state = ThreadsExec::Active ;
+      }
+    }
+
+  // Scan variant in which the root thread (rev_rank == 0) performs the whole
+  // inter-thread scan serially while every other thread waits in Rendezvous.
+  // Uses the same two-array layout of reduce_memory() as scan_large.
+  template< class FunctorType , class ArgTag >
+  inline
+  void scan_small( const FunctorType & f )
+    {
+      typedef Kokkos::Impl::FunctorValueTraits< FunctorType , ArgTag > Traits ;
+      typedef Kokkos::Impl::FunctorValueJoin< FunctorType , ArgTag > Join ;
+      typedef Kokkos::Impl::FunctorValueInit< FunctorType , ArgTag > Init ;
+
+      typedef typename Traits::value_type scalar_type ;
+
+      const int rev_rank = m_pool_size - ( m_pool_rank + 1 );
+      const unsigned count = Traits::value_count( f );
+
+      scalar_type * const work_value = (scalar_type *) reduce_memory();
+
+      //--------------------------------
+      // Fan-in with highest ranking thread as the root
+      // (wait only; no values are joined during this fan-in)
+      for ( int i = 0 ; i < m_pool_fan_size ; ++i ) {
+        // Wait: Active -> Rendezvous
+        Impl::spinwait( m_pool_base[ rev_rank + (1<<i) ]->m_pool_state , ThreadsExec::Active );
+      }
+
+      // Copy this thread's reduction value into its scan-value slot.
+      for ( unsigned i =
0 ; i < count ; ++i ) { work_value[i+count] = work_value[i]; }
+
+      if ( rev_rank ) {
+        m_pool_state = ThreadsExec::Rendezvous ;
+        // Wait: Rendezvous -> Active
+        Impl::spinwait( m_pool_state , ThreadsExec::Rendezvous );
+      }
+      else {
+        // Root thread does the thread-scan before releasing threads
+
+        scalar_type * ptr_prev = 0 ;
+
+        for ( int rank = 0 ; rank < m_pool_size ; ++rank ) {
+          scalar_type * const ptr = (scalar_type *) get_thread( rank )->reduce_memory();
+          if ( rank ) {
+            // Exclusive value = previous thread's inclusive value;
+            // inclusive value = own value joined with that exclusive value.
+            for ( unsigned i = 0 ; i < count ; ++i ) { ptr[i] = ptr_prev[ i + count ]; }
+            Join::join( f , ptr + count , ptr );
+          }
+          else {
+            (void) Init::init( f , ptr );
+          }
+          ptr_prev = ptr ;
+        }
+      }
+
+      // Release the fan-in partners back to Active.
+      for ( int i = 0 ; i < m_pool_fan_size ; ++i ) {
+        m_pool_base[ rev_rank + (1<<i) ]->m_pool_state = ThreadsExec::Active ;
+      }
+    }
+
+  //------------------------------------
+  /** \brief  Wait for previous asynchronous functor to
+   *          complete and release the Threads device.
+   *          Acquire the Threads device and start this functor.
+   */
+  static void start( void (*)( ThreadsExec & , const void * ) , const void * );
+
+  static int  in_parallel();
+  static void fence();
+  static bool sleep();
+  static bool wake();
+
+  /* Dynamic Scheduling related functionality */
+  // Initialize the work range for this thread.
+  // The item range [begin,end) is stored in units of chunks (both bounds are
+  // rounded up to a multiple of chunk_size); end <= 0 yields an empty range.
+  inline void set_work_range(const long& begin, const long& end, const long& chunk_size) {
+    m_work_range.first = (begin+chunk_size-1)/chunk_size;
+    m_work_range.second = end>0?(end+chunk_size-1)/chunk_size:m_work_range.first;
+  }
+
+  // Claim an index from this thread's range from the beginning.
+  // Returns the claimed chunk index, or -1 when the range is exhausted.
+  inline long get_work_index_begin () {
+    Kokkos::pair<long,long> work_range_new = m_work_range;
+    Kokkos::pair<long,long> work_range_old = work_range_new;
+    if(work_range_old.first>=work_range_old.second)
+      return -1;
+
+    work_range_new.first+=1;
+
+    // Retry the compare-exchange until it either succeeds (the value it
+    // returns equals the expected old value) or the observed range is empty.
+    bool success = false;
+    while(!success) {
+      work_range_new = Kokkos::atomic_compare_exchange(&m_work_range,work_range_old,work_range_new);
+      success = ( (work_range_new == work_range_old) ||
+                  (work_range_new.first>=work_range_new.second));
+      work_range_old = work_range_new;
+      work_range_new.first+=1;
+    }
+    if(work_range_old.first<work_range_old.second)
+      return work_range_old.first;
+    else
+      return -1;
+  }
+
+  // Claim an index from this thread's range from the end.
+  // Returns the claimed chunk index, or -1 when the range is exhausted.
+  inline long get_work_index_end () {
+    Kokkos::pair<long,long> work_range_new = m_work_range;
+    Kokkos::pair<long,long> work_range_old = work_range_new;
+    if(work_range_old.first>=work_range_old.second)
+      return -1;
+    work_range_new.second-=1;
+    bool success = false;
+    while(!success) {
+      work_range_new = Kokkos::atomic_compare_exchange(&m_work_range,work_range_old,work_range_new);
+      success = ( (work_range_new == work_range_old) ||
+                  (work_range_new.first>=work_range_new.second) );
+      work_range_old = work_range_new;
+      work_range_new.second-=1;
+    }
+    if(work_range_old.first<work_range_old.second)
+      return work_range_old.second-1;
+    else
+      return -1;
+  }
+
+  // Reset the steal target
+
  inline void reset_steal_target() {
+    m_current_steal_target = (m_pool_rank+1)%pool_size();
+    m_stealing = false;
+  }
+
+  // Reset the steal target (team version): start team_size entries after
+  // this thread's reverse rank, wrapping to 0 past the end of the pool.
+  inline void reset_steal_target(int team_size) {
+    m_current_steal_target = (m_pool_rank_rev+team_size);
+    if(m_current_steal_target>=pool_size())
+      m_current_steal_target = 0;//pool_size()-1;
+    m_stealing = false;
+  }
+
+  // Get a steal target; start with my-rank + 1 and go round robin, until arriving at this thread's rank
+  // Returns -1 if no active steal target available
+  // NOTE(review): m_pool_base is indexed by reverse rank in fan_in(), while
+  // the index here is compared against m_pool_rank -- confirm this is intended.
+  inline int get_steal_target() {
+    while(( m_pool_base[m_current_steal_target]->m_work_range.second <=
+            m_pool_base[m_current_steal_target]->m_work_range.first  ) &&
+          (m_current_steal_target!=m_pool_rank) ) {
+      m_current_steal_target = (m_current_steal_target+1)%pool_size();
+    }
+    if(m_current_steal_target == m_pool_rank)
+      return -1;
+    else
+      return m_current_steal_target;
+  }
+
+  // Team version: advance in steps of team_size (wrapping to 0) until a
+  // non-empty work range is found or this thread's reverse rank is reached.
+  // Returns -1 if no active steal target available
+  inline int get_steal_target(int team_size) {
+
+    while(( m_pool_base[m_current_steal_target]->m_work_range.second <=
+            m_pool_base[m_current_steal_target]->m_work_range.first  ) &&
+          (m_current_steal_target!=m_pool_rank_rev) ) {
+      if(m_current_steal_target + team_size < pool_size())
+        m_current_steal_target = (m_current_steal_target+team_size);
+      else
+        m_current_steal_target = 0;
+    }
+
+    if(m_current_steal_target == m_pool_rank_rev)
+      return -1;
+    else
+      return m_current_steal_target;
+  }
+
+  // Steal one chunk index from the end of another thread's range.
+  // Keeps selecting steal targets until an index is claimed or no target
+  // remains; returns -1 in the latter case.
+  inline long steal_work_index (int team_size = 0) {
+    long index = -1;
+    int steal_target = team_size>0?get_steal_target(team_size):get_steal_target();
+    while ( (steal_target != -1) && (index == -1)) {
+      index = m_pool_base[steal_target]->get_work_index_end();
+      if(index == -1)
+        steal_target = team_size>0?get_steal_target(team_size):get_steal_target();
+    }
+    return index;
+  }
+
+  // Get a work index.
Claim from owned range until it is exhausted, then steal from other thread
+  // The result (or -1 when no work is left) is also stored in
+  // m_team_work_index before it is returned.
+  inline long get_work_index (int team_size = 0) {
+    long work_index = -1;
+    if(!m_stealing) work_index = get_work_index_begin();
+
+    if( work_index == -1) {
+      memory_fence();
+      m_stealing = true;
+      work_index = steal_work_index(team_size);
+    }
+
+    m_team_work_index = work_index;
+    memory_fence();
+    return work_index;
+  }
+
+};
+
+} /* namespace Impl */
+} /* namespace Kokkos */
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+// Kokkos::Threads static interface: each function forwards to the
+// Impl::ThreadsExec function of the same name.
+namespace Kokkos {
+
+inline int Threads::in_parallel()
+{ return Impl::ThreadsExec::in_parallel(); }
+
+inline int Threads::is_initialized()
+{ return Impl::ThreadsExec::is_initialized(); }
+
+inline void Threads::initialize(
+  unsigned threads_count ,
+  unsigned use_numa_count ,
+  unsigned use_cores_per_numa ,
+  bool allow_asynchronous_threadpool )
+{
+  Impl::ThreadsExec::initialize( threads_count , use_numa_count , use_cores_per_numa , allow_asynchronous_threadpool );
+}
+
+inline void Threads::finalize()
+{
+  Impl::ThreadsExec::finalize();
+}
+
+inline void Threads::print_configuration( std::ostream & s , const bool detail )
+{
+  Impl::ThreadsExec::print_configuration( s , detail );
+}
+
+inline bool Threads::sleep()
+{ return Impl::ThreadsExec::sleep() ; }
+
+inline bool Threads::wake()
+{ return Impl::ThreadsExec::wake() ; }
+
+inline void Threads::fence()
+{ Impl::ThreadsExec::fence() ; }
+
+} /* namespace Kokkos */
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#endif /* #define KOKKOS_THREADSEXEC_HPP */
+
diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp
new file mode 100644
index
0000000000000000000000000000000000000000..ce09248678de618495f3e3e4a9fc75a0ce48e28c --- /dev/null +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp @@ -0,0 +1,255 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <Kokkos_Core_fwd.hpp>
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#if defined( KOKKOS_HAVE_PTHREAD )
+
+/* Standard 'C' Linux libraries */
+
+#include <pthread.h>
+#include <sched.h>
+#include <errno.h>
+
+/* Standard C++ libraries */
+
+#include <cstdlib>
+#include <string>
+#include <iostream>
+#include <stdexcept>
+
+#include <Kokkos_Threads.hpp>
+
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+namespace {
+
+// Process-wide mutex behind ThreadsExec::global_lock() / global_unlock().
+pthread_mutex_t host_internal_pthread_mutex = PTHREAD_MUTEX_INITIALIZER ;
+
+// Pthreads compatible driver.
+// Recovery from an exception would require constant intra-thread health
+// verification; which would negatively impact runtime.  As such simply
+// abort the process.
+
+void * internal_pthread_driver( void * )
+{
+  try {
+    ThreadsExec::driver();
+  }
+  catch( const std::exception & x ) {
+    std::cerr << "Exception thrown from worker thread: " << x.what() << std::endl ;
+    std::cerr.flush();
+    std::abort();
+  }
+  catch( ...
) {
+    std::cerr << "Exception thrown from worker thread" << std::endl ;
+    std::cerr.flush();
+    std::abort();
+  }
+  return NULL ;
+}
+
+} // namespace
+
+//----------------------------------------------------------------------------
+// Spawn a thread
+//
+// Creates one detached worker thread running internal_pthread_driver.
+// Returns true only if the thread was created.
+
+bool ThreadsExec::spawn()
+{
+  bool result = false ;
+
+  pthread_attr_t attr ;
+
+  // The attribute calls were previously chained with '||', so a successful
+  // pthread_attr_init() short-circuited the chain: the scope and detach state
+  // were never applied and workers were created joinable but never joined.
+  if ( 0 == pthread_attr_init( & attr ) ) {
+
+    // System contention scope is a preference only; some platforms reject
+    // it, so a failure here does not prevent the spawn.
+    (void) pthread_attr_setscope( & attr, PTHREAD_SCOPE_SYSTEM );
+
+    // Workers are never joined, so they must be created detached.
+    if ( 0 == pthread_attr_setdetachstate( & attr, PTHREAD_CREATE_DETACHED ) ) {
+
+      pthread_t pt ;
+
+      result = 0 == pthread_create( & pt, & attr, internal_pthread_driver, 0 );
+    }
+
+    // Only destroy an attribute object that was successfully initialized.
+    pthread_attr_destroy( & attr );
+  }
+
+  return result ;
+}
+
+//----------------------------------------------------------------------------
+
+// True when called from the thread that made the first call to is_process()
+// (the function-local static captures that thread's id).
+bool ThreadsExec::is_process()
+{
+  static const pthread_t master_pid = pthread_self();
+
+  return pthread_equal( master_pid , pthread_self() );
+}
+
+void ThreadsExec::global_lock()
+{
+  pthread_mutex_lock( & host_internal_pthread_mutex );
+}
+
+void ThreadsExec::global_unlock()
+{
+  pthread_mutex_unlock( & host_internal_pthread_mutex );
+}
+
+//----------------------------------------------------------------------------
+
+// Yield the processor for as long as 'flag' still holds 'value'.
+void ThreadsExec::wait_yield( volatile int & flag , const int value )
+{
+  while ( value == flag ) { sched_yield(); }
+}
+
+} // namespace Impl
+} // namespace Kokkos
+
+/* end #if defined( KOKKOS_HAVE_PTHREAD ) */
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#elif defined( KOKKOS_HAVE_WINTHREAD )
+
+/* Windows libraries */
+#include <winsock2.h>
+#include <windows.h>
+#include <process.h>
+
+/* Standard C++ libraries */
+
+#include <cstdlib>
+#include <string>
+#include <iostream>
+#include <stdexcept>
+
+#include <Kokkos_Threads.hpp>
+
+//----------------------------------------------------------------------------
+// Driver for each created Windows thread
+
+namespace Kokkos {
+namespace
Impl {
+namespace {
+
+// Thread entry point handed to _beginthreadex.
+// As in the pthread driver, an exception escaping a worker cannot be
+// recovered from, so report it and abort the process.
+unsigned WINAPI internal_winthread_driver( void * )
+{
+  try {
+    ThreadsExec::driver();
+  }
+  catch( const std::exception & x ) {
+    std::cerr << "Exception thrown from worker thread: " << x.what() << std::endl ;
+    std::cerr.flush();
+    std::abort();
+  }
+  catch( ... ) {
+    std::cerr << "Exception thrown from worker thread" << std::endl ;
+    std::cerr.flush();
+    std::abort();
+  }
+
+  return 0 ;
+}
+
+// Singleton wrapper around a Win32 CRITICAL_SECTION; it backs
+// ThreadsExec::global_lock() / global_unlock().
+class ThreadLockWindows {
+private:
+  CRITICAL_SECTION  m_handle ;
+
+  ~ThreadLockWindows()
+  { DeleteCriticalSection( & m_handle ); }
+
+  // A stray ';' used to separate this declarator from its body,
+  // which made the class ill-formed.
+  ThreadLockWindows()
+  { InitializeCriticalSection( & m_handle ); }
+
+  // Not copyable.
+  ThreadLockWindows( const ThreadLockWindows & );
+  ThreadLockWindows & operator = ( const ThreadLockWindows & );
+
+public:
+
+  static ThreadLockWindows & singleton();
+
+  void lock()
+  { EnterCriticalSection( & m_handle ); }
+
+  void unlock()
+  { LeaveCriticalSection( & m_handle ); }
+};
+
+ThreadLockWindows & ThreadLockWindows::singleton()
+{ static ThreadLockWindows self ; return self ; }
+
+} // namespace <>
+} // namespace Impl
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+// Spawn this thread
+//
+// Returns true only if the worker thread was created.
+
+bool ThreadsExec::spawn()
+{
+  unsigned Win32ThreadID = 0 ;
+
+  // _beginthreadex returns the new thread's handle as a uintptr_t,
+  // or 0 on failure.
+  const uintptr_t handle =
+    _beginthreadex(0,0,internal_winthread_driver,0,0, & Win32ThreadID );
+
+  if ( 0 != handle ) {
+    // The worker is never joined; closing the handle does not stop the
+    // thread, it only releases this reference to it.
+    CloseHandle( reinterpret_cast<HANDLE>( handle ) );
+  }
+
+  // The result used to be inverted ( ! handle ), reporting failure as success.
+  return 0 != handle ;
+}
+
+// NOTE(review): unlike the pthread implementation this always reports 'true';
+// confirm whether worker threads should be distinguished here.
+bool ThreadsExec::is_process() { return true ; }
+
+void ThreadsExec::global_lock()
+{ ThreadLockWindows::singleton().lock(); }
+
+void ThreadsExec::global_unlock()
+{ ThreadLockWindows::singleton().unlock(); }
+
+// Yield the processor for as long as 'flag' still holds 'value'.
+// (A stray empty body '{}' after the parameter list made this ill-formed.)
+void ThreadsExec::wait_yield( volatile int & flag , const int value )
+{
+  while ( value == flag ) { Sleep(0); }
+}
+
+} // namespace Impl
+} // namespace Kokkos
+
+#endif /* end #elif defined( KOKKOS_HAVE_WINTHREAD ) */
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+
+
diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..3407ffaa54149499d5046ae887a3b415627287b6
--- /dev/null
+++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp
@@ -0,0 +1,932 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_THREADSTEAM_HPP
+#define KOKKOS_THREADSTEAM_HPP
+
+#include <stdio.h>
+
+#include <utility>
+#include <impl/Kokkos_spinwait.hpp>
+#include <impl/Kokkos_FunctorAdapter.hpp>
+
+#include <Kokkos_Atomic.hpp>
+
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+//----------------------------------------------------------------------------
+
+template< class > struct ThreadsExecAdapter ;
+
+//----------------------------------------------------------------------------
+
+// Team-member handle for the Threads back end.
+// Records one thread's position in its team and league and provides the
+// intra-team collectives (barrier, broadcast, reduce, scan).  The collectives
+// exchange values through the first TEAM_REDUCE_SIZE bytes of each team
+// member's scratch_memory().
+class ThreadsExecTeamMember {
+private:
+
+  enum { TEAM_REDUCE_SIZE = 512 };
+
+  typedef Kokkos::Threads execution_space ;
+  typedef execution_space::scratch_memory_space space ;
+
+  ThreadsExec * const   m_exec ;
+  ThreadsExec * const * m_team_base ; ///< Base for team fan-in
+  space                 m_team_shared ;
+  int                   m_team_shared_size ;
+  int                   m_team_size ;
+  int                   m_team_rank ;
+  int                   m_team_rank_rev ;
+  int                   m_league_size ;
+  int                   m_league_end ;
+  int                   m_league_rank ;
+
+  int                   m_chunk_size;
+  int
m_league_chunk_end;
+
+  int                   m_invalid_thread; ///< Non-zero: this thread takes no part in team execution
+  int                   m_team_alloc;
+
+  // Re-construct the team-shared scratch view in place; it starts
+  // TEAM_REDUCE_SIZE bytes into the team base thread's scratch memory.
+  inline
+  void set_team_shared()
+    { new( & m_team_shared ) space( ((char *) (*m_team_base)->scratch_memory()) + TEAM_REDUCE_SIZE , m_team_shared_size ); }
+
+public:
+
+  // Fan-in and wait until the matching fan-out is called.
+  // The root thread which does not wait will return true.
+  // All other threads will return false during the fan-out.
+  KOKKOS_INLINE_FUNCTION bool team_fan_in() const
+    {
+      int n , j ;
+
+      // Wait for fan-in threads
+      for ( n = 1 ; ( ! ( m_team_rank_rev & n ) ) && ( ( j = m_team_rank_rev + n ) < m_team_size ) ; n <<= 1 ) {
+        Impl::spinwait( m_team_base[j]->state() , ThreadsExec::Active );
+      }
+
+      // If not root then wait for release
+      if ( m_team_rank_rev ) {
+        m_exec->state() = ThreadsExec::Rendezvous ;
+        Impl::spinwait( m_exec->state() , ThreadsExec::Rendezvous );
+      }
+
+      return ! m_team_rank_rev ;
+    }
+
+  // Release the threads this thread waited on in team_fan_in().
+  KOKKOS_INLINE_FUNCTION void team_fan_out() const
+    {
+      int n , j ;
+      for ( n = 1 ; ( !
( m_team_rank_rev & n ) ) && ( ( j = m_team_rank_rev + n ) < m_team_size ) ; n <<= 1 ) {
+        m_team_base[j]->state() = ThreadsExec::Active ;
+      }
+    }
+
+public:
+
+  KOKKOS_INLINE_FUNCTION static int team_reduce_size() { return TEAM_REDUCE_SIZE ; }
+
+  KOKKOS_INLINE_FUNCTION
+  const execution_space::scratch_memory_space & team_shmem() const
+    { return m_team_shared.set_team_thread_mode(0,1,0) ; }
+
+  KOKKOS_INLINE_FUNCTION
+  const execution_space::scratch_memory_space & team_scratch(int) const
+    { return m_team_shared.set_team_thread_mode(0,1,0) ; }
+
+  KOKKOS_INLINE_FUNCTION
+  const execution_space::scratch_memory_space & thread_scratch(int) const
+    { return m_team_shared.set_team_thread_mode(0,team_size(),team_rank()) ; }
+
+  KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; }
+  KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; }
+  KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank ; }
+  KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size ; }
+
+  KOKKOS_INLINE_FUNCTION void team_barrier() const
+    {
+      team_fan_in();
+      team_fan_out();
+    }
+
+  // Broadcast 'value' from the team member with rank 'thread_id' to the
+  // whole team through the team base thread's scratch memory.
+  template<class ValueType>
+  KOKKOS_INLINE_FUNCTION
+  void team_broadcast(ValueType& value, const int& thread_id) const
+  {
+#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+    { }
+#else
+    // Make sure there is enough scratch space:
+    typedef typename if_c< sizeof(ValueType) < TEAM_REDUCE_SIZE
+                         , ValueType , void >::type type ;
+
+    if ( m_team_base ) {
+      type * const local_value = ((type*) m_team_base[0]->scratch_memory());
+      if(team_rank() == thread_id) *local_value = value;
+      memory_fence();
+      team_barrier();
+      value = *local_value;
+    }
+#endif
+  }
+
+  // Team-wide sum (operator+=) of 'value'.  The root of the fan-in
+  // accumulates every member's contribution into the team base's scratch.
+  template< typename Type >
+  KOKKOS_INLINE_FUNCTION Type team_reduce( const Type & value ) const
+#if !
defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+    { return Type(); }
+#else
+    {
+      // Make sure there is enough scratch space:
+      typedef typename if_c< sizeof(Type) < TEAM_REDUCE_SIZE , Type , void >::type type ;
+
+      if ( 0 == m_exec ) return value ;
+
+      *((volatile type*) m_exec->scratch_memory() ) = value ;
+
+      memory_fence();
+
+      type & accum = *((type *) m_team_base[0]->scratch_memory() );
+
+      if ( team_fan_in() ) {
+        for ( int i = 1 ; i < m_team_size ; ++i ) {
+          accum += *((type *) m_team_base[i]->scratch_memory() );
+        }
+        memory_fence();
+      }
+
+      team_fan_out();
+
+      return accum ;
+    }
+#endif
+
+  // Team-wide reduction of 'value' with a user-supplied join operation.
+#ifdef KOKKOS_HAVE_CXX11
+  template< class ValueType, class JoinOp >
+  KOKKOS_INLINE_FUNCTION ValueType
+    team_reduce( const ValueType & value
+               , const JoinOp & op_in ) const
+  #if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+    { return ValueType(); }
+  #else
+    {
+      typedef ValueType value_type;
+      const JoinLambdaAdapter<value_type,JoinOp> op(op_in);
+  #endif
+#else // KOKKOS_HAVE_CXX11
+  template< class JoinOp >
+  KOKKOS_INLINE_FUNCTION typename JoinOp::value_type
+    team_reduce( const typename JoinOp::value_type & value
+               , const JoinOp & op ) const
+  #if !
defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+    { return typename JoinOp::value_type(); }
+  #else
+    {
+      typedef typename JoinOp::value_type value_type;
+  #endif
+#endif // KOKKOS_HAVE_CXX11
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+      // Make sure there is enough scratch space:
+      typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE
+                           , value_type , void >::type type ;
+
+      if ( 0 == m_exec ) return value ;
+
+      type * const local_value = ((type*) m_exec->scratch_memory());
+
+      // Set this thread's contribution
+      *local_value = value ;
+
+      // Fence to make sure the base team member has access:
+      memory_fence();
+
+      if ( team_fan_in() ) {
+        // The last thread to synchronize returns true, all other threads wait for team_fan_out()
+        type * const team_value = ((type*) m_team_base[0]->scratch_memory());
+
+        // Join to the team value:
+        for ( int i = 1 ; i < m_team_size ; ++i ) {
+          op.join( *team_value , *((type*) m_team_base[i]->scratch_memory()) );
+        }
+
+        // Team base thread may "lap" member threads so copy out to their local value.
+        for ( int i = 1 ; i < m_team_size ; ++i ) {
+          *((type*) m_team_base[i]->scratch_memory()) = *team_value ;
+        }
+
+        // Fence to make sure all team members have access
+        memory_fence();
+      }
+
+      team_fan_out();
+
+      // Value was changed by the team base
+      return *((type volatile const *) local_value);
+    }
+#endif
+
+  /** \brief  Intra-team exclusive prefix sum with team_rank() ordering
+   *          with intra-team non-deterministic ordering accumulation.
+   *
+   *  The global inter-team accumulation value will, at the end of the
+   *  league's parallel execution, be the scan's total.
+   *  Parallel execution ordering of the league's teams is non-deterministic.
+   *  As such the base value for each team's scan operation is similarly
+   *  non-deterministic.
+   */
+  template< typename ArgType >
+  KOKKOS_INLINE_FUNCTION ArgType team_scan( const ArgType & value , ArgType * const global_accum ) const
+#if !
defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+    { return ArgType(); }
+#else
+    {
+      // Make sure there is enough scratch space:
+      typedef typename if_c< sizeof(ArgType) < TEAM_REDUCE_SIZE , ArgType , void >::type type ;
+
+      if ( 0 == m_exec ) return type(0);
+
+      volatile type * const work_value  = ((type*) m_exec->scratch_memory());
+
+      *work_value = value ;
+
+      memory_fence();
+
+      if ( team_fan_in() ) {
+        // The last thread to synchronize returns true, all other threads wait for team_fan_out()
+        // m_team_base[0]                 == highest ranking team member
+        // m_team_base[ m_team_size - 1 ] == lowest ranking team member
+        //
+        // 1) copy from lower to higher rank, initialize lowest rank to zero
+        // 2) prefix sum from lowest to highest rank, skipping lowest rank
+
+        type accum = 0 ;
+
+        if ( global_accum ) {
+          for ( int i = m_team_size ; i-- ; ) {
+            type & val = *((type*) m_team_base[i]->scratch_memory());
+            accum += val ;
+          }
+          accum = atomic_fetch_add( global_accum , accum );
+        }
+
+        for ( int i = m_team_size ; i-- ; ) {
+          type & val = *((type*) m_team_base[i]->scratch_memory());
+          const type offset = accum ;
+          accum += val ;
+          val = offset ;
+        }
+
+        memory_fence();
+      }
+
+      team_fan_out();
+
+      return *work_value ;
+    }
+#endif
+
+  /** \brief  Intra-team exclusive prefix sum with team_rank() ordering.
+   *
+   *  The highest rank thread can compute the reduction total as
+   *    reduction_total = dev.team_scan( value ) + value ;
+   */
+  template< typename ArgType >
+  KOKKOS_INLINE_FUNCTION ArgType team_scan( const ArgType & value ) const
+    { return this-> template team_scan<ArgType>( value , 0 ); }
+
+
+  //----------------------------------------
+  // Private for the driver
+
+  // Construct the member for the thread 'exec' executing policy 'team'.
+  // When the policy has a non-zero league size, the thread's team and
+  // league coordinates are derived from its reverse pool rank.
+  template< class ... Properties >
+  ThreadsExecTeamMember( Impl::ThreadsExec * exec
+                       , const TeamPolicyInternal< Kokkos::Threads , Properties ...
> & team
+                       , const int shared_size )
+    : m_exec( exec )
+    , m_team_base(0)
+    , m_team_shared(0,0)
+    , m_team_shared_size( shared_size )
+    , m_team_size(team.team_size())
+    , m_team_rank(0)
+    , m_team_rank_rev(0)
+    , m_league_size(0)
+    , m_league_end(0)
+    , m_league_rank(0)
+    , m_chunk_size( team.chunk_size() )
+    , m_league_chunk_end(0)
+      // This member used to be left uninitialized when the league is empty,
+      // and valid_dynamic() / next_dynamic() read it.  An empty league has no
+      // team base, so the thread is invalid until the league branch below
+      // overwrites the flag.
+    , m_invalid_thread(1)
+    , m_team_alloc( team.team_alloc())
+    {
+      if ( team.league_size() ) {
+        // Execution is using device-team interface:
+
+        const int pool_rank_rev = m_exec->pool_size() - ( m_exec->pool_rank() + 1 );
+        const int team_rank_rev = pool_rank_rev % team.team_alloc();
+        const size_t pool_league_size     = m_exec->pool_size() / team.team_alloc() ;
+        const size_t pool_league_rank_rev = pool_rank_rev / team.team_alloc() ;
+        const size_t pool_league_rank     = pool_league_size - ( pool_league_rank_rev + 1 );
+
+        // NOTE(review): league_iter_begin / league_iter_end are computed
+        // here but not used by the rest of this constructor.
+        const int pool_num_teams       = m_exec->pool_size()/team.team_alloc();
+        const int chunk_size           = team.chunk_size()>0?team.chunk_size():team.team_iter();
+        const int chunks_per_team      = ( team.league_size() + chunk_size*pool_num_teams-1 ) / (chunk_size*pool_num_teams);
+              int league_iter_end      = team.league_size() - pool_league_rank_rev * chunks_per_team * chunk_size;
+              int league_iter_begin    = league_iter_end - chunks_per_team * chunk_size;
+        if (league_iter_begin < 0)     league_iter_begin = 0;
+        if (league_iter_end>team.league_size()) league_iter_end = team.league_size();
+
+        if ((team.team_alloc()>m_team_size)?
+            (team_rank_rev >= m_team_size):
+            (m_exec->pool_size() - pool_num_teams*m_team_size > m_exec->pool_rank())
+           )
+          m_invalid_thread = 1;
+        else
+          m_invalid_thread = 0;
+
+        // May be using fewer threads per team than a multiple of threads per core,
+        // some threads will idle.
+
+        if ( team_rank_rev < team.team_size() && !m_invalid_thread) {
+
+          m_team_base        = m_exec->pool_base() + team.team_alloc() * pool_league_rank_rev ;
+          m_team_size        = team.team_size() ;
+          m_team_rank        = team.team_size() - ( team_rank_rev + 1 );
+          m_team_rank_rev    = team_rank_rev ;
+          m_league_size      = team.league_size();
+
+          m_league_rank      = ( team.league_size() *  pool_league_rank    ) / pool_league_size ;
+          m_league_end       = ( team.league_size() * (pool_league_rank+1) ) / pool_league_size ;
+
+          set_team_shared();
+        }
+
+        // The team's root thread publishes the team's work range for
+        // dynamic scheduling.
+        if ( (m_team_rank_rev == 0) && (m_invalid_thread == 0) ) {
+          m_exec->set_work_range(m_league_rank,m_league_end,m_chunk_size);
+          m_exec->reset_steal_target(m_team_size);
+        }
+        if(std::is_same<typename TeamPolicyInternal<Kokkos::Threads, Properties ...>::schedule_type::type,Kokkos::Dynamic>::value) {
+          m_exec->barrier();
+        }
+      }
+    }
+
+  ThreadsExecTeamMember()
+    : m_exec(0)
+    , m_team_base(0)
+    , m_team_shared(0,0)
+    , m_team_shared_size(0)
+    , m_team_size(1)
+    , m_team_rank(0)
+    , m_team_rank_rev(0)
+    , m_league_size(1)
+    , m_league_end(0)
+    , m_league_rank(0)
+    , m_chunk_size(0)
+    , m_league_chunk_end(0)
+    , m_invalid_thread(0)
+    , m_team_alloc(0)
+    {}
+
+  inline
+  ThreadsExec & threads_exec_team_base() const { return m_team_base ?
**m_team_base : *m_exec ; }
+
+  // Static scheduling: true while league iterations remain for this team.
+  bool valid_static() const
+    { return m_league_rank < m_league_end ; }
+
+  void next_static()
+    {
+      if ( m_league_rank < m_league_end ) {
+        team_barrier();
+        set_team_shared();
+      }
+      m_league_rank++;
+    }
+
+  // Dynamic scheduling: true while this team has a league iteration to run.
+  // When the current chunk is used up, the team's root thread claims (or
+  // steals) the next chunk index and the team picks it up after a barrier.
+  bool valid_dynamic() {
+
+    if(m_invalid_thread)
+      return false;
+    if ((m_league_rank < m_league_chunk_end) && (m_league_rank < m_league_size)) {
+      return true;
+    }
+
+    if (  m_team_rank_rev == 0 ) {
+      m_team_base[0]->get_work_index(m_team_alloc);
+    }
+    team_barrier();
+
+    long work_index = m_team_base[0]->team_work_index();
+
+    m_league_rank = work_index * m_chunk_size;
+    m_league_chunk_end = (work_index +1 ) * m_chunk_size;
+
+    if(m_league_chunk_end > m_league_size) m_league_chunk_end = m_league_size;
+
+    // A work index of -1 (no work left) yields a negative league rank.
+    if(m_league_rank>=0)
+      return true;
+    return false;
+  }
+
+  void next_dynamic() {
+    if(m_invalid_thread)
+      return;
+
+    if ( m_league_rank < m_league_chunk_end ) {
+      team_barrier();
+      set_team_shared();
+    }
+    m_league_rank++;
+  }
+
+  void set_league_shmem( const int arg_league_rank
+                       , const int arg_league_size
+                       , const int arg_shmem_size
+                       )
+    {
+      m_league_rank = arg_league_rank ;
+      m_league_size = arg_league_size ;
+      m_team_shared_size = arg_shmem_size ;
+      set_team_shared();
+    }
+};
+
+} /* namespace Impl */
+} /* namespace Kokkos */
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+template< class ... Properties >
+class TeamPolicyInternal< Kokkos::Threads , Properties ...
>: public PolicyTraits<Properties ...> +{ +private: + + int m_league_size ; + int m_team_size ; + int m_team_alloc ; + int m_team_iter ; + + size_t m_team_scratch_size[2]; + size_t m_thread_scratch_size[2]; + + int m_chunk_size; + + inline + void init( const int league_size_request + , const int team_size_request ) + { + const int pool_size = traits::execution_space::thread_pool_size(0); + const int team_max = traits::execution_space::thread_pool_size(1); + const int team_grain = traits::execution_space::thread_pool_size(2); + + m_league_size = league_size_request ; + + m_team_size = team_size_request < team_max ? + team_size_request : team_max ; + + // Round team size up to a multiple of 'team_grain' + const int team_size_grain = team_grain * ( ( m_team_size + team_grain - 1 ) / team_grain ); + const int team_count = pool_size / team_size_grain ; + + // Constraint : pool_size = m_team_alloc * team_count + m_team_alloc = pool_size / team_count ; + + // Maximum number of iterations each team will take: + m_team_iter = ( m_league_size + team_count - 1 ) / team_count ; + + set_auto_chunk_size(); + } + + +public: + + //! Tag this class as a kokkos execution policy + //! Tag this class as a kokkos execution policy + typedef TeamPolicyInternal execution_policy ; + + typedef PolicyTraits<Properties ... 
> traits; + + TeamPolicyInternal& operator = (const TeamPolicyInternal& p) { + m_league_size = p.m_league_size; + m_team_size = p.m_team_size; + m_team_alloc = p.m_team_alloc; + m_team_iter = p.m_team_iter; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + return *this; + } + + //---------------------------------------- + + template< class FunctorType > + inline static + int team_size_max( const FunctorType & ) + { return traits::execution_space::thread_pool_size(1); } + + template< class FunctorType > + static int team_size_recommended( const FunctorType & ) + { return traits::execution_space::thread_pool_size(2); } + + + template< class FunctorType > + inline static + int team_size_recommended( const FunctorType &, const int& ) + { return traits::execution_space::thread_pool_size(2); } + + //---------------------------------------- + + inline int team_size() const { return m_team_size ; } + inline int team_alloc() const { return m_team_alloc ; } + inline int league_size() const { return m_league_size ; } + inline size_t scratch_size(const int& level, int team_size_ = -1 ) const { + if(team_size_ < 0) + team_size_ = m_team_size; + return m_team_scratch_size[level] + team_size_*m_thread_scratch_size[level] ; + } + + inline int team_iter() const { return m_team_iter ; } + + /** \brief Specify league size, request team size */ + TeamPolicyInternal( typename traits::execution_space & + , int league_size_request + , int team_size_request + , int vector_length_request = 1 ) + : m_league_size(0) + , m_team_size(0) + , m_team_alloc(0) + , m_team_scratch_size { 0 , 0 } + , m_thread_scratch_size { 0 , 0 } + , m_chunk_size(0) + { init(league_size_request,team_size_request); (void) vector_length_request; } + + /** \brief Specify league size, request team size */ + 
TeamPolicyInternal( typename traits::execution_space & + , int league_size_request + , const Kokkos::AUTO_t & /* team_size_request */ + , int /* vector_length_request */ = 1 ) + : m_league_size(0) + , m_team_size(0) + , m_team_alloc(0) + , m_team_scratch_size { 0 , 0 } + , m_thread_scratch_size { 0 , 0 } + , m_chunk_size(0) + { init(league_size_request,traits::execution_space::thread_pool_size(2)); } + + TeamPolicyInternal( int league_size_request + , int team_size_request + , int /* vector_length_request */ = 1 ) + : m_league_size(0) + , m_team_size(0) + , m_team_alloc(0) + , m_team_scratch_size { 0 , 0 } + , m_thread_scratch_size { 0 , 0 } + , m_chunk_size(0) + { init(league_size_request,team_size_request); } + + TeamPolicyInternal( int league_size_request + , const Kokkos::AUTO_t & /* team_size_request */ + , int /* vector_length_request */ = 1 ) + : m_league_size(0) + , m_team_size(0) + , m_team_alloc(0) + , m_team_scratch_size { 0 , 0 } + , m_thread_scratch_size { 0 , 0 } + , m_chunk_size(0) + { init(league_size_request,traits::execution_space::thread_pool_size(2)); } + + inline int chunk_size() const { return m_chunk_size ; } + + /** \brief set chunk_size to a discrete value*/ + inline TeamPolicyInternal set_chunk_size(typename traits::index_type chunk_size_) const { + TeamPolicyInternal p = *this; + p.m_chunk_size = chunk_size_; + return p; + } + + /** \brief set per team scratch size for a specific level of the scratch hierarchy */ + inline TeamPolicyInternal set_scratch_size(const int& level, const PerTeamValue& per_team) const { + TeamPolicyInternal p = *this; + p.m_team_scratch_size[level] = per_team.value; + return p; + }; + + /** \brief set per thread scratch size for a specific level of the scratch hierarchy */ + inline TeamPolicyInternal set_scratch_size(const int& level, const PerThreadValue& per_thread) const { + TeamPolicyInternal p = *this; + p.m_thread_scratch_size[level] = per_thread.value; + return p; + }; + + /** \brief set per thread and per 
team scratch size for a specific level of the scratch hierarchy */ + inline TeamPolicyInternal set_scratch_size(const int& level, const PerTeamValue& per_team, const PerThreadValue& per_thread) const { + TeamPolicyInternal p = *this; + p.m_team_scratch_size[level] = per_team.value; + p.m_thread_scratch_size[level] = per_thread.value; + return p; + }; + +private: + /** \brief finalize chunk_size if it was set to AUTO*/ + inline void set_auto_chunk_size() { + + int concurrency = traits::execution_space::thread_pool_size(0)/m_team_alloc; + if( concurrency==0 ) concurrency=1; + + if(m_chunk_size > 0) { + if(!Impl::is_integral_power_of_two( m_chunk_size )) + Kokkos::abort("TeamPolicy blocking granularity must be power of two" ); + } + + int new_chunk_size = 1; + while(new_chunk_size*100*concurrency < m_league_size) + new_chunk_size *= 2; + if(new_chunk_size < 128) { + new_chunk_size = 1; + while( (new_chunk_size*40*concurrency < m_league_size ) && (new_chunk_size<128) ) + new_chunk_size*=2; + } + m_chunk_size = new_chunk_size; + } + +public: + + typedef Impl::ThreadsExecTeamMember member_type ; + + friend class Impl::ThreadsExecTeamMember ; +}; + +} /*namespace Impl */ +} /* namespace Kokkos */ + + +namespace Kokkos { + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember> +TeamThreadRange(const Impl::ThreadsExecTeamMember& thread, const iType& count) +{ + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>(thread,count); +} + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember> +TeamThreadRange( const Impl::ThreadsExecTeamMember& thread + , const iType & begin + , const iType & end + ) +{ + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>(thread,begin,end); +} + + +template<typename iType> +KOKKOS_INLINE_FUNCTION 
+Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember > + ThreadVectorRange(const Impl::ThreadsExecTeamMember& thread, const iType& count) { + return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >(thread,count); +} + + +KOKKOS_INLINE_FUNCTION +Impl::ThreadSingleStruct<Impl::ThreadsExecTeamMember> PerTeam(const Impl::ThreadsExecTeamMember& thread) { + return Impl::ThreadSingleStruct<Impl::ThreadsExecTeamMember>(thread); +} + +KOKKOS_INLINE_FUNCTION +Impl::VectorSingleStruct<Impl::ThreadsExecTeamMember> PerThread(const Impl::ThreadsExecTeamMember& thread) { + return Impl::VectorSingleStruct<Impl::ThreadsExecTeamMember>(thread); +} +} // namespace Kokkos + +namespace Kokkos { + + /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the calling thread team. + * This functionality requires C++11 support.*/ +template<typename iType, class Lambda> +KOKKOS_INLINE_FUNCTION +void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>& loop_boundaries, const Lambda& lambda) { + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) + lambda(i); +} + +/** \brief Inter-thread parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the calling thread team and a summation of + * val is performed and put into result. 
This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>& loop_boundaries, + const Lambda & lambda, ValueType& result) { + + result = ValueType(); + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + result+=tmp; + } + + result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>()); +} + +#if defined( KOKKOS_HAVE_CXX11 ) + +/** \brief Inter-thread parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the calling thread team and a reduction of + * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. + * The input value of init_result is used to initialize each calling thread's partial result. Therefore + * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or + * '1 for *'). This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>& loop_boundaries, + const Lambda & lambda, const JoinType& join, ValueType& init_result) { + + ValueType result = init_result; + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + join(result,tmp); + } + + init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join)); +} + +#endif /* #if defined( KOKKOS_HAVE_CXX11 ) */ + +} //namespace Kokkos + + +namespace Kokkos { +/** \brief Intra-thread vector parallel_for. 
Executes lambda(iType i) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the calling thread. + * This functionality requires C++11 support.*/ +template<typename iType, class Lambda> +KOKKOS_INLINE_FUNCTION +void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >& + loop_boundaries, const Lambda& lambda) { + #ifdef KOKKOS_HAVE_PRAGMA_IVDEP + #pragma ivdep + #endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) + lambda(i); +} + +/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the calling thread and a summation of + * val is performed and put into result. This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >& + loop_boundaries, const Lambda & lambda, ValueType& result) { + result = ValueType(); +#ifdef KOKKOS_HAVE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + result+=tmp; + } +} + +/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the calling thread and a reduction of + * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. + * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore + * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or + * '1 for *'). 
This functionality requires C++11 support.*/ +template< typename iType, class Lambda, typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >& + loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) { + + ValueType result = init_result; +#ifdef KOKKOS_HAVE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + join(result,tmp); + } + init_result = result; +} + +/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final) + * for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed. + * Depending on the target execution space the operator might be called twice: once with final=false + * and once with final=true. When final==true val contains the prefix sum value. The contribution of this + * "i" needs to be added to val no matter whether final==true or not. In a serial execution + * (i.e. team_size==1) the operator is only called once with final==true. Scan_val will be set + * to the final sum value over all vector lanes. 
+ * This functionality requires C++11 support.*/ +template< typename iType, class FunctorType > +KOKKOS_INLINE_FUNCTION +void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >& + loop_boundaries, const FunctorType & lambda) { + + typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; + typedef typename ValueTraits::value_type value_type ; + + value_type scan_val = value_type(); + +#ifdef KOKKOS_HAVE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i,scan_val,true); + } +} + +} // namespace Kokkos + +namespace Kokkos { + +template<class FunctorType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::VectorSingleStruct<Impl::ThreadsExecTeamMember>& single_struct, const FunctorType& lambda) { + lambda(); +} + +template<class FunctorType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::ThreadSingleStruct<Impl::ThreadsExecTeamMember>& single_struct, const FunctorType& lambda) { + if(single_struct.team_member.team_rank()==0) lambda(); +} + +template<class FunctorType, class ValueType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::VectorSingleStruct<Impl::ThreadsExecTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) { + lambda(val); +} + +template<class FunctorType, class ValueType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::ThreadSingleStruct<Impl::ThreadsExecTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) { + if(single_struct.team_member.team_rank()==0) { + lambda(val); + } + single_struct.team_member.team_broadcast(val,0); +} +} + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #define KOKKOS_THREADSTEAM_HPP */ + diff --git a/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp b/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp 
new file mode 100644 index 0000000000000000000000000000000000000000..1aba00c94b0bffd95f1e09acc22337b96c87eedb --- /dev/null +++ b/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp @@ -0,0 +1,658 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_THREADS_PARALLEL_HPP +#define KOKKOS_THREADS_PARALLEL_HPP + +#include <vector> +#include <iostream> + +#include <Kokkos_Parallel.hpp> + +#include <impl/Kokkos_StaticAssert.hpp> +#include <impl/Kokkos_FunctorAdapter.hpp> + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +/* ParallelFor Kokkos::Threads with RangePolicy */ + +template< class FunctorType , class ... Traits > +class ParallelFor< FunctorType + , Kokkos::RangePolicy< Traits ... > + , Kokkos::Threads + > +{ +private: + + typedef Kokkos::RangePolicy< Traits ... 
> Policy ; + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::WorkRange WorkRange ; + typedef typename Policy::member_type Member ; + + const FunctorType m_functor ; + const Policy m_policy ; + + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member ibeg , const Member iend ) + { + #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \ + defined( KOKKOS_HAVE_PRAGMA_IVDEP ) + #pragma ivdep + #endif + for ( Member i = ibeg ; i < iend ; ++i ) { + functor( i ); + } + } + + template< class TagType > + inline static + typename std::enable_if< ! std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member ibeg , const Member iend ) + { + const TagType t{} ; + #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \ + defined( KOKKOS_HAVE_PRAGMA_IVDEP ) + #pragma ivdep + #endif + for ( Member i = ibeg ; i < iend ; ++i ) { + functor( t , i ); + } + } + + static void exec( ThreadsExec & exec , const void * arg ) + { + exec_schedule<typename Policy::schedule_type::type>(exec,arg); + } + + template<class Schedule> + static + typename std::enable_if< std::is_same<Schedule,Kokkos::Static>::value >::type + exec_schedule( ThreadsExec & exec , const void * arg ) + { + const ParallelFor & self = * ((const ParallelFor *) arg ); + + WorkRange range( self.m_policy , exec.pool_rank() , exec.pool_size() ); + + ParallelFor::template exec_range< WorkTag > + ( self.m_functor , range.begin() , range.end() ); + + exec.fan_in(); + } + + template<class Schedule> + static + typename std::enable_if< std::is_same<Schedule,Kokkos::Dynamic>::value >::type + exec_schedule( ThreadsExec & exec , const void * arg ) + { + const ParallelFor & self = * ((const ParallelFor *) arg ); + + WorkRange range( self.m_policy , exec.pool_rank() , exec.pool_size() ); + + 
exec.set_work_range(range.begin(),range.end(),self.m_policy.chunk_size()); + exec.reset_steal_target(); + exec.barrier(); + + long work_index = exec.get_work_index(); + + while(work_index != -1) { + const Member begin = static_cast<Member>(work_index) * self.m_policy.chunk_size(); /* NOTE(review): begin is derived from work_index alone and m_policy.begin() is never added; confirm this is correct for a RangePolicy whose begin is nonzero. */ + const Member end = begin + self.m_policy.chunk_size() < self.m_policy.end()?begin+self.m_policy.chunk_size():self.m_policy.end(); + + ParallelFor::template exec_range< WorkTag > + ( self.m_functor , begin , end ); + work_index = exec.get_work_index(); + } + + exec.fan_in(); + } + +public: + + inline + void execute() const + { + ThreadsExec::start( & ParallelFor::exec , this ); + ThreadsExec::fence(); + } + + ParallelFor( const FunctorType & arg_functor + , const Policy & arg_policy ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + {} +}; + +//---------------------------------------------------------------------------- +/* ParallelFor Kokkos::Threads with TeamPolicy */ + +template< class FunctorType , class ... Properties > +class ParallelFor< FunctorType + , Kokkos::TeamPolicy< Properties ... > + , Kokkos::Threads + > +{ +private: + + typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::Threads, Properties ... > Policy ; + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::member_type Member ; + + const FunctorType m_functor ; + const Policy m_policy ; + const int m_shared ; + + template< class TagType , class Schedule> + inline static + typename std::enable_if< std::is_same< TagType , void >::value + && std::is_same<Schedule,Kokkos::Static>::value >::type + exec_team( const FunctorType & functor , Member member ) + { + for ( ; member.valid_static() ; member.next_static() ) { + functor( member ); + } + } + + template< class TagType , class Schedule> + inline static + typename std::enable_if< ! 
std::is_same< TagType , void >::value + && std::is_same<Schedule,Kokkos::Static>::value >::type + exec_team( const FunctorType & functor , Member member ) + { + const TagType t{} ; + for ( ; member.valid_static() ; member.next_static() ) { + functor( t , member ); + } + } + + template< class TagType , class Schedule> + inline static + typename std::enable_if< std::is_same< TagType , void >::value + && std::is_same<Schedule,Kokkos::Dynamic>::value >::type + exec_team( const FunctorType & functor , Member member ) + { + + for ( ; member.valid_dynamic() ; member.next_dynamic() ) { + functor( member ); + } + } + + template< class TagType , class Schedule> + inline static + typename std::enable_if< ! std::is_same< TagType , void >::value + && std::is_same<Schedule,Kokkos::Dynamic>::value >::type + exec_team( const FunctorType & functor , Member member ) + { + const TagType t{} ; + for ( ; member.valid_dynamic() ; member.next_dynamic() ) { + functor( t , member ); + } + } + + static void exec( ThreadsExec & exec , const void * arg ) + { + const ParallelFor & self = * ((const ParallelFor *) arg ); + + ParallelFor::exec_team< WorkTag , typename Policy::schedule_type::type > + ( self.m_functor , Member( & exec , self.m_policy , self.m_shared ) ); + + exec.barrier(); + exec.fan_in(); + } + +public: + + inline + void execute() const + { + ThreadsExec::resize_scratch( 0 , Policy::member_type::team_reduce_size() + m_shared ); + + ThreadsExec::start( & ParallelFor::exec , this ); + + ThreadsExec::fence(); + } + + ParallelFor( const FunctorType & arg_functor + , const Policy & arg_policy ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_shared( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + { } +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +/* 
ParallelReduce with Kokkos::Threads and RangePolicy */ + +template< class FunctorType , class ReducerType, class ... Traits > +class ParallelReduce< FunctorType + , Kokkos::RangePolicy< Traits ... > + , ReducerType + , Kokkos::Threads + > +{ +private: + + typedef Kokkos::RangePolicy< Traits ... > Policy ; + + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::WorkRange WorkRange ; + typedef typename Policy::member_type Member ; + + typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + + const FunctorType m_functor ; + const Policy m_policy ; + const ReducerType m_reducer ; + const pointer_type m_result_ptr ; + + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member & ibeg , const Member & iend + , reference_type update ) + { + #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \ + defined( KOKKOS_HAVE_PRAGMA_IVDEP ) + #pragma ivdep + #endif + for ( Member i = ibeg ; i < iend ; ++i ) { + functor( i , update ); + } + } + + template< class TagType > + inline static + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member & ibeg , const Member & iend + , reference_type update ) + { + const TagType t{} ; + #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \ + defined( KOKKOS_HAVE_PRAGMA_IVDEP ) + #pragma ivdep + #endif + for ( Member i = ibeg ; i < iend ; ++i ) { + functor( t , i , update ); + } + } + + static void + exec( ThreadsExec & exec , const void * arg ) { + exec_schedule<typename Policy::schedule_type::type>(exec, arg); + } + + template<class Schedule> + static + typename std::enable_if< std::is_same<Schedule,Kokkos::Static>::value >::type + exec_schedule( ThreadsExec & exec , const void * arg ) + { + const ParallelReduce & self = * ((const ParallelReduce *) arg ); + const WorkRange range( self.m_policy, exec.pool_rank(), exec.pool_size() ); + + ParallelReduce::template exec_range< WorkTag > + ( self.m_functor , range.begin() , range.end() + , ValueInit::init( ReducerConditional::select(self.m_functor , self.m_reducer) , exec.reduce_memory() ) ); + + exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) ); + } + + template<class Schedule> + static + typename std::enable_if< std::is_same<Schedule,Kokkos::Dynamic>::value >::type + exec_schedule( ThreadsExec & exec , const void * arg ) + { + const ParallelReduce & self = * ((const ParallelReduce *) arg ); + const WorkRange range( self.m_policy, exec.pool_rank(), exec.pool_size() ); + + exec.set_work_range(range.begin(),range.end(),self.m_policy.chunk_size()); + exec.reset_steal_target(); + exec.barrier(); + + long work_index = exec.get_work_index(); + reference_type update = ValueInit::init( ReducerConditional::select(self.m_functor , self.m_reducer) , exec.reduce_memory() ); + while(work_index != -1) { + const Member begin = static_cast<Member>(work_index) * self.m_policy.chunk_size(); + const Member end = begin + self.m_policy.chunk_size() < 
self.m_policy.end()?begin+self.m_policy.chunk_size():self.m_policy.end(); + ParallelReduce::template exec_range< WorkTag > + ( self.m_functor , begin , end + , update ); + work_index = exec.get_work_index(); + } + + exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) ); + } + +public: + + inline + void execute() const + { + ThreadsExec::resize_scratch( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , 0 ); + + ThreadsExec::start( & ParallelReduce::exec , this ); + + ThreadsExec::fence(); + + if ( m_result_ptr ) { + + const pointer_type data = + (pointer_type) ThreadsExec::root_reduce_scratch(); + + const unsigned n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); + for ( unsigned i = 0 ; i < n ; ++i ) { m_result_ptr[i] = data[i]; } + } + } + + template< class HostViewType > + ParallelReduce( const FunctorType & arg_functor , + const Policy & arg_policy , + const HostViewType & arg_result_view , + typename std::enable_if< + Kokkos::is_view< HostViewType >::value && + !Kokkos::is_reducer_type<ReducerType>::value + ,void*>::type = NULL) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( InvalidType() ) + , m_result_ptr( arg_result_view.ptr_on_device() ) + { + static_assert( Kokkos::is_view< HostViewType >::value + , "Kokkos::Threads reduce result must be a View" ); + + static_assert( std::is_same< typename HostViewType::memory_space , HostSpace >::value + , "Kokkos::Threads reduce result must be a View in HostSpace" ); + } + + inline + ParallelReduce( const FunctorType & arg_functor + , Policy arg_policy + , const ReducerType& reducer ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( reducer ) + , m_result_ptr( reducer.result_view().data() ) + { + /*static_assert( std::is_same< typename ViewType::memory_space + , Kokkos::HostSpace >::value + , "Reduction result on Kokkos::OpenMP must be a Kokkos::View 
in HostSpace" );*/ + } + +}; + +//---------------------------------------------------------------------------- +/* ParallelReduce with Kokkos::Threads and TeamPolicy */ + +template< class FunctorType , class ReducerType, class ... Properties > +class ParallelReduce< FunctorType + , Kokkos::TeamPolicy< Properties ... > + , ReducerType + , Kokkos::Threads + > +{ +private: + + typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::Threads, Properties ... > Policy ; + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::member_type Member ; + + typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + + const FunctorType m_functor ; + const Policy m_policy ; + const ReducerType m_reducer ; + const pointer_type m_result_ptr ; + const int m_shared ; + + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_team( const FunctorType & functor , Member member , reference_type update ) + { + for ( ; member.valid_static() ; member.next_static() ) { + functor( member , update ); + } + } + + template< class TagType > + inline static + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + exec_team( const FunctorType & functor , Member member , reference_type update ) + { + const TagType t{} ; + for ( ; member.valid_static() ; member.next_static() ) { + functor( t , member , update ); + } + } + + static void exec( ThreadsExec & exec , const void * arg ) + { + const ParallelReduce & self = * ((const ParallelReduce *) arg ); + + ParallelReduce::template exec_team< WorkTag > + ( self.m_functor , Member( & exec , self.m_policy , self.m_shared ) + , ValueInit::init( ReducerConditional::select(self.m_functor , self.m_reducer) , exec.reduce_memory() ) ); + + exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) ); + } + +public: + + inline + void execute() const + { + ThreadsExec::resize_scratch( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , Policy::member_type::team_reduce_size() + m_shared ); + + ThreadsExec::start( & ParallelReduce::exec , this ); + + ThreadsExec::fence(); + + if ( m_result_ptr ) { + + const pointer_type data = (pointer_type) ThreadsExec::root_reduce_scratch(); + + const unsigned n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); + for ( unsigned i = 0 ; i < n ; ++i ) { m_result_ptr[i] = data[i]; } + } + } + + template< class ViewType > + inline + ParallelReduce( const FunctorType & arg_functor , + const Policy & arg_policy , + const ViewType & arg_result , + typename std::enable_if< + Kokkos::is_view< ViewType >::value && + !Kokkos::is_reducer_type<ReducerType>::value + ,void*>::type = NULL) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( InvalidType() ) + , m_result_ptr( arg_result.ptr_on_device() ) + , m_shared( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + {} + + inline + ParallelReduce( const FunctorType & arg_functor + , Policy arg_policy 
+ , const ReducerType& reducer ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( reducer ) + , m_result_ptr( reducer.result_view().data() ) + , m_shared( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + { + /*static_assert( std::is_same< typename ViewType::memory_space + , Kokkos::HostSpace >::value + , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" );*/ + } +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +/* ParallelScan with Kokkos::Threads and RangePolicy */ + +template< class FunctorType , class ... Traits > +class ParallelScan< FunctorType + , Kokkos::RangePolicy< Traits ... > + , Kokkos::Threads + > +{ +private: + + typedef Kokkos::RangePolicy< Traits ... > Policy ; + typedef typename Policy::WorkRange WorkRange ; + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::member_type Member ; + typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ; + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + + const FunctorType m_functor ; + const Policy m_policy ; + + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member & ibeg , const Member & iend + , reference_type update , const bool final ) + { + #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \ + defined( KOKKOS_HAVE_PRAGMA_IVDEP ) + #pragma ivdep + #endif + for ( Member i = ibeg ; i < iend ; ++i ) { + functor( i , update , final ); + } + } + + template< class TagType > + inline static + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member & ibeg , const Member & iend + , reference_type update , const bool final ) + { + const TagType t{} ; + #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \ + defined( KOKKOS_HAVE_PRAGMA_IVDEP ) + #pragma ivdep + #endif + for ( Member i = ibeg ; i < iend ; ++i ) { + functor( t , i , update , final ); + } + } + + static void exec( ThreadsExec & exec , const void * arg ) + { + const ParallelScan & self = * ((const ParallelScan *) arg ); + + const WorkRange range( self.m_policy, exec.pool_rank(), exec.pool_size() ); + + reference_type update = + ValueInit::init( self.m_functor , exec.reduce_memory() ); + + ParallelScan::template exec_range< WorkTag > + ( self.m_functor , range.begin(), range.end(), update, false ); + + // exec.template scan_large<FunctorType,WorkTag>( self.m_functor ); + exec.template scan_small<FunctorType,WorkTag>( self.m_functor ); + + ParallelScan::template exec_range< WorkTag > + ( self.m_functor , range.begin(), range.end(), update, true ); + + exec.fan_in(); + } + +public: + + inline + void execute() const + { + ThreadsExec::resize_scratch( 2 * ValueTraits::value_size( m_functor ) , 0 ); + ThreadsExec::start( & ParallelScan::exec , this ); + ThreadsExec::fence(); + } + + ParallelScan( const FunctorType & arg_functor + , const Policy & arg_policy ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + { } +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #define KOKKOS_THREADS_PARALLEL_HPP */ + diff --git a/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp b/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e1599284b297bee7a770d2a6ce87a429a9e5d08a --- /dev/null +++ 
b/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp @@ -0,0 +1,930 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#include <stdio.h> +#include <iostream> +#include <sstream> +#include <Kokkos_Core.hpp> +#include <Threads/Kokkos_Threads_TaskPolicy.hpp> + +#if defined( KOKKOS_HAVE_PTHREAD ) && defined( KOKKOS_ENABLE_TASKPOLICY ) + +#define QLOCK (reinterpret_cast<void*>( ~((uintptr_t)0) )) +#define QDENIED (reinterpret_cast<void*>( ~((uintptr_t)0) - 1 )) + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +void ThreadsTaskPolicyQueue::Destroy::destroy_shared_allocation() +{ + // Verify the queue is empty + + if ( m_policy->m_count_ready || + m_policy->m_team[0] || + m_policy->m_team[1] || + m_policy->m_team[2] || + m_policy->m_serial[0] || + m_policy->m_serial[1] || + m_policy->m_serial[2] ) { + Kokkos::abort("ThreadsTaskPolicyQueue ERROR : Attempt to destroy non-empty queue" ); + } + + m_policy->~ThreadsTaskPolicyQueue(); +} + +//---------------------------------------------------------------------------- + +ThreadsTaskPolicyQueue::~ThreadsTaskPolicyQueue() +{ +} + +ThreadsTaskPolicyQueue::ThreadsTaskPolicyQueue + ( const unsigned arg_task_max_count + , const unsigned arg_task_max_size + , const unsigned arg_task_default_dependence_capacity + , const unsigned arg_task_team_size + ) + : m_space( Kokkos::Threads::memory_space() + , arg_task_max_size * arg_task_max_count * 1.2 + , 16 /* log2(superblock size) */ + ) + , m_team { 0 , 0 , 0 } + , m_serial { 0 , 0 , 0 } + , m_team_size( arg_task_team_size ) + , m_default_dependence_capacity( arg_task_default_dependence_capacity ) + , m_count_ready(0) + , m_count_alloc(0) +{ + const int threads_total = Threads::thread_pool_size(0); + const int threads_per_numa = Threads::thread_pool_size(1); + const int threads_per_core = Threads::thread_pool_size(2); + + if ( 0 == m_team_size ) { + // If a team task then claim for execution 
until count is zero + // Issue: team collectives cannot assume which pool members are in the team. + // Issue: team must only span a single NUMA region. + + // If more than one thread per core then map cores to work team, + // else map numa to work team. + + if ( 1 < threads_per_core ) m_team_size = threads_per_core ; + else if ( 1 < threads_per_numa ) m_team_size = threads_per_numa ; + else m_team_size = 1 ; + } + + // Verify a valid team size + const bool valid_team_size = + ( 0 < m_team_size && m_team_size <= threads_total ) && + ( + ( 1 == m_team_size ) || + ( threads_per_core == m_team_size ) || + ( threads_per_numa == m_team_size ) + ); + + if ( ! valid_team_size ) { + std::ostringstream msg ; + + msg << "Kokkos::Experimental::TaskPolicy< Kokkos::Threads > ERROR" + << " invalid team_size(" << m_team_size << ")" + << " threads_per_core(" << threads_per_core << ")" + << " threads_per_numa(" << threads_per_numa << ")" + << " threads_total(" << threads_total << ")" + ; + + Kokkos::Impl::throw_runtime_exception( msg.str() ); + } + + Kokkos::memory_fence(); +} + +//---------------------------------------------------------------------------- + +void ThreadsTaskPolicyQueue::driver( Kokkos::Impl::ThreadsExec & exec + , const void * arg ) +{ + // Whole thread pool is calling this function + + typedef Kokkos::Impl::ThreadsExecTeamMember member_type ; + + ThreadsTaskPolicyQueue & self = + * reinterpret_cast< ThreadsTaskPolicyQueue * >( const_cast<void*>(arg) ); + + // Create the thread team member with shared memory for the given task. 
+ + const TeamPolicy< Kokkos::Threads > team_policy( 1 , self.m_team_size ); + + member_type team_member( & exec , team_policy , 0 ); + + Kokkos::Impl::ThreadsExec & exec_team_base = + team_member.threads_exec_team_base(); + + task_root_type * volatile * const task_team_ptr = + reinterpret_cast<task_root_type**>( exec_team_base.reduce_memory() ); + + volatile int * const work_team_ptr = + reinterpret_cast<volatile int*>( task_team_ptr + 1 ); + + // Each team must iterate this loop synchronously + // to insure team-execution of team-task. + + const bool team_lead = team_member.team_fan_in(); + + bool work_team = true ; + + while ( work_team ) { + + task_root_type * task = 0 ; + + // Start here with members in a fan_in state + + if ( team_lead ) { + // Team lead queries the ready count for a team-consistent view. + *work_team_ptr = 0 != self.m_count_ready ; + + // Only the team lead attempts to pop a team task from the queues + for ( int i = 0 ; i < int(NPRIORITY) && 0 == task ; ++i ) { + if ( ( i < 2 /* regular queue */ ) + || ( ! self.m_space.is_empty() /* waiting for memory queue */ ) ) { + task = pop_ready_task( & self.m_team[i] ); + } + } + + *task_team_ptr = task ; + } + + Kokkos::memory_fence(); + + team_member.team_fan_out(); + + work_team = *work_team_ptr ; + + // Query if team acquired a team task + + if ( 0 != ( task = *task_team_ptr ) ) { + // Set shared memory + team_member.set_league_shmem( 0 , 1 , task->m_shmem_size ); + + (*task->m_team)( task , team_member ); + + // The team task called the functor, + // called the team_fan_in(), and + // if completed the team lead destroyed the task functor. + + if ( team_lead ) { + self.complete_executed_task( task ); + } + } + else { + // No team task acquired, each thread try a serial task + // Try the priority queue, then the regular queue. + for ( int i = 0 ; i < int(NPRIORITY) && 0 == task ; ++i ) { + if ( ( i < 2 /* regular queue */ ) + || ( ! 
self.m_space.is_empty() /* waiting for memory queue */ ) ) { + task = pop_ready_task( & self.m_serial[i] ); + } + } + + if ( 0 != task ) { + + (*task->m_serial)( task ); + + self.complete_executed_task( task ); + } + + team_member.team_fan_in(); + } + } + + team_member.team_fan_out(); + + exec.fan_in(); +} + +//---------------------------------------------------------------------------- + +ThreadsTaskPolicyQueue::task_root_type * +ThreadsTaskPolicyQueue::pop_ready_task( + ThreadsTaskPolicyQueue::task_root_type * volatile * const queue ) +{ + task_root_type * const q_lock = reinterpret_cast<task_root_type*>(QLOCK); + task_root_type * task = 0 ; + task_root_type * const task_claim = *queue ; + + if ( ( q_lock != task_claim ) && ( 0 != task_claim ) ) { + + // Queue is not locked and not null, try to claim head of queue. + // Is a race among threads to claim the queue. + + if ( task_claim == atomic_compare_exchange(queue,task_claim,q_lock) ) { + + // Aquired the task which must be in the waiting state. + + const int claim_state = + atomic_compare_exchange( & task_claim->m_state + , int(TASK_STATE_WAITING) + , int(TASK_STATE_EXECUTING) ); + + task_root_type * lock_verify = 0 ; + + if ( claim_state == int(TASK_STATE_WAITING) ) { + + // Transitioned this task from waiting to executing + // Update the queue to the next entry and release the lock + + task_root_type * const next = + *((task_root_type * volatile *) & task_claim->m_next ); + + *((task_root_type * volatile *) & task_claim->m_next ) = 0 ; + + lock_verify = atomic_compare_exchange( queue , q_lock , next ); + } + + if ( ( claim_state != int(TASK_STATE_WAITING) ) | + ( q_lock != lock_verify ) ) { + + fprintf(stderr,"ThreadsTaskPolicyQueue::pop_ready_task(0x%lx) task(0x%lx) state(%d) ERROR %s\n" + , (unsigned long) queue + , (unsigned long) task + , claim_state + , ( claim_state != int(TASK_STATE_WAITING) + ? 
"NOT WAITING" + : "UNLOCK" ) ); + fflush(stderr); + Kokkos::abort("ThreadsTaskPolicyQueue::pop_ready_task"); + } + + task = task_claim ; + } + } + + return task ; +} + +//---------------------------------------------------------------------------- + +void ThreadsTaskPolicyQueue::complete_executed_task( + ThreadsTaskPolicyQueue::task_root_type * task ) +{ + task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED); + + // State is either executing or if respawned then waiting, + // try to transition from executing to complete. + // Reads the current value. + + const int state_old = + atomic_compare_exchange( & task->m_state + , int(Kokkos::Experimental::TASK_STATE_EXECUTING) + , int(Kokkos::Experimental::TASK_STATE_COMPLETE) ); + + if ( int(Kokkos::Experimental::TASK_STATE_WAITING) == state_old ) { + // Task requested a respawn so reschedule it. + // The reference count will be incremented if placed in a queue. + schedule_task( task , false /* not the initial spawn */ ); + } + else if ( int(Kokkos::Experimental::TASK_STATE_EXECUTING) == state_old ) { + /* Task is complete */ + + // Clear dependences of this task before locking wait queue + + task->clear_dependence(); + + // Stop other tasks from adding themselves to this task's wait queue. + // The wait queue is updated concurrently so guard with an atomic. + + task_root_type * wait_queue = *((task_root_type * volatile *) & task->m_wait ); + task_root_type * wait_queue_old = 0 ; + + do { + wait_queue_old = wait_queue ; + wait_queue = atomic_compare_exchange( & task->m_wait , wait_queue_old , q_denied ); + } while ( wait_queue_old != wait_queue ); + + // The task has been removed from ready queue and + // execution is complete so decrement the reference count. + // The reference count was incremented by the initial spawning. + // The task may be deleted if this was the last reference. 
+ task_root_type::assign( & task , 0 ); + + // Pop waiting tasks and schedule them + while ( wait_queue ) { + task_root_type * const x = wait_queue ; wait_queue = x->m_next ; x->m_next = 0 ; + schedule_task( x , false /* not the initial spawn */ ); + } + } + else { + fprintf( stderr + , "ThreadsTaskPolicyQueue::complete_executed_task(0x%lx) ERROR state_old(%d) dep_size(%d)\n" + , (unsigned long)( task ) + , int(state_old) + , task->m_dep_size + ); + fflush( stderr ); + Kokkos::abort("ThreadsTaskPolicyQueue::complete_executed_task" ); + } + + // If the task was respawned it may have already been + // put in a ready queue and the count incremented. + // By decrementing the count last it will never go to zero + // with a ready or executing task. + + atomic_fetch_add( & m_count_ready , -1 ); +} + +//---------------------------------------------------------------------------- + +void ThreadsTaskPolicyQueue::reschedule_task( + ThreadsTaskPolicyQueue::task_root_type * const task ) +{ + // Reschedule transitions from executing back to waiting. + const int old_state = + atomic_compare_exchange( & task->m_state + , int(TASK_STATE_EXECUTING) + , int(TASK_STATE_WAITING) ); + + if ( old_state != int(TASK_STATE_EXECUTING) ) { + + fprintf( stderr + , "ThreadsTaskPolicyQueue::reschedule_task(0x%lx) ERROR state(%d)\n" + , (unsigned long) task + , old_state + ); + fflush(stderr); + Kokkos::abort("ThreadsTaskPolicyQueue::reschedule" ); + } +} + +void ThreadsTaskPolicyQueue::schedule_task + ( ThreadsTaskPolicyQueue::task_root_type * const task + , const bool initial_spawn ) +{ + task_root_type * const q_lock = reinterpret_cast<task_root_type*>(QLOCK); + task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED); + + //---------------------------------------- + // State is either constructing or already waiting. + // If constructing then transition to waiting. 
+ + { + const int old_state = atomic_compare_exchange( & task->m_state + , int(TASK_STATE_CONSTRUCTING) + , int(TASK_STATE_WAITING) ); + + // Head of linked list of tasks waiting on this task + task_root_type * const waitTask = + *((task_root_type * volatile const *) & task->m_wait ); + + // Member of linked list of tasks waiting on some other task + task_root_type * const next = + *((task_root_type * volatile const *) & task->m_next ); + + // An incomplete and non-executing task has: + // task->m_state == TASK_STATE_CONSTRUCTING or TASK_STATE_WAITING + // task->m_wait != q_denied + // task->m_next == 0 + // + if ( ( q_denied == waitTask ) || + ( 0 != next ) || + ( old_state != int(TASK_STATE_CONSTRUCTING) && + old_state != int(TASK_STATE_WAITING) ) ) { + fprintf(stderr,"ThreadsTaskPolicyQueue::schedule_task(0x%lx) STATE ERROR: state(%d) wait(0x%lx) next(0x%lx)\n" + , (unsigned long) task + , old_state + , (unsigned long) waitTask + , (unsigned long) next ); + fflush(stderr); + Kokkos::abort("ThreadsTaskPolicyQueue::schedule" ); + } + } + + //---------------------------------------- + + if ( initial_spawn ) { + // The initial spawn of a task increments the reference count + // for the task's existence in either a waiting or ready queue + // until the task has completed. + // Completing the task's execution is the matching + // decrement of the reference count. + + task_root_type::assign( 0 , task ); + } + + //---------------------------------------- + // Insert this task into a dependence task that is not complete. + // Push on to that task's wait queue. + + bool attempt_insert_in_queue = true ; + + task_root_type * volatile * queue = + task->m_dep_size ? 
& task->m_dep[0]->m_wait : (task_root_type **) 0 ; + + for ( int i = 0 ; attempt_insert_in_queue && ( 0 != queue ) ; ) { + + task_root_type * const head_value_old = *queue ; + + if ( q_denied == head_value_old ) { + // Wait queue is closed because task is complete, + // try again with the next dependence wait queue. + ++i ; + queue = i < task->m_dep_size ? & task->m_dep[i]->m_wait + : (task_root_type **) 0 ; + } + else { + + // Wait queue is open and not denied. + // Have exclusive access to this task. + // Assign m_next assuming a successfull insertion into the queue. + // Fence the memory assignment before attempting the CAS. + + *((task_root_type * volatile *) & task->m_next ) = head_value_old ; + + memory_fence(); + + // Attempt to insert this task into the queue. + // If fails then continue the attempt. + + attempt_insert_in_queue = + head_value_old != atomic_compare_exchange(queue,head_value_old,task); + } + } + + //---------------------------------------- + // All dependences are complete, insert into the ready list + + if ( attempt_insert_in_queue ) { + + // Increment the count of ready tasks. + // Count will be decremented when task is complete. + + atomic_fetch_add( & m_count_ready , 1 ); + + queue = task->m_queue ; + + while ( attempt_insert_in_queue ) { + + // A locked queue is being popped. + + task_root_type * const head_value_old = *queue ; + + if ( q_lock != head_value_old ) { + // Read the head of ready queue, + // if same as previous value then CAS locks the ready queue + + // Have exclusive access to this task, + // assign to head of queue, assuming successful insert + // Fence assignment before attempting insert. 
+ *((task_root_type * volatile *) & task->m_next ) = head_value_old ; + + memory_fence(); + + attempt_insert_in_queue = + head_value_old != atomic_compare_exchange(queue,head_value_old,task); + } + } + } +} + + +void TaskMember< Kokkos::Threads , void , void >::latch_add( const int k ) +{ + typedef TaskMember< Kokkos::Threads , void , void > task_root_type ; + + task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED); + + const bool ok_input = 0 < k ; + + const int count = ok_input ? atomic_fetch_add( & m_dep_size , -k ) - k + : k ; + + const bool ok_count = 0 <= count ; + + const int state = 0 != count ? TASK_STATE_WAITING : + atomic_compare_exchange( & m_state + , TASK_STATE_WAITING + , TASK_STATE_COMPLETE ); + + const bool ok_state = state == TASK_STATE_WAITING ; + + if ( ! ok_count || ! ok_state ) { + printf( "ThreadsTaskPolicyQueue::latch_add[0x%lx](%d) ERROR %s %d\n" + , (unsigned long) this + , k + , ( ! ok_input ? "Non-positive input" : + ( ! ok_count ? "Negative count" : "Bad State" ) ) + , ( ! ok_input ? k : + ( ! ok_count ? count : state ) ) + ); + Kokkos::abort( "ThreadsTaskPolicyQueue::latch_add ERROR" ); + } + else if ( 0 == count ) { + // Stop other tasks from adding themselves to this latch's wait queue. + // The wait queue is updated concurrently so guard with an atomic. 
+ + ThreadsTaskPolicyQueue & policy = *m_policy ; + task_root_type * wait_queue = *((task_root_type * volatile *) &m_wait); + task_root_type * wait_queue_old = 0 ; + + do { + wait_queue_old = wait_queue ; + wait_queue = atomic_compare_exchange( & m_wait , wait_queue_old , q_denied ); + } while ( wait_queue_old != wait_queue ); + + // Pop waiting tasks and schedule them + while ( wait_queue ) { + task_root_type * const x = wait_queue ; wait_queue = x->m_next ; x->m_next = 0 ; + policy.schedule_task( x , false /* not initial spawn */ ); + } + } +} + +//---------------------------------------------------------------------------- + +void ThreadsTaskPolicyQueue::deallocate_task( void * ptr , unsigned size_alloc ) +{ +/* + const int n = atomic_fetch_add( & alloc_count , -1 ) - 1 ; + + fprintf( stderr + , "ThreadsTaskPolicyQueue::deallocate_task(0x%lx,%d) count(%d)\n" + , (unsigned long) ptr + , size_alloc + , n + ); + fflush( stderr ); +*/ + + m_space.deallocate( ptr , size_alloc ); + + Kokkos::atomic_decrement( & m_count_alloc ); +} + +ThreadsTaskPolicyQueue::task_root_type * +ThreadsTaskPolicyQueue::allocate_task + ( const unsigned arg_sizeof_task + , const unsigned arg_dep_capacity + , const unsigned arg_team_shmem + ) +{ + const unsigned base_size = arg_sizeof_task + + ( arg_sizeof_task % sizeof(task_root_type*) + ? sizeof(task_root_type*) - arg_sizeof_task % sizeof(task_root_type*) + : 0 ); + + const unsigned dep_capacity + = ~0u == arg_dep_capacity + ? m_default_dependence_capacity + : arg_dep_capacity ; + + const unsigned size_alloc = + base_size + sizeof(task_root_type*) * dep_capacity ; + +#if 0 + // User created task memory pool with an estimate, + // if estimate is to low then report and throw exception. 
+ + if ( m_space.get_min_block_size() < size_alloc ) { + fprintf(stderr,"TaskPolicy<Threads> task allocation requires %d bytes on memory pool with %d byte chunk size\n" + , int(size_alloc) + , int(m_space.get_min_block_size()) + ); + fflush(stderr); + Kokkos::Impl::throw_runtime_exception("TaskMember< Threads >::task_allocate"); + } +#endif + + task_root_type * const task = + reinterpret_cast<task_root_type*>( m_space.allocate( size_alloc ) ); + + if ( task != 0 ) { + + // Initialize task's root and value data structure + // Calling function must copy construct the functor. + + new( (void*) task ) task_root_type(); + + task->m_policy = this ; + task->m_size_alloc = size_alloc ; + task->m_dep_capacity = dep_capacity ; + task->m_shmem_size = arg_team_shmem ; + + if ( dep_capacity ) { + task->m_dep = + reinterpret_cast<task_root_type**>( + reinterpret_cast<unsigned char*>(task) + base_size ); + + for ( unsigned i = 0 ; i < dep_capacity ; ++i ) + task->task_root_type::m_dep[i] = 0 ; + } + + Kokkos::atomic_increment( & m_count_alloc ); + } + return task ; +} + + +//---------------------------------------------------------------------------- + +void ThreadsTaskPolicyQueue::add_dependence + ( ThreadsTaskPolicyQueue::task_root_type * const after + , ThreadsTaskPolicyQueue::task_root_type * const before + ) +{ + if ( ( after != 0 ) && ( before != 0 ) ) { + + int const state = *((volatile const int *) & after->m_state ); + + // Only add dependence during construction or during execution. + // Both tasks must have the same policy. + // Dependence on non-full memory cannot be mixed with any other dependence. 
+ + const bool ok_state = + Kokkos::Experimental::TASK_STATE_CONSTRUCTING == state || + Kokkos::Experimental::TASK_STATE_EXECUTING == state ; + + const bool ok_capacity = + after->m_dep_size < after->m_dep_capacity ; + + const bool ok_policy = + after->m_policy == this && before->m_policy == this ; + + if ( ok_state && ok_capacity && ok_policy ) { + + ++after->m_dep_size ; + + task_root_type::assign( after->m_dep + (after->m_dep_size-1) , before ); + + memory_fence(); + } + else { + +fprintf( stderr + , "ThreadsTaskPolicyQueue::add_dependence( 0x%lx , 0x%lx ) ERROR %s\n" + , (unsigned long) after + , (unsigned long) before + , ( ! ok_state ? "Task not constructing or executing" : + ( ! ok_capacity ? "Task Exceeded dependence capacity" + : "Tasks from different policies" + )) ); + +fflush( stderr ); + + Kokkos::abort("ThreadsTaskPolicyQueue::add_dependence ERROR"); + } + } +} + +} /* namespace Impl */ +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { + +TaskPolicy< Kokkos::Threads >::TaskPolicy + ( const unsigned arg_task_max_count + , const unsigned arg_task_max_size // Application's task size + , const unsigned arg_task_default_dependence_capacity + , const unsigned arg_task_team_size + ) + : m_track() + , m_policy(0) +{ + typedef Kokkos::Experimental::Impl::SharedAllocationRecord + < Kokkos::HostSpace , Impl::ThreadsTaskPolicyQueue::Destroy > record_type ; + + record_type * record = + record_type::allocate( Kokkos::HostSpace() + , "Threads task queue" + , sizeof(Impl::ThreadsTaskPolicyQueue) + ); + + m_policy = + reinterpret_cast< Impl::ThreadsTaskPolicyQueue * >( record->data() ); + + // Tasks are allocated with application's task size + sizeof(task_root_type) + + const size_t full_task_size_estimate = + arg_task_max_size + + sizeof(task_root_type) 
+ + sizeof(task_root_type*) * arg_task_default_dependence_capacity ; + + new( m_policy ) + Impl::ThreadsTaskPolicyQueue( arg_task_max_count + , full_task_size_estimate + , arg_task_default_dependence_capacity + , arg_task_team_size ); + + record->m_destroy.m_policy = m_policy ; + + m_track.assign_allocated_record_to_uninitialized( record ); +} + + +TaskPolicy< Kokkos::Threads >::member_type & +TaskPolicy< Kokkos::Threads >::member_single() +{ + static member_type s ; + return s ; +} + +void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Threads > & policy ) +{ + typedef Kokkos::Impl::ThreadsExecTeamMember member_type ; + + enum { BASE_SHMEM = 1024 }; + + Kokkos::Impl::ThreadsExec::resize_scratch( 0 , member_type::team_reduce_size() + BASE_SHMEM ); + + Kokkos::Impl::ThreadsExec::start( & Impl::ThreadsTaskPolicyQueue::driver + , policy.m_policy ); + + Kokkos::Impl::ThreadsExec::fence(); +} + +} /* namespace Experimental */ +} /* namespace Kokkos */ + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +typedef TaskMember< Kokkos::Threads , void , void > Task ; + +//---------------------------------------------------------------------------- + +Task::~TaskMember() +{ +} + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + +void Task::assign( Task ** const lhs_ptr , Task * rhs ) +{ + Task * const q_denied = reinterpret_cast<Task*>(QDENIED); + + // Increment rhs reference count. + if ( rhs ) { atomic_fetch_add( & rhs->m_ref_count , 1 ); } + + if ( 0 == lhs_ptr ) return ; + + // Must have exclusive access to *lhs_ptr. + // Assign the pointer and retrieve the previous value. 
+ +#if 1 + + Task * const old_lhs = *lhs_ptr ; + + *lhs_ptr = rhs ; + +#elif 0 + + Task * const old_lhs = *((Task*volatile*)lhs_ptr); + + *((Task*volatile*)lhs_ptr) = rhs ; + + Kokkos::memory_fence(); + +#else + + Task * const old_lhs = atomic_exchange( lhs_ptr , rhs ); + +#endif + + if ( old_lhs && rhs && old_lhs->m_policy != rhs->m_policy ) { + Kokkos::abort( "Kokkos::Impl::TaskMember<Kokkos::Threads>::assign ERROR different queues"); + } + + if ( old_lhs ) { + + // Decrement former lhs reference count. + // If reference count is zero task must be complete, then delete task. + // Task is ready for deletion when wait == q_denied + int const count = atomic_fetch_add( & (old_lhs->m_ref_count) , -1 ) - 1 ; + int const state = old_lhs->m_state ; + Task * const wait = *((Task * const volatile *) & old_lhs->m_wait ); + + const bool ok_count = 0 <= count ; + + // If count == 0 then will be deleting + // and must either be constructing or complete. + const bool ok_state = 0 < count ? true : + ( ( state == int(TASK_STATE_CONSTRUCTING) && wait == 0 ) || + ( state == int(TASK_STATE_COMPLETE) && wait == q_denied ) ) + && + old_lhs->m_next == 0 && + old_lhs->m_dep_size == 0 ; + + if ( ! ok_count || ! 
ok_state ) { + + fprintf( stderr , "Kokkos::Impl::TaskManager<Kokkos::Threads>::assign ERROR deleting task(0x%lx) m_ref_count(%d) , m_wait(0x%ld)\n" + , (unsigned long) old_lhs + , count + , (unsigned long) wait ); + fflush(stderr); + Kokkos::abort( "Kokkos::Impl::TaskMember<Kokkos::Threads>::assign ERROR deleting"); + } + + if ( count == 0 ) { + // When 'count == 0' this thread has exclusive access to 'old_lhs' + + ThreadsTaskPolicyQueue & queue = *( old_lhs->m_policy ); + + queue.deallocate_task( old_lhs , old_lhs->m_size_alloc ); + } + } +} + +#endif + +//---------------------------------------------------------------------------- + +Task * Task::get_dependence( int i ) const +{ + Task * const t = m_dep[i] ; + + if ( Kokkos::Experimental::TASK_STATE_EXECUTING != m_state || i < 0 || m_dep_size <= i || 0 == t ) { + +fprintf( stderr + , "TaskMember< Threads >::get_dependence ERROR : task[%lx]{ state(%d) dep_size(%d) dep[%d] = %lx }\n" + , (unsigned long) this + , m_state + , m_dep_size + , i + , (unsigned long) t + ); +fflush( stderr ); + + Kokkos::Impl::throw_runtime_exception("TaskMember< Threads >::get_dependence ERROR"); + } + + return t ; +} + +//---------------------------------------------------------------------------- + +void Task::clear_dependence() +{ + for ( int i = m_dep_size - 1 ; 0 <= i ; --i ) { + assign( m_dep + i , 0 ); + } + + *((volatile int *) & m_dep_size ) = 0 ; + + memory_fence(); +} + +//---------------------------------------------------------------------------- + +} /* namespace Impl */ +} /* namespace Experimental */ +} /* namespace Kokkos */ + +#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */ + diff --git a/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.hpp b/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..116d32e4fc4d6c6da2968518caacc133e7488ab4 --- /dev/null +++ 
b/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.hpp @@ -0,0 +1,745 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+// Experimental unified task-data parallel manycore LDRD
+
+#ifndef KOKKOS_THREADS_TASKPOLICY_HPP
+#define KOKKOS_THREADS_TASKPOLICY_HPP
+
+
+#include <Kokkos_Threads.hpp>
+#include <Kokkos_TaskPolicy.hpp>
+
+#if defined( KOKKOS_HAVE_PTHREAD ) && defined( KOKKOS_ENABLE_TASKPOLICY )
+
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+struct ThreadsTaskPolicyQueue ;
+
+/** \brief  Base class for all Kokkos::Threads tasks */
+template<>
+class TaskMember< Kokkos::Threads , void , void > {
+public:
+
+  template < class > friend class Kokkos::Experimental::TaskPolicy ;
+  friend struct ThreadsTaskPolicyQueue ;
+
+  typedef TaskMember * (* function_verify_type) ( TaskMember * );
+  typedef void (* function_single_type) ( TaskMember * );
+  typedef void (* function_team_type) ( TaskMember * , Kokkos::Impl::ThreadsExecTeamMember & );
+
+private:
+
+
+  ThreadsTaskPolicyQueue  * m_policy ;
+  TaskMember * volatile   * m_queue ;        ///< Queue slot this task is scheduled through
+  function_verify_type      m_verify ;
+  function_team_type        m_team ;         ///< Apply function
+  function_single_type      m_serial ;       ///< Apply function
+  TaskMember             ** m_dep ;          ///< Dependences
+  TaskMember              * m_wait ;         ///< Head of linked list of tasks waiting on this task
+  TaskMember              * m_next ;         ///< Member of linked list of tasks
+  int                       m_dep_capacity ; ///< Capacity of dependences
+  int                       m_dep_size ;     ///< Actual count of dependences
+  int                       m_size_alloc ;
+  int                       m_shmem_size ;
+  int                       m_ref_count ;    ///< Reference count
+  int                       m_state ;        ///< State of the task
+
+
+  TaskMember( TaskMember && ) = delete ;
+  TaskMember( const TaskMember & ) = delete ;
+  TaskMember & operator = ( TaskMember && ) = delete ;
+  TaskMember & operator = ( const TaskMember & ) = delete ;
+
+protected:
+
+  /** \brief  Construct an unscheduled task in the CONSTRUCTING state.
+   *
+   *  Every data member is given a defined value, in declaration order.
+   *  m_queue is included so that a task which has not yet been assigned
+   *  a queue holds a null pointer rather than an indeterminate one.
+   */
+  TaskMember()
+    : m_policy(0)
+    , m_queue(0)
+    , m_verify(0)
+    , m_team(0)
+    , m_serial(0)
+    , m_dep(0)
+    , m_wait(0)
+    , m_next(0)
+    , m_dep_capacity(0)
+    , m_dep_size(0)
+    , m_size_alloc(0)
+    , m_shmem_size(0)
+    , m_ref_count(0)
+    , m_state( TASK_STATE_CONSTRUCTING )
+    {}
+
+public:
+
+  ~TaskMember();
+
+  /** \brief  Current reference count, read through a volatile access */
+  KOKKOS_INLINE_FUNCTION
+  int reference_count() const
+    { return *((volatile int *) & m_ref_count ); }
+
+  /** \brief  Check that task 't' was created for result type ResultType.
+   *
+   *  For ResultType == void, or a null 't', the argument is returned
+   *  unchanged.  Otherwise t->m_verify must equal the address of this
+   *  instantiation; on mismatch 't' is nulled and Kokkos::abort is called.
+   */
+  template< typename ResultType >
+  KOKKOS_FUNCTION static
+  TaskMember * verify_type( TaskMember * t )
+    {
+      enum { check_type = ! std::is_same< ResultType , void >::value };
+
+      if ( check_type && t != 0 ) {
+
+        // Verify that t->m_verify is this function
+        const function_verify_type self = & TaskMember::template verify_type< ResultType > ;
+
+        if ( t->m_verify != self ) {
+          t = 0 ;
+          Kokkos::abort("TaskPolicy< Threads > verify_result_type" );
+        }
+      }
+      return t ;
+    }
+
+  //----------------------------------------
+  /*  Inheritance Requirements on task types:
+   *
+   *    class TaskMember< Threads , DerivedType::value_type , FunctorType >
+   *      : public TaskMember< Threads , DerivedType::value_type , void >
+   *      , public FunctorType
+   *      { ... };
+   *
+   *  If value_type != void
+   *    class TaskMember< Threads , value_type , void >
+   *      : public TaskMember< Threads , void , void >
+   *
+   */
+  //----------------------------------------
+
+  /** \brief  Run a single-thread task whose result type is void.
+   *
+   *  Calls the functor's apply(); the functor is destroyed afterwards
+   *  only if the task is still in the EXECUTING state.
+   */
+  template< class DerivedTaskType , class Tag >
+  KOKKOS_FUNCTION static
+  void apply_single(
+    typename std::enable_if
+      <( std::is_same<Tag,void>::value &&
+         std::is_same< typename DerivedTaskType::result_type , void >::value
+       ), TaskMember * >::type t )
+    {
+      {
+        typedef typename DerivedTaskType::functor_type functor_type ;
+
+        functor_type * const f =
+          static_cast< functor_type * >( static_cast< DerivedTaskType * >(t) );
+
+        f->apply();
+
+        if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) {
+          f->~functor_type();
+        }
+      }
+    }
+
+  /** \brief  Run a single-thread task with a non-void result.
+   *
+   *  Same as the void overload except that the task's m_result is
+   *  passed to the functor's apply().
+   */
+  template< class DerivedTaskType , class Tag >
+  KOKKOS_FUNCTION static
+  void apply_single(
+    typename std::enable_if
+      <( std::is_same< Tag , void >::value &&
+         ! std::is_same< typename DerivedTaskType::result_type , void >::value
+       ), TaskMember * >::type t )
+    {
+      {
+        typedef typename DerivedTaskType::functor_type functor_type ;
+
+        DerivedTaskType * const self = static_cast< DerivedTaskType * >(t);
+        functor_type    * const f    = static_cast< functor_type * >( self );
+
+        f->apply( self->m_result );
+
+        if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) {
+          f->~functor_type();
+        }
+      }
+    }
+
+  //----------------------------------------
+
+  /** \brief  Run a team task whose result type is void.
+   *
+   *  Every team member calls apply( member ); the functor is destroyed
+   *  only by the member for which team_fan_in() returns true, and only
+   *  if the task is still in the EXECUTING state.
+   */
+  template< class DerivedTaskType , class Tag >
+  KOKKOS_FUNCTION static
+  void apply_team(
+    typename std::enable_if
+      <( std::is_same<Tag,void>::value &&
+         std::is_same<typename DerivedTaskType::result_type,void>::value
+       ), TaskMember * >::type t
+    , Kokkos::Impl::ThreadsExecTeamMember & member
+    )
+    {
+      typedef typename DerivedTaskType::functor_type functor_type ;
+
+      functor_type * const f =
+        static_cast< functor_type * >( static_cast< DerivedTaskType * >(t) );
+
+      f->apply( member );
+
+      // Synchronize for possible functor destruction and
+      // completion of team task.
+      if ( member.team_fan_in() ) {
+        if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) {
+          f->~functor_type();
+        }
+      }
+    }
+
+  /** \brief  Run a team task with a non-void result.
+   *
+   *  Same as the void overload except that the task's m_result is
+   *  passed to apply( member , result ).
+   */
+  template< class DerivedTaskType , class Tag >
+  KOKKOS_FUNCTION static
+  void apply_team(
+    typename std::enable_if
+      <( std::is_same<Tag,void>::value &&
+         ! std::is_same<typename DerivedTaskType::result_type,void>::value
+       ), TaskMember * >::type t
+    , Kokkos::Impl::ThreadsExecTeamMember & member
+    )
+    {
+      typedef typename DerivedTaskType::functor_type functor_type ;
+
+      DerivedTaskType * const self = static_cast< DerivedTaskType * >(t);
+      functor_type    * const f    = static_cast< functor_type * >( self );
+
+      f->apply( member , self->m_result );
+
+      // Synchronize for possible functor destruction and
+      // completion of team task.
+      if ( member.team_fan_in() ) {
+        if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) {
+          f->~functor_type();
+        }
+      }
+    }
+
+  //----------------------------------------
+
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+  static
+  void assign( TaskMember ** const lhs , TaskMember * const rhs );
+#else
+  KOKKOS_INLINE_FUNCTION static
+  void assign( TaskMember ** const lhs , TaskMember * const rhs ) {}
+#endif
+
+  TaskMember * get_dependence( int i ) const ;
+
+  /** \brief  Number of dependences currently recorded */
+  KOKKOS_INLINE_FUNCTION
+  int get_dependence() const { return m_dep_size ; }
+
+  void clear_dependence();
+
+  void latch_add( const int k );
+
+  //----------------------------------------
+
+  typedef FutureValueTypeIsVoidError get_result_type ;
+
+  KOKKOS_INLINE_FUNCTION
+  get_result_type get() const { return get_result_type() ; }
+
+  inline static
+  void construct_result( TaskMember * const ) {}
+
+  KOKKOS_INLINE_FUNCTION
+  Kokkos::Experimental::TaskState get_state() const { return Kokkos::Experimental::TaskState( m_state ); }
+
+};
+
+/** \brief  A Future< Kokkos::Threads , ResultType > will cast
+ *          from  TaskMember< Kokkos::Threads , void , void >
+ *          to    TaskMember< Kokkos::Threads , ResultType , void >
+ *          to query the result.
+ */
+template< class ResultType >
+class TaskMember< Kokkos::Threads , ResultType , void >
+  : public TaskMember< Kokkos::Threads , void , void >
+{
+public:
+
+  using result_type = ResultType ;
+
+  /// Storage for the value produced by the task.
+  result_type m_result ;
+
+  using get_result_type = const result_type & ;
+
+  /// Read-only access to the stored result.
+  KOKKOS_INLINE_FUNCTION
+  get_result_type get() const
+    {
+      return m_result ;
+    }
+
+  /// Value-construct the result member of '*ptr' in place.
+  static
+  void construct_result( TaskMember * const ptr )
+    {
+      new((void*)(& ptr->m_result)) result_type();
+    }
+
+  // Copying and moving a task are not permitted.
+  TaskMember( const TaskMember & ) = delete ;
+  TaskMember( TaskMember && ) = delete ;
+  TaskMember & operator = ( const TaskMember & ) = delete ;
+  TaskMember & operator = ( TaskMember && ) = delete ;
+
+  /// Default construction: root task base first, then a value-initialized result.
+  TaskMember()
+    : TaskMember< Kokkos::Threads , void , void >()
+    , m_result()
+    {}
+};
+
+/** \brief  Callback functions will cast
+ *          from  TaskMember< Kokkos::Threads , void , void >
+ *          to    TaskMember< Kokkos::Threads , ResultType , FunctorType >
+ *          to execute work functions.
+ */
+template< class ResultType , class FunctorType >
+class TaskMember< Kokkos::Threads , ResultType , FunctorType >
+  : public TaskMember< Kokkos::Threads , ResultType , void >
+  , public FunctorType
+{
+public:
+
+  using result_type  = ResultType ;
+  using functor_type = FunctorType ;
+
+  // A task always carries a functor and is neither copied nor moved.
+  TaskMember() = delete ;
+  TaskMember( const TaskMember & ) = delete ;
+  TaskMember( TaskMember && ) = delete ;
+  TaskMember & operator = ( const TaskMember & ) = delete ;
+  TaskMember & operator = ( TaskMember && ) = delete ;
+
+  /// Construct the result-holding base, then copy the functor into the functor base.
+  TaskMember( const functor_type & arg_functor )
+    : TaskMember< Kokkos::Threads , ResultType , void >()
+    , functor_type( arg_functor )
+    {}
+
+  /// Placement-construct the functor sub-object of '*ptr' from 'arg_functor',
+  /// then construct the result through the result-holding base class.
+  static
+  void copy_construct( TaskMember * const ptr
+                     , const functor_type & arg_functor )
+    {
+      using base_type = TaskMember< Kokkos::Threads , ResultType , void > ;
+
+      FunctorType * const functor_storage = static_cast<FunctorType*>(ptr) ;
+      new((void*)functor_storage) functor_type( arg_functor );
+
+      base_type * const result_base = static_cast<base_type*>( ptr ) ;
+      base_type::construct_result( result_base );
+    }
+};
+
+//----------------------------------------------------------------------------
+
+struct ThreadsTaskPolicyQueue {
+
+  enum { NPRIORITY = 3 };
+
+  typedef Kokkos::Experimental::MemoryPool< Kokkos::Threads >
+    memory_space ;
+
+  typedef Kokkos::Experimental::Impl::TaskMember< Kokkos::Threads, void, void >
+    task_root_type ;
+
+  memory_space     m_space ;
+  task_root_type * m_team[ NPRIORITY ];
+  task_root_type * m_serial[ NPRIORITY ];
+  int              m_team_size ;    ///< Fixed size of a task-team
+  int              m_default_dependence_capacity ;
+  int     volatile m_count_ready ;  ///< Ready plus executing tasks
+  int     volatile m_count_alloc ;  ///< Total allocated tasks
+
+  // Execute tasks until all non-waiting tasks are complete.
+  static void driver( Kokkos::Impl::ThreadsExec & exec
+                    , const void * arg );
+
+  task_root_type * allocate_task
+    ( const unsigned arg_sizeof_task
+    , const unsigned arg_dep_capacity
+    , const unsigned arg_team_shmem
+    );
+
+  void deallocate_task( void * , unsigned );
+  void schedule_task( task_root_type * const
+                    , const bool initial_spawn = true );
+  void reschedule_task( task_root_type * const );
+  void add_dependence( task_root_type * const after
+                     , task_root_type * const before );
+
+  // When a task finishes executing update its dependences
+  // and either deallocate the task if complete
+  // or reschedule the task if respawned.
+  void complete_executed_task( task_root_type * );
+
+  // Pop a task from a ready queue
+  static task_root_type *
+    pop_ready_task( task_root_type * volatile * const queue );
+
+  ThreadsTaskPolicyQueue() = delete ;
+  ThreadsTaskPolicyQueue( ThreadsTaskPolicyQueue && ) = delete ;
+  ThreadsTaskPolicyQueue( const ThreadsTaskPolicyQueue & ) = delete ;
+  ThreadsTaskPolicyQueue & operator = ( ThreadsTaskPolicyQueue && ) = delete ;
+  ThreadsTaskPolicyQueue & operator = ( const ThreadsTaskPolicyQueue & ) = delete ;
+
+  ~ThreadsTaskPolicyQueue();
+
+  ThreadsTaskPolicyQueue
+    ( const unsigned arg_task_max_count
+    , const unsigned arg_task_max_size
+    , const unsigned arg_task_default_dependence_capacity
+    , const unsigned arg_task_team_size
+    );
+
+  // Callback to destroy the shared memory tracked queue.
+  struct Destroy {
+    ThreadsTaskPolicyQueue * m_policy ;
+    void destroy_shared_allocation();
+  };
+};
+
+} /* namespace Impl */
+} /* namespace Experimental */
+} /* namespace Kokkos */
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Experimental {
+
+void wait( TaskPolicy< Kokkos::Threads > & );
+
+template<>
+class TaskPolicy< Kokkos::Threads >
+{
+public:
+
+  typedef Kokkos::Threads execution_space ;
+  typedef TaskPolicy execution_policy ;
+  typedef Kokkos::Impl::ThreadsExecTeamMember member_type ;
+
+private:
+
+  typedef Impl::TaskMember< Kokkos::Threads , void , void > task_root_type ;
+  typedef Kokkos::Experimental::MemoryPool< Kokkos::Threads > memory_space ;
+
+  typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ;
+
+  track_type m_track ;
+  Impl::ThreadsTaskPolicyQueue * m_policy ;
+
+  // Map a functor pointer to the root task object it is a base class of.
+  template< class FunctorType >
+  static inline
+  const task_root_type * get_task_root( const FunctorType * f )
+    {
+      typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ;
+      return static_cast< const task_root_type * >( static_cast< const task_type * >(f) );
+    }
+
+  template< class FunctorType >
+  static inline
+  task_root_type * get_task_root( FunctorType * f )
+    {
+      typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ;
+      return static_cast< task_root_type * >( static_cast< task_type * >(f) );
+    }
+
+  /** \brief  Allocate and construct a task.
+   *
+   *  Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ]
+   *
+   *  Returns null, without constructing anything, when allocate_task
+   *  returns null.
+   */
+  template< class DerivedTaskType , class Tag >
+  task_root_type *
+  create( const typename DerivedTaskType::functor_type & arg_functor
+        , const task_root_type::function_single_type arg_apply_single
+        , const task_root_type::function_team_type arg_apply_team
+        , const unsigned arg_team_shmem
+        , const unsigned arg_dependence_capacity
+        )
+    {
+      task_root_type * const t =
+        m_policy->allocate_task( sizeof(DerivedTaskType)
+                               , arg_dependence_capacity
+                               , arg_team_shmem
+                               );
+      if ( t != 0 ) {
+
+        DerivedTaskType * const task = static_cast<DerivedTaskType*>(t);
+
+        DerivedTaskType::copy_construct( task , arg_functor );
+
+        task->task_root_type::m_verify = & task_root_type::template verify_type< typename DerivedTaskType::value_type > ;
+        task->task_root_type::m_team = arg_apply_team ;
+        task->task_root_type::m_serial = arg_apply_single ;
+
+        // Do not proceed until initialization is written to memory
+        Kokkos::memory_fence();
+      }
+      return t ;
+    }
+
+public:
+
+  // Valid team sizes are 1,
+  // Threads::pool_size(1) == threads per numa, or
+  // Threads::pool_size(2) == threads per core
+
+  TaskPolicy
+    ( const unsigned arg_task_max_count
+    , const unsigned arg_task_max_size
+    , const unsigned arg_task_default_dependence_capacity = 4
+    , const unsigned arg_task_team_size = 0 /* choose default */
+    );
+
+  KOKKOS_FUNCTION TaskPolicy() = default ;
+  KOKKOS_FUNCTION TaskPolicy( TaskPolicy && rhs ) = default ;
+  KOKKOS_FUNCTION TaskPolicy( const TaskPolicy & rhs ) = default ;
+  KOKKOS_FUNCTION TaskPolicy & operator = ( TaskPolicy && rhs ) = default ;
+  KOKKOS_FUNCTION TaskPolicy & operator = ( const TaskPolicy & rhs ) = default ;
+
+  //----------------------------------------
+
+  KOKKOS_INLINE_FUNCTION
+  int allocated_task_count() const { return m_policy->m_count_alloc ; }
+
+  //----------------------------------------
+  // Create serial-thread task
+
+  template< class FunctorType >
+  KOKKOS_INLINE_FUNCTION
+  Future< typename FunctorType::value_type , execution_space >
+  task_create( const FunctorType & functor
+             , const unsigned dependence_capacity = ~0u )
+    {
+      typedef typename FunctorType::value_type value_type ;
+      typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ;
+
+      return Future< value_type , execution_space >(
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+        TaskPolicy::create< task_type , void >
+          ( functor
+          , & task_root_type::template apply_single< task_type , void >
+          , task_root_type::function_team_type(0)
+          , 0
+          , dependence_capacity
+          )
+#endif
+        );
+    }
+
+  template< class FunctorType >
+  KOKKOS_INLINE_FUNCTION
+  Future< typename FunctorType::value_type , execution_space >
+  proc_create( const FunctorType & functor
+             , const unsigned dependence_capacity = ~0u )
+    { return task_create( functor , dependence_capacity ); }
+
+  // Create thread-team task
+
+  template< class FunctorType >
+  KOKKOS_INLINE_FUNCTION
+  Future< typename FunctorType::value_type , execution_space >
+  task_create_team( const FunctorType & functor
+                  , const unsigned dependence_capacity = ~0u )
+    {
+      typedef typename FunctorType::value_type value_type ;
+      typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ;
+
+      return Future< value_type , execution_space >(
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+        TaskPolicy::create< task_type , void >
+          ( functor
+          , task_root_type::function_single_type(0)
+          , & task_root_type::template apply_team< task_type , void >
+          , Kokkos::Impl::FunctorTeamShmemSize< FunctorType >::
+              value( functor , m_policy->m_team_size )
+          , dependence_capacity
+          )
+#endif
+        );
+    }
+
+  template< class FunctorType >
+  KOKKOS_INLINE_FUNCTION
+  Future< typename FunctorType::value_type , execution_space >
+  proc_create_team( const FunctorType & functor
+                  , const unsigned dependence_capacity = ~0u )
+    { return task_create_team( functor , dependence_capacity ); }
+
+  template< class A1 , class A2 , class A3 , class A4 >
+  KOKKOS_INLINE_FUNCTION
+  void add_dependence( const Future<A1,A2> & after
+                     , const Future<A3,A4> & before
+                     , typename std::enable_if
+                        < std::is_same< typename Future<A1,A2>::execution_space , execution_space >::value
+                          &&
+                          std::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
+                        >::type * = 0
+                      ) const
+    {
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+      m_policy->add_dependence( after.m_task , before.m_task );
+#endif
+    }
+
+  //----------------------------------------
+
+  /** \brief  Create a latch task whose counter starts at N.
+   *
+   *  The counter is stored in the task's m_dep_size and the task is put
+   *  in the WAITING state.  allocate_task can return null, as create()
+   *  above already allows for; in that case nothing is written and the
+   *  returned Future wraps the null pointer.
+   */
+  KOKKOS_INLINE_FUNCTION
+  Future< Latch , execution_space >
+  create_latch( const int N ) const
+    {
+      task_root_type * const task =
+        m_policy->allocate_task( sizeof(task_root_type) , 0 , 0 );
+
+      // Only initialize the task when the allocation succeeded.
+      if ( task != 0 ) {
+        task->m_dep_size = N ; // Using m_dep_size for latch counter
+        task->m_state = TASK_STATE_WAITING ;
+      }
+      return Future< Latch , execution_space >( task );
+    }
+
+  //----------------------------------------
+
+  template< class FunctorType , class A3 , class A4 >
+  KOKKOS_INLINE_FUNCTION
+  void add_dependence( FunctorType * task_functor
+                     , const Future<A3,A4> & before
+                     , typename std::enable_if
+                        < std::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
+                        >::type * = 0
+                      ) const
+    {
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+      m_policy->add_dependence( get_task_root(task_functor) , before.m_task );
+#endif
+    }
+
+  // Select the team or serial queue (slot 0 for priority, slot 1 otherwise)
+  // for the task held by 'f' and schedule it; a null future is returned as is.
+  template< class ValueType >
+  const Future< ValueType , execution_space > &
+    spawn( const Future< ValueType , execution_space > & f
+         , const bool priority = false ) const
+      {
+        if ( f.m_task ) {
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+          f.m_task->m_queue =
+            ( f.m_task->m_team != 0
+            ? & ( m_policy->m_team[   priority ? 0 : 1 ] )
+            : & ( m_policy->m_serial[ priority ? 0 : 1 ] ) );
+          m_policy->schedule_task( f.m_task );
+#endif
+        }
+        return f ;
+      }
+
+  template< class FunctorType >
+  KOKKOS_INLINE_FUNCTION
+  void respawn( FunctorType * task_functor
+              , const bool priority = false ) const
+    {
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+      task_root_type * const t = get_task_root(task_functor);
+      t->m_queue =
+        ( t->m_team != 0 ? & ( m_policy->m_team[   priority ? 0 : 1 ] )
+                         : & ( m_policy->m_serial[ priority ? 0 : 1 ] ) );
+      m_policy->reschedule_task( t );
+#endif
+    }
+
+  // When a create method fails by returning a null Future
+  // the task that called the create method may respawn
+  // with a dependence on memory becoming available.
+  // This is a race as more than one task may be respawned
+  // with this need.
+
+  template< class FunctorType >
+  KOKKOS_INLINE_FUNCTION
+  void respawn_needing_memory( FunctorType * task_functor ) const
+    {
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+      task_root_type * const t = get_task_root(task_functor);
+      t->m_queue =
+        ( t->m_team != 0 ? & ( m_policy->m_team[   2 ] )
+                         : & ( m_policy->m_serial[ 2 ] ) );
+      m_policy->reschedule_task( t );
+#endif
+    }
+
+  //----------------------------------------
+  // Functions for an executing task functor to query dependences,
+  // set new dependences, and respawn itself.
+
+  template< class FunctorType >
+  KOKKOS_INLINE_FUNCTION
+  Future< void , execution_space >
+  get_dependence( const FunctorType * task_functor , int i ) const
+    {
+      return Future<void,execution_space>(
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+        get_task_root(task_functor)->get_dependence(i)
+#endif
+        );
+    }
+
+  template< class FunctorType >
+  KOKKOS_INLINE_FUNCTION
+  int get_dependence( const FunctorType * task_functor ) const
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+    { return get_task_root(task_functor)->get_dependence(); }
+#else
+    { return 0 ; }
+#endif
+
+  template< class FunctorType >
+  KOKKOS_INLINE_FUNCTION
+  void clear_dependence( FunctorType * task_functor ) const
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+    { get_task_root(task_functor)->clear_dependence(); }
+#else
+    {}
+#endif
+
+  //----------------------------------------
+
+  static member_type & member_single();
+
+  friend void wait( TaskPolicy< Kokkos::Threads > & );
+};
+
+} /* namespace Experimental */
+} /* namespace Kokkos */
+
+//----------------------------------------------------------------------------
+
+#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */
+#endif /* #ifndef KOKKOS_THREADS_TASKPOLICY_HPP */
+
+
diff --git a/lib/kokkos/core/src/impl/CMakeLists.txt b/lib/kokkos/core/src/impl/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c543194de3993015f6940506c0ff51da157f2084
--- /dev/null
+++ b/lib/kokkos/core/src/impl/CMakeLists.txt
@@ -0,0 +1,18 @@
+
+SET(HEADERS "")
+SET(SOURCES "")
+
+FILE(GLOB HEADERS *.hpp)
+FILE(GLOB SOURCES *.cpp)
+
+TRIBITS_ADD_LIBRARY(
+    kokkoscore_impl
+    NOINSTALLHEADERS ${HEADERS}
+    SOURCES ${SOURCES}
+    DEPLIBS
+    )
+
+SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})
+
+INSTALL(FILES ${HEADERS} DESTINATION ${TRILINOS_INCDIR}/impl/)
+
diff --git a/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.cpp
b/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..96b37043455e51d726e1d22e4f3e450986acae01 --- /dev/null +++ b/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.cpp @@ -0,0 +1,346 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <Kokkos_Core.hpp>
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+int SharedAllocationRecord< void , void >::s_tracking_enabled = 1 ;
+
+void SharedAllocationRecord< void , void >::tracking_claim_and_disable()
+{
+  // A host thread claim and disable tracking flag
+  // (spins until the flag is exchanged from 1 to 0)
+
+  while ( ! Kokkos::atomic_compare_exchange_strong( & s_tracking_enabled, 1, 0 ) );
+}
+
+void SharedAllocationRecord< void , void >::tracking_release_and_enable()
+{
+  // The host thread that claimed and disabled the tracking flag
+  // now release and enable tracking.
+
+  if ( ! Kokkos::atomic_compare_exchange_strong( & s_tracking_enabled, 0, 1 ) ){
+    Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord<>::tracking_release_and_enable FAILED, this host process thread did not hold the lock" );
+  }
+}
+
+//----------------------------------------------------------------------------
+
+/** \brief  Check the consistency of the tracking list that 'arg_record'
+ *          belongs to.
+ *
+ *  Returns false when 'arg_record' or its root is null, when the root's
+ *  use count is non-zero, or when any record in the list has a wrong
+ *  root, an inconsistent prev/next link, or a negative use count.  The
+ *  first offending record is reported on stderr.  The list is locked,
+ *  by exchanging root->m_next with zero, for the duration of the walk.
+ */
+bool
+SharedAllocationRecord< void , void >::
+is_sane( SharedAllocationRecord< void , void > * arg_record )
+{
+  constexpr static SharedAllocationRecord * zero = 0 ;
+
+  SharedAllocationRecord * const root = arg_record ? arg_record->m_root : 0 ;
+
+  bool ok = root != 0 && root->use_count() == 0 ;
+
+  if ( ok ) {
+    SharedAllocationRecord * root_next = 0 ;
+
+    // Lock the list:
+    while ( ( root_next = Kokkos::atomic_exchange( & root->m_next , zero ) ) == zero );
+
+    for ( SharedAllocationRecord * rec = root_next ; ok && rec != root ; rec = rec->m_next ) {
+      const bool ok_non_null  = rec && rec->m_prev && ( rec == root || rec->m_next );
+      const bool ok_root      = ok_non_null && rec->m_root == root ;
+      const bool ok_prev_next = ok_non_null && ( rec->m_prev != root ? rec->m_prev->m_next == rec : root_next == rec );
+      const bool ok_next_prev = ok_non_null && rec->m_next->m_prev == rec ;
+      const bool ok_count     = ok_non_null && 0 <= rec->use_count() ;
+
+      ok = ok_root && ok_prev_next && ok_next_prev && ok_count ;
+
+      if ( ! ok ) {
+        //Formatting dependent on sizeof(uintptr_t)
+        const char * format_string;
+
+        if (sizeof(uintptr_t) == sizeof(unsigned long)) {
+          format_string = "Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12lx){ m_count(%d) m_root(0x%.12lx) m_next(0x%.12lx) m_prev(0x%.12lx) m_next->m_prev(0x%.12lx) m_prev->m_next(0x%.12lx) }\n";
+        }
+        else if (sizeof(uintptr_t) == sizeof(unsigned long long)) {
+          format_string = "Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12llx){ m_count(%d) m_root(0x%.12llx) m_next(0x%.12llx) m_prev(0x%.12llx) m_next->m_prev(0x%.12llx) m_prev->m_next(0x%.12llx) }\n";
+        }
+
+        // A null m_prev is one of the failures being reported, so the
+        // neighbour links are only followed when they are non-null.
+        SharedAllocationRecord * const next_prev =
+          rec->m_next != zero ? rec->m_next->m_prev : zero ;
+
+        SharedAllocationRecord * const prev_next =
+          rec->m_prev == zero
+            ? zero
+            : ( rec->m_prev != rec->m_root ? rec->m_prev->m_next : root_next );
+
+        fprintf(stderr
+               , format_string
+               , reinterpret_cast< uintptr_t >( rec )
+               , rec->use_count()
+               , reinterpret_cast< uintptr_t >( rec->m_root )
+               , reinterpret_cast< uintptr_t >( rec->m_next )
+               , reinterpret_cast< uintptr_t >( rec->m_prev )
+               , reinterpret_cast< uintptr_t >( next_prev )
+               , reinterpret_cast< uintptr_t >( prev_next )
+               );
+      }
+
+    }
+
+    // Unlock the list by restoring the saved head pointer.
+    if ( zero != Kokkos::atomic_exchange( & root->m_next , root_next ) ) {
+      Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane unlocking");
+    }
+  }
+
+  return ok ;
+}
+
+/** \brief  Search the tracking list of 'arg_root' for the record whose
+ *          data() equals 'arg_data_ptr'; returns null when none matches.
+ *
+ *  The list is locked, by exchanging arg_root->m_next with zero, while
+ *  it is searched.
+ */
+SharedAllocationRecord<void,void> *
+SharedAllocationRecord<void,void>::find( SharedAllocationRecord<void,void> * const arg_root , void * const arg_data_ptr )
+{
+  constexpr static SharedAllocationRecord * zero = 0 ;
+
+  SharedAllocationRecord * root_next = 0 ;
+
+  // Lock the list:
+  while ( ( root_next = Kokkos::atomic_exchange( & arg_root->m_next , zero ) ) == zero );
+
+  // Iterate searching for the record with this data pointer
+
+  SharedAllocationRecord * r = root_next ;
+
+  while ( ( r != arg_root ) && ( r->data() != arg_data_ptr ) ) { r = r->m_next ; }
+
+  if ( r == arg_root ) { r = 0 ; }
+
+  if ( zero != Kokkos::atomic_exchange( & arg_root->m_next , root_next ) ) {
+    Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed locking/unlocking");
+  }
+
+  return r ;
+}
+
+
+/**\brief  Construct and insert into 'arg_root' tracking set.
+ *         use_count is zero.
+ */
+SharedAllocationRecord< void , void >::
+SharedAllocationRecord( SharedAllocationRecord<void,void> * arg_root
+                      , SharedAllocationHeader * arg_alloc_ptr
+                      , size_t arg_alloc_size
+                      , SharedAllocationRecord< void , void >::function_type arg_dealloc
+                      )
+  : m_alloc_ptr( arg_alloc_ptr )
+  , m_alloc_size( arg_alloc_size )
+  , m_dealloc( arg_dealloc )
+  , m_root( arg_root )
+  , m_prev( 0 )
+  , m_next( 0 )
+  , m_count( 0 )
+{
+  constexpr static SharedAllocationRecord * zero = 0 ;
+
+  if ( 0 != arg_alloc_ptr ) {
+
+    // Insert into the root double-linked list for tracking
+    //
+    // before:  arg_root->m_next == next ; next->m_prev == arg_root
+    // after:   arg_root->m_next == this ; this->m_prev == arg_root ;
+    //              this->m_next == next ; next->m_prev == this
+
+    m_prev = m_root ;
+
+    // Read root->m_next and lock by setting to zero
+    while ( ( m_next = Kokkos::atomic_exchange( & m_root->m_next , zero ) ) == zero );
+
+    m_next->m_prev = this ;
+
+    // memory fence before completing insertion into linked list
+    Kokkos::memory_fence();
+
+    if ( zero != Kokkos::atomic_exchange( & m_root->m_next , this ) ) {
+      Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed locking/unlocking");
+    }
+  }
+  else {
+    Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord given NULL allocation");
+  }
+}
+
+/** \brief  Atomically add one to the record's use count; a negative
+ *          previous count is reported through throw_runtime_exception.
+ */
+void
+SharedAllocationRecord< void , void >::
+increment( SharedAllocationRecord< void , void > * arg_record )
+{
+  const int old_count = Kokkos::atomic_fetch_add( & arg_record->m_count , 1 );
+
+  if ( old_count < 0 ) { // Error
+    Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed increment");
+  }
+}
+
+/** \brief  Atomically subtract one from the record's use count.
+ *
+ *  When the previous count was 1 the record is unlinked from its root's
+ *  list (under the list lock), its deallocation function is called and
+ *  null is returned.  A previous count below 1 is an error.  Otherwise
+ *  'arg_record' is returned unchanged.
+ */
+SharedAllocationRecord< void , void > *
+SharedAllocationRecord< void , void >::
+decrement( SharedAllocationRecord< void , void > * arg_record )
+{
+  constexpr static SharedAllocationRecord * zero = 0 ;
+
+  const int old_count = Kokkos::atomic_fetch_add( & arg_record->m_count , -1 );
+
+#if 0
+  if ( old_count <= 1 ) {
+    fprintf(stderr,"Kokkos::Experimental::Impl::SharedAllocationRecord '%s' at 0x%lx delete count = %d\n", arg_record->m_alloc_ptr->m_label , (unsigned long) arg_record , old_count );
+    fflush(stderr);
+  }
+#endif
+
+
+  if ( old_count == 1 ) {
+
+    // before:  arg_record->m_prev->m_next == arg_record  &&
+    //          arg_record->m_next->m_prev == arg_record
+    //
+    // after:   arg_record->m_prev->m_next == arg_record->m_next  &&
+    //          arg_record->m_next->m_prev == arg_record->m_prev
+
+    SharedAllocationRecord * root_next = 0 ;
+
+    // Lock the list:
+    while ( ( root_next = Kokkos::atomic_exchange( & arg_record->m_root->m_next , zero ) ) == zero );
+
+    arg_record->m_next->m_prev = arg_record->m_prev ;
+
+    if ( root_next != arg_record ) {
+      arg_record->m_prev->m_next = arg_record->m_next ;
+    }
+    else {
+      // before:  arg_record->m_root == arg_record->m_prev
+      // after:   arg_record->m_root == arg_record->m_next
+      root_next = arg_record->m_next ;
+    }
+
+    // Unlock the list:
+    if ( zero != Kokkos::atomic_exchange( & arg_record->m_root->m_next , root_next ) ) {
+      Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed decrement unlocking");
+    }
+
+    arg_record->m_next = 0 ;
+    arg_record->m_prev = 0 ;
+
+    function_type d = arg_record->m_dealloc ;
+    (*d)( arg_record );
+    arg_record = 0 ;
+  }
+  else if ( old_count < 1 ) { // Error
+    fprintf(stderr,"Kokkos::Experimental::Impl::SharedAllocationRecord '%s' failed decrement count = %d\n", arg_record->m_alloc_ptr->m_label , old_count );
+    fflush(stderr);
+    Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed decrement count");
+  }
+
+  return arg_record ;
+}
+
+/** \brief  Write one text line per record of the list starting at 'root'
+ *          to the stream 's'.
+ *
+ *  \param s           Stream that receives the report.
+ *  \param space_name  Text printed at the start of every line.
+ *  \param root        First record printed; the walk follows m_next and
+ *                     stops when it returns to 'root'.
+ *  \param detail      true: address, list links, extent, use count,
+ *                     deallocation function and label;
+ *                     false: data pointer, size and label only.
+ *
+ *  Each line is formatted into a 256 byte buffer by snprintf, so longer
+ *  lines are truncated.
+ */
+void
+SharedAllocationRecord< void , void >::
+print_host_accessible_records( std::ostream & s
+                             , const char * const space_name
+                             , const SharedAllocationRecord * const root
+                             , const bool detail )
+{
+  const SharedAllocationRecord< void , void > * r = root ;
+
+  char buffer[256] ;
+
+  if ( detail ) {
+    do {
+      //Formatting dependent on sizeof(uintptr_t)
+      const char * format_string;
+
+      if (sizeof(uintptr_t) == sizeof(unsigned long)) {
+        format_string = "%s addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + %.8ld ] count(%d) dealloc(0x%.12lx) %s\n";
+      }
+      else if (sizeof(uintptr_t) == sizeof(unsigned long long)) {
+        format_string = "%s addr( 0x%.12llx ) list( 0x%.12llx 0x%.12llx ) extent[ 0x%.12llx + %.8ld ] count(%d) dealloc(0x%.12llx) %s\n";
+      }
+
+      // The summary branch below allows for records without an
+      // allocation header, so the label is only read when the header
+      // is present.  The size is cast to long to match "%.8ld".
+      snprintf( buffer , 256
+              , format_string
+              , space_name
+              , reinterpret_cast<uintptr_t>( r )
+              , reinterpret_cast<uintptr_t>( r->m_prev )
+              , reinterpret_cast<uintptr_t>( r->m_next )
+              , reinterpret_cast<uintptr_t>( r->m_alloc_ptr )
+              , static_cast<long>( r->m_alloc_size )
+              , r->use_count()
+              , reinterpret_cast<uintptr_t>( r->m_dealloc )
+              , ( r->m_alloc_ptr ? r->m_alloc_ptr->m_label : "" )
+              );
+      // Report to the caller's stream rather than to std::cout.
+      s << buffer ;
+      r = r->m_next ;
+    } while ( r != root );
+  }
+  else {
+    do {
+      if ( r->m_alloc_ptr ) {
+        //Formatting dependent on sizeof(uintptr_t)
+        const char * format_string;
+
+        if (sizeof(uintptr_t) == sizeof(unsigned long)) {
+          format_string = "%s [ 0x%.12lx + %ld ] %s\n";
+        }
+        else if (sizeof(uintptr_t) == sizeof(unsigned long long)) {
+          format_string = "%s [ 0x%.12llx + %ld ] %s\n";
+        }
+
+        // The size is cast to long to match "%ld".
+        snprintf( buffer , 256
+                , format_string
+                , space_name
+                , reinterpret_cast< uintptr_t >( r->data() )
+                , static_cast<long>( r->size() )
+                , r->m_alloc_ptr->m_label
+                );
+      }
+      else {
+        snprintf( buffer , 256 , "%s [ 0 + 0 ]\n" , space_name );
+      }
+      // Report to the caller's stream rather than to std::cout.
+      s << buffer ;
+      r = r->m_next ;
+    } while ( r != root );
+  }
+}
+
+} /* namespace Impl */
+} /* namespace Experimental */
+} /* namespace Kokkos */
+
+
diff --git a/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.hpp b/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..1498eafb008ffa5d26a84094df9ba3f48126551e
--- /dev/null
+++ b/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.hpp
@@ -0,0 +1,400 @@
+/*
+//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_SHARED_ALLOC_HPP_
+#define KOKKOS_SHARED_ALLOC_HPP_
+
+#include <stdint.h>
+#include <string>
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+template< class MemorySpace = void , class DestroyFunctor = void >
+class SharedAllocationRecord ;
+
+/* Header stored immediately in front of the user's memory:
+ * a pointer back to the tracking record followed by the label.
+ * Its size is asserted to be exactly 128 bytes.
+ */
+class SharedAllocationHeader {
+private:
+
+  typedef SharedAllocationRecord<void,void>  Record ;
+
+  /* The label fills whatever remains of the 128 bytes after the record pointer */
+  static constexpr unsigned maximum_label_length = ( 1u << 7 /* 128 */ ) - sizeof(Record*);
+
+  template< class , class > friend class SharedAllocationRecord ;
+
+  Record * m_record ;
+  char     m_label[ maximum_label_length ];
+
+public:
+
+  /* Given user memory get pointer to the header,
+   * which is located sizeof(SharedAllocationHeader) bytes before 'alloc_ptr'.
+   */
+  KOKKOS_INLINE_FUNCTION static
+  const SharedAllocationHeader * get_header( void * alloc_ptr )
+  { return reinterpret_cast<SharedAllocationHeader*>( reinterpret_cast<char*>(alloc_ptr) - sizeof(SharedAllocationHeader) ); }
+};
+
+/* Memory-space independent part of an allocation record.
+ * Records are linked through m_prev / m_next and each holds a use count.
+ */
+template<>
+class SharedAllocationRecord< void , void > {
+protected:
+
+  static_assert( sizeof(SharedAllocationHeader) == ( 1u << 7 /* 128 */ ) , "sizeof(SharedAllocationHeader) != 128" );
+
+  template< class , class > friend class SharedAllocationRecord ;
+
+  /* Signature of the function invoked to destroy and deallocate a record */
+  typedef void (* function_type )( SharedAllocationRecord<void,void> * );
+
+  static int s_tracking_enabled ;
+
+  SharedAllocationHeader * const m_alloc_ptr ;   // header at the start of the allocation, null for a default constructed record
+  size_t                   const m_alloc_size ;  // allocation size including the header (see size())
+  function_type            const m_dealloc ;     // invoked by decrement() when the count goes 1->0
+  SharedAllocationRecord * const m_root ;        // root record of the list this record belongs to
+  SharedAllocationRecord *       m_prev ;
+  SharedAllocationRecord *       m_next ;
+  int                            m_count ;       // use count
+
+  SharedAllocationRecord( SharedAllocationRecord && ) = delete ;
+  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
+  SharedAllocationRecord & operator = ( SharedAllocationRecord && ) = delete ;
+  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
+
+  /**\brief  Construct and insert into 'arg_root' tracking set.
+   *         use_count is zero.
+   */
+  SharedAllocationRecord( SharedAllocationRecord * arg_root
+                        , SharedAllocationHeader * arg_alloc_ptr
+                        , size_t                   arg_alloc_size
+                        , function_type            arg_dealloc
+                        );
+
+public:
+
+  /* Current value of the shared allocation tracking flag */
+  static int tracking_enabled() { return s_tracking_enabled ; }
+
+  /**\brief A host process thread claims and disables the
+   *        shared allocation tracking flag.
+   */
+  static void tracking_claim_and_disable();
+
+  /**\brief A host process thread releases and enables the
+   *        shared allocation tracking flag.
+   */
+  static void tracking_release_and_enable();
+
+  ~SharedAllocationRecord() = default ;
+
+  /* Default construction yields a record with no allocation
+   * whose root, previous and next links all refer to itself.
+   */
+  SharedAllocationRecord()
+    : m_alloc_ptr( 0 )
+    , m_alloc_size( 0 )
+    , m_dealloc( 0 )
+    , m_root( this )
+    , m_prev( this )
+    , m_next( this )
+    , m_count( 0 )
+    {}
+
+  static constexpr unsigned maximum_label_length = SharedAllocationHeader::maximum_label_length ;
+
+  /* Header located at the start of the allocation */
+  KOKKOS_INLINE_FUNCTION
+  const SharedAllocationHeader * head() const { return m_alloc_ptr ; }
+
+  /* User's memory begins at the end of the header */
+  KOKKOS_INLINE_FUNCTION
+  void * data() const { return reinterpret_cast<void*>( m_alloc_ptr + 1 ); }
+
+  /* Size of the user's memory: the allocation size minus the header size */
+  size_t size() const { return m_alloc_size - sizeof(SharedAllocationHeader) ; }
+
+  /* Not 'constexpr': 'm_count' is read through a pointer-to-volatile */
+  int use_count() const { return *static_cast<const volatile int *>(&m_count); }
+
+  /* Increment use count */
+  static void increment( SharedAllocationRecord * );
+
+  /* Decrement use count. If 1->0 then remove from the tracking list and invoke m_dealloc */
+  static SharedAllocationRecord * decrement( SharedAllocationRecord * );
+
+  /* Given a root record and data pointer find the record */
+  static SharedAllocationRecord * find( SharedAllocationRecord * const , void * const );
+
+  /*  Sanity check for the whole set of records to which the input record belongs.
+   *  Locks the set's insert/erase operations until the sanity check is complete.
+   */
+  static bool is_sane( SharedAllocationRecord * );
+
+  /*  Print host-accessible records */
+  static void print_host_accessible_records( std::ostream &
+                                           , const char * const space_name
+                                           , const SharedAllocationRecord * const root
+                                           , const bool detail );
+};
+
+namespace {
+
+/* Taking the address of this function so make sure it is unique.
+ * Casts the record down to its fully specialized type, invokes the
+ * destroy functor's destroy_shared_allocation() and deletes the record.
+ */
+template < class MemorySpace , class DestroyFunctor >
+void deallocate( SharedAllocationRecord<void,void> * record_ptr )
+{
+  typedef SharedAllocationRecord< MemorySpace , void > base_type ;
+  typedef SharedAllocationRecord< MemorySpace , DestroyFunctor > this_type ;
+
+  // Downcast in two steps: <void,void> -> <MemorySpace,void> -> <MemorySpace,DestroyFunctor>
+  this_type * const ptr = static_cast< this_type * >(
+                          static_cast< base_type * >( record_ptr ) );
+
+  ptr->m_destroy.destroy_shared_allocation();
+
+  delete ptr ;
+}
+
+}
+
+/*
+ *  Memory space specialization of SharedAllocationRecord< Space , void > requires :
+ *
+ *  SharedAllocationRecord< Space , void > : public SharedAllocationRecord< void , void >
+ *  {
+ *    // delete allocated user memory via static_cast to this type.
+ *    static void deallocate( const SharedAllocationRecord<void,void> * );
+ *    Space m_space ;
+ *  }
+ */
+template< class MemorySpace , class DestroyFunctor >
+class SharedAllocationRecord : public SharedAllocationRecord< MemorySpace , void >
+{
+private:
+
+  SharedAllocationRecord( const MemorySpace & arg_space
+                        , const std::string & arg_label
+                        , const size_t        arg_alloc
+                        )
+    /*  Allocate user memory as [ SharedAllocationHeader , user_memory ] */
+    : SharedAllocationRecord< MemorySpace , void >( arg_space , arg_label , arg_alloc , & Kokkos::Experimental::Impl::deallocate< MemorySpace , DestroyFunctor > )
+    , m_destroy()
+    {}
+
+  SharedAllocationRecord() = delete ;
+  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
+  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
+
+public:
+
+  DestroyFunctor  m_destroy ;
+
+  // Allocate with a zero use count.
Incrementing the use count from zero to one + // inserts the record into the tracking list. Decrementing the count from one to zero + // removes from the trakcing list and deallocates. + KOKKOS_INLINE_FUNCTION static + SharedAllocationRecord * allocate( const MemorySpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc + ) + { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + return new SharedAllocationRecord( arg_space , arg_label , arg_alloc ); +#else + return (SharedAllocationRecord *) 0 ; +#endif + } +}; + +union SharedAllocationTracker { +private: + + typedef SharedAllocationRecord<void,void> Record ; + + enum : uintptr_t { DO_NOT_DEREF_FLAG = 0x01ul }; + + // The allocation record resides in Host memory space + uintptr_t m_record_bits ; + Record * m_record ; + +public: + + // Use macros instead of inline functions to reduce + // pressure on compiler optimization by reducing + // number of symbols and inline functons. + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + +#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED \ + Record::tracking_enabled() + +#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT \ + if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::increment( m_record ); + +#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT \ + if ( ! 
( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::decrement( m_record ); + +#else + +#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED 0 + +#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT /* */ + +#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT /* */ + +#endif + + /** \brief Assign a specialized record */ + inline + void assign_allocated_record_to_uninitialized( Record * arg_record ) + { + if ( arg_record ) { + Record::increment( m_record = arg_record ); + } + else { + m_record_bits = DO_NOT_DEREF_FLAG ; + } + } + + template< class MemorySpace > + constexpr + SharedAllocationRecord< MemorySpace , void > & + get_record() const + { return * static_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record ); } + + template< class MemorySpace > + std::string get_label() const + { + return ( m_record_bits & DO_NOT_DEREF_FLAG ) + ? std::string() + : static_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record )->get_label() + ; + } + + KOKKOS_INLINE_FUNCTION + int use_count() const + { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + Record * const tmp = reinterpret_cast<Record*>( m_record_bits & ~DO_NOT_DEREF_FLAG ); + return ( tmp ? tmp->use_count() : 0 ); +#else + return 0 ; +#endif + } + + KOKKOS_FORCEINLINE_FUNCTION + ~SharedAllocationTracker() + { KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT } + + KOKKOS_FORCEINLINE_FUNCTION + constexpr SharedAllocationTracker() + : m_record_bits( DO_NOT_DEREF_FLAG ) {} + + // Move: + + KOKKOS_FORCEINLINE_FUNCTION + SharedAllocationTracker( SharedAllocationTracker && rhs ) + : m_record_bits( rhs.m_record_bits ) + { rhs.m_record_bits = DO_NOT_DEREF_FLAG ; } + + KOKKOS_FORCEINLINE_FUNCTION + SharedAllocationTracker & operator = ( SharedAllocationTracker && rhs ) + { + // If this is tracking then must decrement + KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT + // Move and reset RHS to default constructed value. 
+ m_record_bits = rhs.m_record_bits ; + rhs.m_record_bits = DO_NOT_DEREF_FLAG ; + return *this ; + } + + // Copy: + + KOKKOS_FORCEINLINE_FUNCTION + SharedAllocationTracker( const SharedAllocationTracker & rhs ) + : m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED + ? rhs.m_record_bits + : rhs.m_record_bits | DO_NOT_DEREF_FLAG ) + { + KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT + } + + /** \brief Copy construction may disable tracking. */ + KOKKOS_FORCEINLINE_FUNCTION + SharedAllocationTracker( const SharedAllocationTracker & rhs + , const bool enable_tracking ) + : m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED + && enable_tracking + ? rhs.m_record_bits + : rhs.m_record_bits | DO_NOT_DEREF_FLAG ) + { KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT } + + KOKKOS_FORCEINLINE_FUNCTION + SharedAllocationTracker & operator = ( const SharedAllocationTracker & rhs ) + { + // If this is tracking then must decrement + KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT + m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED + ? rhs.m_record_bits + : rhs.m_record_bits | DO_NOT_DEREF_FLAG ; + KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT + return *this ; + } + + /** \brief Copy assignment may disable tracking */ + KOKKOS_FORCEINLINE_FUNCTION + void assign( const SharedAllocationTracker & rhs + , const bool enable_tracking ) + { + KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT + m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED + && enable_tracking + ? 
rhs.m_record_bits + : rhs.m_record_bits | DO_NOT_DEREF_FLAG ; + KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT + } + +#undef KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED +#undef KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT +#undef KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT + +}; + + +} /* namespace Impl */ +} /* namespace Experimental */ +} /* namespace Kokkos */ + +#endif diff --git a/lib/kokkos/core/src/impl/KokkosExp_ViewArray.hpp b/lib/kokkos/core/src/impl/KokkosExp_ViewArray.hpp new file mode 100644 index 0000000000000000000000000000000000000000..17d28ace4dae471accfa91ab52629aee357850e9 --- /dev/null +++ b/lib/kokkos/core/src/impl/KokkosExp_ViewArray.hpp @@ -0,0 +1,606 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP
+#define KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP
+
+#include <Kokkos_Array.hpp>
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+/** \brief  Data analysis for a View whose value type is Kokkos::Array<V,N>.
+ *
+ *  Besides the usual value types this provides the "scalar array" types:
+ *  the view's dimensions with the array length N appended and the scalar V
+ *  as the value type.
+ */
+template< class DataType , class ArrayLayout , class V , size_t N , class P >
+struct ViewDataAnalysis< DataType , ArrayLayout , Kokkos::Array<V,N,P> >
+{
+private:
+
+  typedef ViewArrayAnalysis<DataType> array_analysis ;
+
+  // Only the default Array (P == void) of a scalar type is accepted.
+  static_assert( std::is_same<P,void>::value , "" );
+  static_assert( std::is_same<typename array_analysis::non_const_value_type , Kokkos::Array<V,N,P> >::value , "" );
+  static_assert( std::is_scalar<V>::value , "View of Array type must be of a scalar type" );
+
+public:
+
+  // Tag selecting the Array specializations of ViewMapping
+  typedef Kokkos::Array<> specialize ;
+
+  typedef typename array_analysis::dimension dimension ;
+
+private:
+
+  // The value type is const when it equals its own const-qualified type
+  enum { is_const = std::is_same< typename array_analysis::value_type
+                                , typename array_analysis::const_value_type
+                                >::value };
+
+  // View dimensions with the array length N appended
+  typedef typename dimension::template append<N>::type array_scalar_dimension ;
+
+  typedef typename std::conditional< is_const , const V , V >::type scalar_type ;
+  typedef V non_const_scalar_type ;
+  typedef const V const_scalar_type ;
+
+public:
+
+  typedef typename array_analysis::value_type value_type ;
+  typedef typename array_analysis::const_value_type const_value_type ;
+  typedef typename array_analysis::non_const_value_type non_const_value_type ;
+
+  typedef typename ViewDataType< value_type , dimension >::type type ;
+  typedef typename ViewDataType< const_value_type , dimension >::type const_type ;
+  typedef typename ViewDataType< non_const_value_type , dimension >::type non_const_type ;
+
+  typedef typename ViewDataType< scalar_type , array_scalar_dimension >::type scalar_array_type ;
+  typedef typename ViewDataType< const_scalar_type , array_scalar_dimension >::type const_scalar_array_type ;
+  typedef typename ViewDataType< non_const_scalar_type , array_scalar_dimension >::type non_const_scalar_array_type ;
+};
+
+}}} // namespace Kokkos::Experimental::Impl
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+/** \brief  View mapping for Kokkos::Array value types
+ *          (Traits::specialize is Kokkos::Array<>) and standard layout
+ */
+template< class Traits >
+class ViewMapping< Traits ,
+  typename std::enable_if<(
+    std::is_same< typename Traits::specialize , Kokkos::Array<> >::value &&
+    ( std::is_same< typename Traits::array_layout , Kokkos::LayoutLeft >::value ||
+      std::is_same< typename Traits::array_layout , Kokkos::LayoutRight >::value ||
+      std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value )
+  )>::type >
+{
+private:
+
+  template< class , class ... > friend class ViewMapping ;
+  template< class , class ...
> friend class Kokkos::Experimental::View ; + + typedef ViewOffset< typename Traits::dimension + , typename Traits::array_layout + , void + > offset_type ; + + typedef typename Traits::value_type::pointer handle_type ; + + handle_type m_handle ; + offset_type m_offset ; + size_t m_stride ; + + typedef typename Traits::value_type::value_type scalar_type ; + + typedef Kokkos::Array< scalar_type , ~size_t(0) , Kokkos::Array<>::contiguous > contiguous_reference ; + typedef Kokkos::Array< scalar_type , ~size_t(0) , Kokkos::Array<>::strided > strided_reference ; + + enum { is_contiguous_reference = + ( Traits::rank == 0 ) || ( std::is_same< typename Traits::array_layout , Kokkos::LayoutRight >::value ) }; + + enum { Array_N = Traits::value_type::size() }; + enum { Array_S = is_contiguous_reference ? Array_N : 1 }; + + KOKKOS_INLINE_FUNCTION + ViewMapping( const handle_type & arg_handle , const offset_type & arg_offset ) + : m_handle( arg_handle ) + , m_offset( arg_offset ) + , m_stride( is_contiguous_reference ? 
0 : arg_offset.span() ) + {} + +public: + + //---------------------------------------- + // Domain dimensions + + enum { Rank = Traits::dimension::rank }; + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr size_t extent( const iType & r ) const + { return m_offset.m_dim.extent(r); } + + KOKKOS_INLINE_FUNCTION constexpr + typename Traits::array_layout layout() const + { return m_offset.layout(); } + + + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_offset.dimension_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_offset.dimension_1(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_offset.dimension_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_offset.dimension_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_offset.dimension_4(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_offset.dimension_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_offset.dimension_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_offset.dimension_7(); } + + // Is a regular layout with uniform striding for each index. 
+ using is_regular = typename offset_type::is_regular ; + + KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_offset.stride_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_offset.stride_1(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_offset.stride_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_offset.stride_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_offset.stride_4(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_offset.stride_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_offset.stride_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_offset.stride_7(); } + + //---------------------------------------- + // Range span + + /** \brief Span of the mapped range */ + KOKKOS_INLINE_FUNCTION constexpr size_t span() const + { return m_offset.span() * Array_N ; } + + /** \brief Is the mapped range span contiguous */ + KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const + { return m_offset.span_is_contiguous(); } + + typedef typename std::conditional< is_contiguous_reference , contiguous_reference , strided_reference >::type reference_type ; + + typedef handle_type pointer_type ; + + /** \brief If data references are lvalue_reference than can query pointer to memory */ + KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const + { return m_handle ; } + + //---------------------------------------- + // The View class performs all rank and bounds checking before + // calling these element reference methods. 
+ + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference() const { return reference_type( m_handle + 0 , Array_N , 0 ); } + + template< typename I0 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type + reference( const I0 & i0 ) const + { return reference_type( m_handle + m_offset(i0) * Array_S , Array_N , m_stride ); } + + template< typename I0 , typename I1 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 ) const + { return reference_type( m_handle + m_offset(i0,i1) * Array_S , Array_N , m_stride ); } + + template< typename I0 , typename I1 , typename I2 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const + { return reference_type( m_handle + m_offset(i0,i1,i2) * Array_S , Array_N , m_stride ); } + + template< typename I0 , typename I1 , typename I2 , typename I3 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const + { return reference_type( m_handle + m_offset(i0,i1,i2,i3) * Array_S , Array_N , m_stride ); } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 ) const + { return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4) * Array_S , Array_N , m_stride ); } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 ) const + { return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5) * Array_S , Array_N , m_stride ); } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 , typename I6 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & 
i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6 ) const + { return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5,i6) * Array_S , Array_N , m_stride ); } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 , typename I6 , typename I7 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const + { return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5,i6,i7) * Array_S , Array_N , m_stride ); } + + //---------------------------------------- + +private: + + enum { MemorySpanMask = 8 - 1 /* Force alignment on 8 byte boundary */ }; + enum { MemorySpanSize = sizeof(scalar_type) }; + +public: + + /** \brief Span, in bytes, of the referenced memory */ + KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const + { + return ( m_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask); + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION ~ViewMapping() {} + KOKKOS_INLINE_FUNCTION ViewMapping() : m_handle(), m_offset(), m_stride(0) {} + KOKKOS_INLINE_FUNCTION ViewMapping( const ViewMapping & rhs ) + : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ), m_stride( rhs.m_stride ) {} + KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( const ViewMapping & rhs ) + { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; m_stride = rhs.m_stride ; ; return *this ; } + + KOKKOS_INLINE_FUNCTION ViewMapping( ViewMapping && rhs ) + : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ), m_stride( rhs.m_stride ) {} + KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( ViewMapping && rhs ) + { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; m_stride = rhs.m_stride ; return *this ; } + + //---------------------------------------- + + template< class ... 
Args > + KOKKOS_INLINE_FUNCTION + ViewMapping( pointer_type ptr , Args ... args ) + : m_handle( ptr ) + , m_offset( std::integral_constant< unsigned , 0 >() , args... ) + , m_stride( m_offset.span() ) + {} + + //---------------------------------------- + + template< class ... P > + SharedAllocationRecord<> * + allocate_shared( ViewCtorProp< P... > const & arg_prop + , typename Traits::array_layout const & arg_layout + ) + { + typedef ViewCtorProp< P... > alloc_prop ; + + typedef typename alloc_prop::execution_space execution_space ; + typedef typename Traits::memory_space memory_space ; + typedef ViewValueFunctor< execution_space , scalar_type > functor_type ; + typedef SharedAllocationRecord< memory_space , functor_type > record_type ; + + // Query the mapping for byte-size of allocation. + typedef std::integral_constant< unsigned , + alloc_prop::allow_padding ? sizeof(scalar_type) : 0 > padding ; + + m_offset = offset_type( padding(), arg_layout ); + + const size_t alloc_size = + ( m_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask); + + // Allocate memory from the memory space and create tracking record. 
+ record_type * const record = + record_type::allocate( ((ViewCtorProp<void,memory_space> const &) arg_prop ).value + , ((ViewCtorProp<void,std::string> const &) arg_prop ).value + , alloc_size ); + + if ( alloc_size ) { + m_handle = + handle_type( reinterpret_cast< pointer_type >( record->data() ) ); + + if ( alloc_prop::initialize ) { + // The functor constructs and destroys + record->m_destroy = functor_type( ((ViewCtorProp<void,execution_space> const & )arg_prop).value + , (pointer_type) m_handle + , m_offset.span() * Array_N + ); + + record->m_destroy.construct_shared_allocation(); + } + } + + return record ; + } +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +/** \brief Assign compatible default mappings */ + +template< class DstTraits , class SrcTraits > +class ViewMapping< DstTraits , SrcTraits , + typename std::enable_if<( + std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value + && + std::is_same< typename DstTraits::specialize , Kokkos::Array<> >::value + && + ( + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value || + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value || + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value + ) + && + std::is_same< typename SrcTraits::specialize , Kokkos::Array<> >::value + && + ( + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value || + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value || + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value + ) + )>::type > +{ +public: + + enum { is_assignable = true }; + + typedef Kokkos::Experimental::Impl::SharedAllocationTracker TrackType ; + typedef ViewMapping< DstTraits , void > DstType ; + typedef ViewMapping< SrcTraits , void > SrcType ; + + KOKKOS_INLINE_FUNCTION 
+  // Copy offset map, handle and stride from 'src' into 'dst' after the
+  // compile-time compatibility checks below.  'src_track' is not used here.
+  static void assign( DstType & dst , const SrcType & src , const TrackType & src_track )
+    {
+      static_assert( std::is_same< typename DstTraits::value_type , typename SrcTraits::value_type >::value ||
+                     std::is_same< typename DstTraits::value_type , typename SrcTraits::const_value_type >::value
+                   , "View assignment must have same value type or const = non-const" );
+
+      static_assert( ViewDimensionAssignable< typename DstTraits::dimension , typename SrcTraits::dimension >::value
+                   , "View assignment must have compatible dimensions" );
+
+      static_assert( std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value ||
+                     std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value ||
+                     ( DstTraits::dimension::rank == 0 ) ||
+                     ( DstTraits::dimension::rank == 1 && DstTraits::dimension::rank_dynamic == 1 )
+                   , "View assignment must have compatible layout or have rank <= 1" );
+
+      typedef typename DstType::offset_type  dst_offset_type ;
+
+      dst.m_offset = dst_offset_type( src.m_offset );
+      dst.m_handle = src.m_handle ;
+      dst.m_stride = src.m_stride ;
+    }
+};
+
+/** \brief  Assign Array to non-Array:
+ *          a view of Kokkos::Array values is viewed as a view of its scalars.
+ */
+
+template< class DstTraits , class SrcTraits >
+class ViewMapping< DstTraits , SrcTraits ,
+  typename std::enable_if<(
+    std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value
+    &&
+    std::is_same< typename DstTraits::specialize , void >::value
+    &&
+    (
+      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value ||
+      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value ||
+      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value
+    )
+    &&
+    std::is_same< typename SrcTraits::specialize , Kokkos::Array<> >::value
+    &&
+    (
+      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
+      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
+      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
+    )
+  )>::type >
+{
+public:
+
+  // Can only convert to View::array_type
+
+  enum { is_assignable = std::is_same< typename DstTraits::data_type , typename SrcTraits::scalar_array_type >::value &&
+                         std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value };
+
+  typedef Kokkos::Experimental::Impl::SharedAllocationTracker  TrackType ;
+  typedef ViewMapping< DstTraits , void >  DstType ;
+  typedef ViewMapping< SrcTraits , void >  SrcType ;
+
+  // Build the destination offset map from the source dimensions with the
+  // Array length supplied for every index at or beyond the source rank,
+  // then share the source handle.  'src_track' is not used here.
+  KOKKOS_INLINE_FUNCTION
+  static void assign( DstType & dst , const SrcType & src , const TrackType & src_track )
+    {
+      static_assert( is_assignable , "Can only convert to array_type" );
+
+      typedef typename DstType::offset_type  dst_offset_type ;
+
+      // Array dimension becomes the last dimension.
+      // Arguments beyond the destination rank are ignored.
+      if ( src.span_is_contiguous() ) { // not padded
+        dst.m_offset = dst_offset_type( std::integral_constant<unsigned,0>() ,
+          typename DstTraits::array_layout
+            ( ( 0 < SrcType::Rank ? src.dimension_0() : SrcTraits::value_type::size() )
+            , ( 1 < SrcType::Rank ? src.dimension_1() : SrcTraits::value_type::size() )
+            , ( 2 < SrcType::Rank ? src.dimension_2() : SrcTraits::value_type::size() )
+            , ( 3 < SrcType::Rank ? src.dimension_3() : SrcTraits::value_type::size() )
+            , ( 4 < SrcType::Rank ? src.dimension_4() : SrcTraits::value_type::size() )
+            , ( 5 < SrcType::Rank ? src.dimension_5() : SrcTraits::value_type::size() )
+            , ( 6 < SrcType::Rank ? src.dimension_6() : SrcTraits::value_type::size() )
+            , ( 7 < SrcType::Rank ? src.dimension_7() : SrcTraits::value_type::size() )
+            ) );
+      }
+      else { // is padded
+        // Padding tag carries the size of one scalar of the source Array
+        typedef std::integral_constant<unsigned,sizeof(typename SrcTraits::value_type::value_type)> padded ;
+
+        dst.m_offset = dst_offset_type( padded() ,
+          typename DstTraits::array_layout
+            ( ( 0 < SrcType::Rank ? src.dimension_0() : SrcTraits::value_type::size() )
+            , ( 1 < SrcType::Rank ? src.dimension_1() : SrcTraits::value_type::size() )
+            , ( 2 < SrcType::Rank ? src.dimension_2() : SrcTraits::value_type::size() )
+            , ( 3 < SrcType::Rank ? src.dimension_3() : SrcTraits::value_type::size() )
+            , ( 4 < SrcType::Rank ? src.dimension_4() : SrcTraits::value_type::size() )
+            , ( 5 < SrcType::Rank ? src.dimension_5() : SrcTraits::value_type::size() )
+            , ( 6 < SrcType::Rank ? src.dimension_6() : SrcTraits::value_type::size() )
+            , ( 7 < SrcType::Rank ? src.dimension_7() : SrcTraits::value_type::size() )
+            ) );
+      }
+
+      dst.m_handle = src.m_handle ;
+    }
+};
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+template< class SrcTraits , class ... Args >
+struct ViewMapping
+  < typename std::enable_if<(
+      std::is_same< typename SrcTraits::specialize , Kokkos::Array<> >::value
+      &&
+      (
+        std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
+        std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
+        std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
+      )
+    )>::type
+  , SrcTraits
+  , Args ... >
+{
+private:
+
+  static_assert( SrcTraits::rank == sizeof...(Args) , "" );
+
+  enum : bool
+    { R0 = is_integral_extent<0,Args...>::value
+    , R1 = is_integral_extent<1,Args...>::value
+    , R2 = is_integral_extent<2,Args...>::value
+    , R3 = is_integral_extent<3,Args...>::value
+    , R4 = is_integral_extent<4,Args...>::value
+    , R5 = is_integral_extent<5,Args...>::value
+    , R6 = is_integral_extent<6,Args...>::value
+    , R7 = is_integral_extent<7,Args...>::value
+    };
+
+  enum { rank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
+              + unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
+
+  // Whether right-most rank is a range.
+  enum { R0_rev = 0 == SrcTraits::rank ? false : (
+                  1 == SrcTraits::rank ? R0 : (
+                  2 == SrcTraits::rank ? R1 : (
+                  3 == SrcTraits::rank ?
R2 : ( + 4 == SrcTraits::rank ? R3 : ( + 5 == SrcTraits::rank ? R4 : ( + 6 == SrcTraits::rank ? R5 : ( + 7 == SrcTraits::rank ? R6 : R7 ))))))) }; + + // Subview's layout + typedef typename std::conditional< + ( /* Same array layout IF */ + ( rank == 0 ) /* output rank zero */ + || + // OutputRank 1 or 2, InputLayout Left, Interval 0 + // because single stride one or second index has a stride. + ( rank <= 2 && R0 && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ) + || + // OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1] + // because single stride one or second index has a stride. + ( rank <= 2 && R0_rev && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ) + ), typename SrcTraits::array_layout , Kokkos::LayoutStride + >::type array_layout ; + + typedef typename SrcTraits::value_type value_type ; + + typedef typename std::conditional< rank == 0 , value_type , + typename std::conditional< rank == 1 , value_type * , + typename std::conditional< rank == 2 , value_type ** , + typename std::conditional< rank == 3 , value_type *** , + typename std::conditional< rank == 4 , value_type **** , + typename std::conditional< rank == 5 , value_type ***** , + typename std::conditional< rank == 6 , value_type ****** , + typename std::conditional< rank == 7 , value_type ******* , + value_type ******** + >::type >::type >::type >::type >::type >::type >::type >::type + data_type ; + +public: + + typedef Kokkos::Experimental::ViewTraits + < data_type + , array_layout + , typename SrcTraits::device_type + , typename SrcTraits::memory_traits > traits_type ; + + typedef Kokkos::Experimental::View + < data_type + , array_layout + , typename SrcTraits::device_type + , typename SrcTraits::memory_traits > type ; + + KOKKOS_INLINE_FUNCTION + static void assign( ViewMapping< traits_type , void > & dst + , ViewMapping< SrcTraits , void > const & src + , Args ... 
args ) + { + typedef ViewMapping< traits_type , void > DstType ; + + typedef typename DstType::offset_type dst_offset_type ; + typedef typename DstType::handle_type dst_handle_type ; + + const SubviewExtents< SrcTraits::rank , rank > + extents( src.m_offset.m_dim , args... ); + + dst.m_offset = dst_offset_type( src.m_offset , extents ); + dst.m_handle = dst_handle_type( src.m_handle + + src.m_offset( extents.domain_offset(0) + , extents.domain_offset(1) + , extents.domain_offset(2) + , extents.domain_offset(3) + , extents.domain_offset(4) + , extents.domain_offset(5) + , extents.domain_offset(6) + , extents.domain_offset(7) + ) ); + } +}; + +}}} // namespace Kokkos::Experimental::Impl + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #ifndef KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP */ + diff --git a/lib/kokkos/core/src/impl/KokkosExp_ViewCtor.hpp b/lib/kokkos/core/src/impl/KokkosExp_ViewCtor.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6525fed0a5ceb5995db3517b84fec6f7985e6d54 --- /dev/null +++ b/lib/kokkos/core/src/impl/KokkosExp_ViewCtor.hpp @@ -0,0 +1,252 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXPERIMENTAL_IMPL_VIEW_CTOR_PROP_HPP +#define KOKKOS_EXPERIMENTAL_IMPL_VIEW_CTOR_PROP_HPP + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +/* For backward compatibility */ + +struct ViewAllocateWithoutInitializing { + + const std::string label ; + + ViewAllocateWithoutInitializing() : label() {} + + explicit + ViewAllocateWithoutInitializing( const std::string & arg_label ) : label( arg_label ) {} + + explicit + ViewAllocateWithoutInitializing( const char * const arg_label ) : label( arg_label ) {} +}; + +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +struct WithoutInitializing_t {}; +struct AllowPadding_t {}; +struct NullSpace_t {}; + +//---------------------------------------------------------------------------- +/**\brief Whether a type can be used for a view label */ + +template < typename > +struct is_view_label : public std::false_type {}; + +template<> +struct is_view_label< std::string > : public std::true_type {}; + +template< unsigned N > +struct is_view_label< char[N] > : public std::true_type {}; + +template< unsigned N > +struct is_view_label< const char[N] > : public std::true_type {}; + +//---------------------------------------------------------------------------- + +template< typename ... 
P > +struct ViewCtorProp ; + +/* std::integral_constant<unsigned,I> are dummy arguments + * that avoid duplicate base class errors + */ +template< unsigned I > +struct ViewCtorProp< void , std::integral_constant<unsigned,I> > +{ + ViewCtorProp() = default ; + ViewCtorProp( const ViewCtorProp & ) = default ; + ViewCtorProp & operator = ( const ViewCtorProp & ) = default ; + + template< typename P > + ViewCtorProp( const P & ) {} +}; + +/* Property flags have constexpr value */ +template< typename P > +struct ViewCtorProp + < typename std::enable_if< + std::is_same< P , AllowPadding_t >::value || + std::is_same< P , WithoutInitializing_t >::value + >::type + , P + > +{ + ViewCtorProp() = default ; + ViewCtorProp( const ViewCtorProp & ) = default ; + ViewCtorProp & operator = ( const ViewCtorProp & ) = default ; + + typedef P type ; + + ViewCtorProp( const type & ) {} + + static constexpr type value = type(); +}; + +/* Map input label type to std::string */ +template< typename Label > +struct ViewCtorProp + < typename std::enable_if< is_view_label< Label >::value >::type + , Label + > +{ + ViewCtorProp() = default ; + ViewCtorProp( const ViewCtorProp & ) = default ; + ViewCtorProp & operator = ( const ViewCtorProp & ) = default ; + + typedef std::string type ; + + ViewCtorProp( const type & arg ) : value( arg ) {} + ViewCtorProp( type && arg ) : value( arg ) {} + + type value ; +}; + +template< typename Space > +struct ViewCtorProp + < typename std::enable_if< + Kokkos::Impl::is_memory_space<Space>::value || + Kokkos::Impl::is_execution_space<Space>::value + >::type + , Space + > +{ + ViewCtorProp() = default ; + ViewCtorProp( const ViewCtorProp & ) = default ; + ViewCtorProp & operator = ( const ViewCtorProp & ) = default ; + + typedef Space type ; + + ViewCtorProp( const type & arg ) : value( arg ) {} + + type value ; +}; + + +template< typename T > +struct ViewCtorProp < void , T * > +{ + ViewCtorProp() = default ; + ViewCtorProp( const ViewCtorProp & ) = default ; 
+ ViewCtorProp & operator = ( const ViewCtorProp & ) = default ; + + typedef T * type ; + + KOKKOS_INLINE_FUNCTION + ViewCtorProp( const type arg ) : value( arg ) {} + + type value ; +}; + + +template< typename ... P > +struct ViewCtorProp : public ViewCtorProp< void , P > ... +{ +private: + + typedef Kokkos::Impl::has_condition< void , Kokkos::Impl::is_memory_space , P ... > + var_memory_space ; + + typedef Kokkos::Impl::has_condition< void , Kokkos::Impl::is_execution_space , P ... > + var_execution_space ; + + struct VOIDDUMMY{}; + + typedef Kokkos::Impl::has_condition< VOIDDUMMY , std::is_pointer , P ... > + var_pointer ; + +public: + + /* Flags for the common properties */ + enum { has_memory_space = var_memory_space::value }; + enum { has_execution_space = var_execution_space::value }; + enum { has_pointer = var_pointer::value }; + enum { has_label = Kokkos::Impl::has_type< std::string , P... >::value }; + enum { allow_padding = Kokkos::Impl::has_type< AllowPadding_t , P... >::value }; + enum { initialize = ! Kokkos::Impl::has_type< WithoutInitializing_t , P ... >::value }; + + typedef typename var_memory_space::type memory_space ; + typedef typename var_execution_space::type execution_space ; + typedef typename var_pointer::type pointer_type ; + + /* Copy from a matching argument list. + * Requires std::is_same< P , ViewCtorProp< void , Args >::value ... + */ + template< typename ... Args > + inline + ViewCtorProp( Args const & ... args ) + : ViewCtorProp< void , P >( args ) ... + {} + + template< typename ... Args > + KOKKOS_INLINE_FUNCTION + ViewCtorProp( pointer_type arg0 , Args const & ... args ) + : ViewCtorProp< void , pointer_type >( arg0 ) + , ViewCtorProp< void , typename ViewCtorProp< void , Args >::type >( args ) ... + {} + + /* Copy from a matching property subset */ + template< typename ... Args > + ViewCtorProp( ViewCtorProp< Args ... > const & arg ) + : ViewCtorProp< void , Args >( ((ViewCtorProp<void,Args> const &) arg ) ) ... 
+ {} +}; + +} /* namespace Impl */ +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif + diff --git a/lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp b/lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ed56536cd91b52f3d0beddc8095eba9a4bb593c9 --- /dev/null +++ b/lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp @@ -0,0 +1,2932 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP +#define KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP + +#include <type_traits> +#include <initializer_list> + +#include <Kokkos_Core_fwd.hpp> +#include <Kokkos_Pair.hpp> +#include <Kokkos_Layout.hpp> +#include <impl/Kokkos_Error.hpp> +#include <impl/Kokkos_Traits.hpp> +#include <impl/KokkosExp_ViewCtor.hpp> +#include <impl/Kokkos_Atomic_View.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template< unsigned I , size_t ... Args > +struct variadic_size_t + { enum { value = ~size_t(0) }; }; + +template< size_t Val , size_t ... Args > +struct variadic_size_t< 0 , Val , Args ... > + { enum { value = Val }; }; + +template< unsigned I , size_t Val , size_t ... Args > +struct variadic_size_t< I , Val , Args ... > + { enum { value = variadic_size_t< I - 1 , Args ... >::value }; }; + +template< size_t ... Args > +struct rank_dynamic ; + +template<> +struct rank_dynamic<> { enum { value = 0 }; }; + +template< size_t Val , size_t ... Args > +struct rank_dynamic< Val , Args... > +{ + enum { value = ( Val == 0 ? 1 : 0 ) + rank_dynamic< Args... 
>::value }; +}; + +#define KOKKOS_IMPL_VIEW_DIMENSION( R ) \ + template< size_t V , unsigned > struct ViewDimension ## R \ + { \ + enum { ArgN ## R = ( V != ~size_t(0) ? V : 1 ) }; \ + enum { N ## R = ( V != ~size_t(0) ? V : 1 ) }; \ + KOKKOS_INLINE_FUNCTION explicit ViewDimension ## R ( size_t ) {} \ + ViewDimension ## R () = default ; \ + ViewDimension ## R ( const ViewDimension ## R & ) = default ; \ + ViewDimension ## R & operator = ( const ViewDimension ## R & ) = default ; \ + }; \ + template< unsigned RD > struct ViewDimension ## R < 0 , RD > \ + { \ + enum { ArgN ## R = 0 }; \ + typename std::conditional<( RD < 3 ), size_t , unsigned >::type N ## R ; \ + ViewDimension ## R () = default ; \ + ViewDimension ## R ( const ViewDimension ## R & ) = default ; \ + ViewDimension ## R & operator = ( const ViewDimension ## R & ) = default ; \ + KOKKOS_INLINE_FUNCTION explicit ViewDimension ## R ( size_t V ) : N ## R ( V ) {} \ + }; + +KOKKOS_IMPL_VIEW_DIMENSION( 0 ) +KOKKOS_IMPL_VIEW_DIMENSION( 1 ) +KOKKOS_IMPL_VIEW_DIMENSION( 2 ) +KOKKOS_IMPL_VIEW_DIMENSION( 3 ) +KOKKOS_IMPL_VIEW_DIMENSION( 4 ) +KOKKOS_IMPL_VIEW_DIMENSION( 5 ) +KOKKOS_IMPL_VIEW_DIMENSION( 6 ) +KOKKOS_IMPL_VIEW_DIMENSION( 7 ) + +#undef KOKKOS_IMPL_VIEW_DIMENSION + +template< size_t ... Vals > +struct ViewDimension + : public ViewDimension0< variadic_size_t<0,Vals...>::value + , rank_dynamic< Vals... >::value > + , public ViewDimension1< variadic_size_t<1,Vals...>::value + , rank_dynamic< Vals... >::value > + , public ViewDimension2< variadic_size_t<2,Vals...>::value + , rank_dynamic< Vals... >::value > + , public ViewDimension3< variadic_size_t<3,Vals...>::value + , rank_dynamic< Vals... >::value > + , public ViewDimension4< variadic_size_t<4,Vals...>::value + , rank_dynamic< Vals... >::value > + , public ViewDimension5< variadic_size_t<5,Vals...>::value + , rank_dynamic< Vals... >::value > + , public ViewDimension6< variadic_size_t<6,Vals...>::value + , rank_dynamic< Vals... 
>::value > + , public ViewDimension7< variadic_size_t<7,Vals...>::value + , rank_dynamic< Vals... >::value > +{ + typedef ViewDimension0< variadic_size_t<0,Vals...>::value + , rank_dynamic< Vals... >::value > D0 ; + typedef ViewDimension1< variadic_size_t<1,Vals...>::value + , rank_dynamic< Vals... >::value > D1 ; + typedef ViewDimension2< variadic_size_t<2,Vals...>::value + , rank_dynamic< Vals... >::value > D2 ; + typedef ViewDimension3< variadic_size_t<3,Vals...>::value + , rank_dynamic< Vals... >::value > D3 ; + typedef ViewDimension4< variadic_size_t<4,Vals...>::value + , rank_dynamic< Vals... >::value > D4 ; + typedef ViewDimension5< variadic_size_t<5,Vals...>::value + , rank_dynamic< Vals... >::value > D5 ; + typedef ViewDimension6< variadic_size_t<6,Vals...>::value + , rank_dynamic< Vals... >::value > D6 ; + typedef ViewDimension7< variadic_size_t<7,Vals...>::value + , rank_dynamic< Vals... >::value > D7 ; + + using D0::ArgN0 ; + using D1::ArgN1 ; + using D2::ArgN2 ; + using D3::ArgN3 ; + using D4::ArgN4 ; + using D5::ArgN5 ; + using D6::ArgN6 ; + using D7::ArgN7 ; + + using D0::N0 ; + using D1::N1 ; + using D2::N2 ; + using D3::N3 ; + using D4::N4 ; + using D5::N5 ; + using D6::N6 ; + using D7::N7 ; + + enum { rank = sizeof...(Vals) }; + enum { rank_dynamic = Impl::rank_dynamic< Vals... >::value }; + + ViewDimension() = default ; + ViewDimension( const ViewDimension & ) = default ; + ViewDimension & operator = ( const ViewDimension & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr + ViewDimension( size_t n0 , size_t n1 , size_t n2 , size_t n3 + , size_t n4 , size_t n5 , size_t n6 , size_t n7 ) + : D0( n0 ) + , D1( n1 ) + , D2( n2 ) + , D3( n3 ) + , D4( n4 ) + , D5( n5 ) + , D6( n6 ) + , D7( n7 ) + {} + + KOKKOS_INLINE_FUNCTION + constexpr size_t extent( const unsigned r ) const + { + return r == 0 ? N0 : ( + r == 1 ? N1 : ( + r == 2 ? N2 : ( + r == 3 ? N3 : ( + r == 4 ? N4 : ( + r == 5 ? N5 : ( + r == 6 ? N6 : ( + r == 7 ? 
N7 : 0 ))))))); + } + + template< size_t N > + struct prepend { typedef ViewDimension< N , Vals... > type ; }; + + template< size_t N > + struct append { typedef ViewDimension< Vals... , N > type ; }; +}; + +template< class A , class B > +struct ViewDimensionJoin ; + +template< size_t ... A , size_t ... B > +struct ViewDimensionJoin< ViewDimension< A... > , ViewDimension< B... > > { + typedef ViewDimension< A... , B... > type ; +}; + +//---------------------------------------------------------------------------- + +template< class DstDim , class SrcDim > +struct ViewDimensionAssignable ; + +template< size_t ... DstArgs , size_t ... SrcArgs > +struct ViewDimensionAssignable< ViewDimension< DstArgs ... > + , ViewDimension< SrcArgs ... > > +{ + typedef ViewDimension< DstArgs... > dst ; + typedef ViewDimension< SrcArgs... > src ; + + enum { value = + unsigned(dst::rank) == unsigned(src::rank) && ( + //Compile time check that potential static dimensions match + ( ( 1 > dst::rank_dynamic && 1 > src::rank_dynamic ) ? (size_t(dst::ArgN0) == size_t(src::ArgN0)) : true ) && + ( ( 2 > dst::rank_dynamic && 2 > src::rank_dynamic ) ? (size_t(dst::ArgN1) == size_t(src::ArgN1)) : true ) && + ( ( 3 > dst::rank_dynamic && 3 > src::rank_dynamic ) ? (size_t(dst::ArgN2) == size_t(src::ArgN2)) : true ) && + ( ( 4 > dst::rank_dynamic && 4 > src::rank_dynamic ) ? (size_t(dst::ArgN3) == size_t(src::ArgN3)) : true ) && + ( ( 5 > dst::rank_dynamic && 5 > src::rank_dynamic ) ? (size_t(dst::ArgN4) == size_t(src::ArgN4)) : true ) && + ( ( 6 > dst::rank_dynamic && 6 > src::rank_dynamic ) ? (size_t(dst::ArgN5) == size_t(src::ArgN5)) : true ) && + ( ( 7 > dst::rank_dynamic && 7 > src::rank_dynamic ) ? (size_t(dst::ArgN6) == size_t(src::ArgN6)) : true ) && + ( ( 8 > dst::rank_dynamic && 8 > src::rank_dynamic ) ? 
(size_t(dst::ArgN7) == size_t(src::ArgN7)) : true ) + )}; + +}; + +}}} // namespace Kokkos::Experimental::Impl + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +struct ALL_t { + KOKKOS_INLINE_FUNCTION + constexpr const ALL_t & operator()() const { return *this ; } +}; + +template< class T > +struct is_integral_extent_type +{ enum { value = std::is_same<T,Kokkos::Experimental::Impl::ALL_t>::value ? 1 : 0 }; }; + +template< class iType > +struct is_integral_extent_type< std::pair<iType,iType> > +{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; }; + +template< class iType > +struct is_integral_extent_type< Kokkos::pair<iType,iType> > +{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; }; + +// Assuming '2 == initializer_list<iType>::size()' +template< class iType > +struct is_integral_extent_type< std::initializer_list<iType> > +{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; }; + +template < unsigned I , class ... Args > +struct is_integral_extent +{ + // get_type is void when sizeof...(Args) <= I + typedef typename std::remove_cv< + typename std::remove_reference< + typename Kokkos::Impl::get_type<I,Args... + >::type >::type >::type type ; + + enum { value = is_integral_extent_type<type>::value }; + + static_assert( value || + std::is_integral<type>::value || + std::is_same<type,void>::value + , "subview argument must be either integral or integral extent" ); +}; + +template< unsigned DomainRank , unsigned RangeRank > +struct SubviewExtents { +private: + + // Cannot declare zero-length arrays + enum { InternalRangeRank = RangeRank ? RangeRank : 1u }; + + size_t m_begin[ DomainRank ]; + size_t m_length[ InternalRangeRank ]; + unsigned m_index[ InternalRangeRank ]; + + template< size_t ... 
DimArgs > + KOKKOS_FORCEINLINE_FUNCTION + bool set( unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim ) + { return true ; } + + template< class T , size_t ... DimArgs , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + bool set( unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim + , const T & val + , Args ... args ) + { + const size_t v = static_cast<size_t>(val); + + m_begin[ domain_rank ] = v ; + + return set( domain_rank + 1 , range_rank , dim , args... ) +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + && ( v < dim.extent( domain_rank ) ) +#endif + ; + } + + // ALL_t + template< size_t ... DimArgs , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + bool set( unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim + , const Kokkos::Experimental::Impl::ALL_t + , Args ... args ) + { + m_begin[ domain_rank ] = 0 ; + m_length[ range_rank ] = dim.extent( domain_rank ); + m_index[ range_rank ] = domain_rank ; + + return set( domain_rank + 1 , range_rank + 1 , dim , args... ); + } + + // std::pair range + template< class T , size_t ... DimArgs , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + bool set( unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim + , const std::pair<T,T> & val + , Args ... args ) + { + const size_t b = static_cast<size_t>( val.first ); + const size_t e = static_cast<size_t>( val.second ); + + m_begin[ domain_rank ] = b ; + m_length[ range_rank ] = e - b ; + m_index[ range_rank ] = domain_rank ; + + return set( domain_rank + 1 , range_rank + 1 , dim , args... ) +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + && ( e <= b + dim.extent( domain_rank ) ) +#endif + ; + } + + // Kokkos::pair range + template< class T , size_t ... DimArgs , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + bool set( unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... 
> & dim + , const Kokkos::pair<T,T> & val + , Args ... args ) + { + const size_t b = static_cast<size_t>( val.first ); + const size_t e = static_cast<size_t>( val.second ); + + m_begin[ domain_rank ] = b ; + m_length[ range_rank ] = e - b ; + m_index[ range_rank ] = domain_rank ; + + return set( domain_rank + 1 , range_rank + 1 , dim , args... ) +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + && ( e <= b + dim.extent( domain_rank ) ) +#endif + ; + } + + // { begin , end } range + template< class T , size_t ... DimArgs , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + bool set( unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim + , const std::initializer_list< T > & val + , Args ... args ) + { + const size_t b = static_cast<size_t>( val.begin()[0] ); + const size_t e = static_cast<size_t>( val.begin()[1] ); + + m_begin[ domain_rank ] = b ; + m_length[ range_rank ] = e - b ; + m_index[ range_rank ] = domain_rank ; + + return set( domain_rank + 1 , range_rank + 1 , dim , args... ) +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + && ( val.size() == 2 ) + && ( e <= b + dim.extent( domain_rank ) ) +#endif + ; + } + + //------------------------------ + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + + template< size_t ... DimArgs > + void error( char * + , int + , unsigned + , unsigned + , const ViewDimension< DimArgs ... > & ) const + {} + + template< class T , size_t ... DimArgs , class ... Args > + void error( char * buf , int buf_len + , unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim + , const T & val + , Args ... args ) const + { + const int n = std::min( buf_len , + snprintf( buf , buf_len + , " %lu < %lu %c" + , static_cast<unsigned long>(val) + , static_cast<unsigned long>( dim.extent( domain_rank ) ) + , int( sizeof...(Args) ? ',' : ')' ) ) ); + + error( buf+n, buf_len-n, domain_rank + 1 , range_rank , dim , args... ); + } + + // std::pair range + template< size_t ... 
DimArgs , class ... Args > + void error( char * buf , int buf_len + , unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim + , const Kokkos::Experimental::Impl::ALL_t + , Args ... args ) const + { + const int n = std::min( buf_len , + snprintf( buf , buf_len + , " Kokkos::ALL %c" + , int( sizeof...(Args) ? ',' : ')' ) ) ); + + error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... ); + } + + // std::pair range + template< class T , size_t ... DimArgs , class ... Args > + void error( char * buf , int buf_len + , unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim + , const std::pair<T,T> & val + , Args ... args ) const + { + // d <= e - b + const int n = std::min( buf_len , + snprintf( buf , buf_len + , " %lu <= %lu - %lu %c" + , static_cast<unsigned long>( dim.extent( domain_rank ) ) + , static_cast<unsigned long>( val.second ) + , static_cast<unsigned long>( val.begin ) + , int( sizeof...(Args) ? ',' : ')' ) ) ); + + error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... ); + } + + // Kokkos::pair range + template< class T , size_t ... DimArgs , class ... Args > + void error( char * buf , int buf_len + , unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim + , const Kokkos::pair<T,T> & val + , Args ... args ) const + { + // d <= e - b + const int n = std::min( buf_len , + snprintf( buf , buf_len + , " %lu <= %lu - %lu %c" + , static_cast<unsigned long>( dim.extent( domain_rank ) ) + , static_cast<unsigned long>( val.second ) + , static_cast<unsigned long>( val.begin ) + , int( sizeof...(Args) ? ',' : ')' ) ) ); + + error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... ); + } + + // { begin , end } range + template< class T , size_t ... DimArgs , class ... Args > + void error( char * buf , int buf_len + , unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... 
> & dim + , const std::initializer_list< T > & val + , Args ... args ) const + { + // d <= e - b + int n = 0 ; + if ( val.size() == 2 ) { + n = std::min( buf_len , + snprintf( buf , buf_len + , " %lu <= %lu - %lu %c" + , static_cast<unsigned long>( dim.extent( domain_rank ) ) + , static_cast<unsigned long>( val.begin()[0] ) + , static_cast<unsigned long>( val.begin()[1] ) + , int( sizeof...(Args) ? ',' : ')' ) ) ); + } + else { + n = std::min( buf_len , + snprintf( buf , buf_len + , " { ... }.size() == %u %c" + , unsigned(val.size()) + , int( sizeof...(Args) ? ',' : ')' ) ) ); + } + + error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... ); + } + + template< size_t ... DimArgs , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + void error( const ViewDimension< DimArgs ... > & dim , Args ... args ) const + { +#if defined( KOKKOS_ACTIVE_EXECUTION_SPACE_HOST ) + enum { LEN = 1024 }; + char buffer[ LEN ]; + + const int n = snprintf(buffer,LEN,"Kokkos::subview bounds error ("); + error( buffer+n , LEN-n , 0 , 0 , dim , args... ); + + Kokkos::Impl::throw_runtime_exception(std::string(buffer)); +#else + Kokkos::abort("Kokkos::subview bounds error"); +#endif + } + +#else + + template< size_t ... DimArgs , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + void error( const ViewDimension< DimArgs ... > & , Args ... ) const {} + +#endif + +public: + + template< size_t ... DimArgs , class ... Args > + KOKKOS_INLINE_FUNCTION + SubviewExtents( const ViewDimension< DimArgs ... > & dim , Args ... args ) + { + static_assert( DomainRank == sizeof...(DimArgs) , "" ); + static_assert( DomainRank == sizeof...(Args) , "" ); + + // Verifies that all arguments, up to 8, are integral types, + // integral extents, or don't exist. 
+ static_assert( RangeRank == + unsigned( is_integral_extent<0,Args...>::value ) + + unsigned( is_integral_extent<1,Args...>::value ) + + unsigned( is_integral_extent<2,Args...>::value ) + + unsigned( is_integral_extent<3,Args...>::value ) + + unsigned( is_integral_extent<4,Args...>::value ) + + unsigned( is_integral_extent<5,Args...>::value ) + + unsigned( is_integral_extent<6,Args...>::value ) + + unsigned( is_integral_extent<7,Args...>::value ) , "" ); + + if ( RangeRank == 0 ) { m_length[0] = 0 ; m_index[0] = ~0u ; } + + if ( ! set( 0 , 0 , dim , args... ) ) error( dim , args... ); + } + + template < typename iType > + KOKKOS_FORCEINLINE_FUNCTION + constexpr size_t domain_offset( const iType i ) const + { return unsigned(i) < DomainRank ? m_begin[i] : 0 ; } + + template < typename iType > + KOKKOS_FORCEINLINE_FUNCTION + constexpr size_t range_extent( const iType i ) const + { return unsigned(i) < InternalRangeRank ? m_length[i] : 0 ; } + + template < typename iType > + KOKKOS_FORCEINLINE_FUNCTION + constexpr unsigned range_index( const iType i ) const + { return unsigned(i) < InternalRangeRank ? m_index[i] : ~0u ; } +}; + +}}} // namespace Kokkos::Experimental::Impl + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +/** \brief Given a value type and dimension generate the View data type */ +template< class T , class Dim > +struct ViewDataType ; + +template< class T > +struct ViewDataType< T , ViewDimension<> > +{ + typedef T type ; +}; + +template< class T , size_t ... Args > +struct ViewDataType< T , ViewDimension< 0 , Args... > > +{ + typedef typename ViewDataType<T*,ViewDimension<Args...> >::type type ; +}; + +template< class T , size_t N , size_t ... Args > +struct ViewDataType< T , ViewDimension< N , Args... 
> > +{ + typedef typename ViewDataType<T,ViewDimension<Args...> >::type type[N] ; +}; + +/**\brief Analysis of View data type. + * + * Data type conforms to one of the following patterns : + * {const} value_type [][#][#][#] + * {const} value_type ***[#][#][#] + * Where the sum of counts of '*' and '[#]' is at most ten. + * + * Provide typedef for the ViewDimension<...> and value_type. + */ +template< class T > +struct ViewArrayAnalysis +{ + typedef T value_type ; + typedef typename std::add_const< T >::type const_value_type ; + typedef typename std::remove_const< T >::type non_const_value_type ; + typedef ViewDimension<> static_dimension ; + typedef ViewDimension<> dynamic_dimension ; + typedef ViewDimension<> dimension ; +}; + +template< class T , size_t N > +struct ViewArrayAnalysis< T[N] > +{ +private: + typedef ViewArrayAnalysis< T > nested ; +public: + typedef typename nested::value_type value_type ; + typedef typename nested::const_value_type const_value_type ; + typedef typename nested::non_const_value_type non_const_value_type ; + + typedef typename nested::static_dimension::template prepend<N>::type + static_dimension ; + + typedef typename nested::dynamic_dimension dynamic_dimension ; + + typedef typename + ViewDimensionJoin< dynamic_dimension , static_dimension >::type + dimension ; +}; + +template< class T > +struct ViewArrayAnalysis< T[] > +{ +private: + typedef ViewArrayAnalysis< T > nested ; + typedef typename nested::dimension nested_dimension ; +public: + typedef typename nested::value_type value_type ; + typedef typename nested::const_value_type const_value_type ; + typedef typename nested::non_const_value_type non_const_value_type ; + + typedef typename nested::dynamic_dimension::template prepend<0>::type + dynamic_dimension ; + + typedef typename nested::static_dimension static_dimension ; + + typedef typename + ViewDimensionJoin< dynamic_dimension , static_dimension >::type + dimension ; +}; + +template< class T > +struct ViewArrayAnalysis< T* 
> +{ +private: + typedef ViewArrayAnalysis< T > nested ; +public: + typedef typename nested::value_type value_type ; + typedef typename nested::const_value_type const_value_type ; + typedef typename nested::non_const_value_type non_const_value_type ; + + typedef typename nested::dynamic_dimension::template prepend<0>::type + dynamic_dimension ; + + typedef typename nested::static_dimension static_dimension ; + + typedef typename + ViewDimensionJoin< dynamic_dimension , static_dimension >::type + dimension ; +}; + + +template< class DataType , class ArrayLayout , class ValueType > +struct ViewDataAnalysis +{ +private: + + typedef ViewArrayAnalysis< DataType > array_analysis ; + + // ValueType is opportunity for partial specialization. + // Must match array analysis when this default template is used. + static_assert( std::is_same< ValueType , typename array_analysis::non_const_value_type >::value , "" ); + +public: + + typedef void specialize ; // No specialization + + typedef typename array_analysis::dimension dimension ; + typedef typename array_analysis::value_type value_type ; + typedef typename array_analysis::const_value_type const_value_type ; + typedef typename array_analysis::non_const_value_type non_const_value_type ; + + // Generate analogous multidimensional array specification type. + typedef typename ViewDataType< value_type , dimension >::type type ; + typedef typename ViewDataType< const_value_type , dimension >::type const_type ; + typedef typename ViewDataType< non_const_value_type , dimension >::type non_const_type ; + + // Generate "flattened" multidimensional array specification type. 
+ typedef type scalar_array_type ; + typedef const_type const_scalar_array_type ; + typedef non_const_type non_const_scalar_array_type ; +}; + +}}} // namespace Kokkos::Experimental::Impl + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template < class Dimension , class Layout , typename Enable = void > +struct ViewOffset { + using is_mapping_plugin = std::false_type ; +}; + +//---------------------------------------------------------------------------- +// LayoutLeft AND ( 1 >= rank OR 0 == rank_dynamic ) : no padding / striding +template < class Dimension > +struct ViewOffset< Dimension , Kokkos::LayoutLeft + , typename std::enable_if<( 1 >= Dimension::rank + || + 0 == Dimension::rank_dynamic + )>::type > +{ + using is_mapping_plugin = std::true_type ; + using is_regular = std::true_type ; + + typedef size_t size_type ; + typedef Dimension dimension_type ; + typedef Kokkos::LayoutLeft array_layout ; + + dimension_type m_dim ; + + //---------------------------------------- + + // rank 1 + template< typename I0 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0 ) const { return i0 ; } + + // rank 2 + template < typename I0 , typename I1 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0 , I1 const & i1 ) const + { return i0 + m_dim.N0 * i1 ; } + + //rank 3 + template < typename I0, typename I1, typename I2 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const + { + return i0 + m_dim.N0 * ( i1 + m_dim.N1 * i2 ); + } + + //rank 4 + template < typename I0, typename I1, typename I2, typename I3 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const + { + return i0 + m_dim.N0 * ( + i1 + m_dim.N1 * ( + i2 + 
m_dim.N2 * i3 )); + } + + //rank 5 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4 ) const + { + return i0 + m_dim.N0 * ( + i1 + m_dim.N1 * ( + i2 + m_dim.N2 * ( + i3 + m_dim.N3 * i4 ))); + } + + //rank 6 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5 ) const + { + return i0 + m_dim.N0 * ( + i1 + m_dim.N1 * ( + i2 + m_dim.N2 * ( + i3 + m_dim.N3 * ( + i4 + m_dim.N4 * i5 )))); + } + + //rank 7 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5, typename I6 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5, I6 const & i6 ) const + { + return i0 + m_dim.N0 * ( + i1 + m_dim.N1 * ( + i2 + m_dim.N2 * ( + i3 + m_dim.N3 * ( + i4 + m_dim.N4 * ( + i5 + m_dim.N5 * i6 ))))); + } + + //rank 8 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5, typename I6, typename I7 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const + { + return i0 + m_dim.N0 * ( + i1 + m_dim.N1 * ( + i2 + m_dim.N2 * ( + i3 + m_dim.N3 * ( + i4 + m_dim.N4 * ( + i5 + m_dim.N5 * ( + i6 + m_dim.N6 * i7 )))))); + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + constexpr array_layout layout() const + { + return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3 + , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 ); + } + + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; 
} + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; } + + /* Cardinality of the domain index space */ + KOKKOS_INLINE_FUNCTION + constexpr size_type size() const + { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } + + /* Span of the range space */ + KOKKOS_INLINE_FUNCTION + constexpr size_type span() const + { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } + + KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return true ; } + + /* Strides of dimensions */ + KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_dim.N0 * m_dim.N1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * 
m_dim.N4 * m_dim.N5 * m_dim.N6 ; } + + // Stride with [ rank ] value is the total length + template< typename iType > + KOKKOS_INLINE_FUNCTION + void stride( iType * const s ) const + { + s[0] = 1 ; + if ( 0 < dimension_type::rank ) { s[1] = m_dim.N0 ; } + if ( 1 < dimension_type::rank ) { s[2] = s[1] * m_dim.N1 ; } + if ( 2 < dimension_type::rank ) { s[3] = s[2] * m_dim.N2 ; } + if ( 3 < dimension_type::rank ) { s[4] = s[3] * m_dim.N3 ; } + if ( 4 < dimension_type::rank ) { s[5] = s[4] * m_dim.N4 ; } + if ( 5 < dimension_type::rank ) { s[6] = s[5] * m_dim.N5 ; } + if ( 6 < dimension_type::rank ) { s[7] = s[6] * m_dim.N6 ; } + if ( 7 < dimension_type::rank ) { s[8] = s[7] * m_dim.N7 ; } + } + + //---------------------------------------- + + ViewOffset() = default ; + ViewOffset( const ViewOffset & ) = default ; + ViewOffset & operator = ( const ViewOffset & ) = default ; + + template< unsigned TrivialScalarSize > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset + ( std::integral_constant<unsigned,TrivialScalarSize> const & + , Kokkos::LayoutLeft const & arg_layout + ) + : m_dim( arg_layout.dimension[0], 0, 0, 0, 0, 0, 0, 0 ) + {} + + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs ) + : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 + , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 ) + { + static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" ); + // Also requires equal static dimensions ... 
+ } + + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs ) + : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 ) + { + static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1 + , "ViewOffset LayoutLeft and LayoutRight are only compatible when rank == 1" ); + } + + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs ) + : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 ) + { + static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1 + , "ViewOffset LayoutLeft and LayoutStride are only compatible when rank == 1" ); + if ( rhs.m_stride.S0 != 1 ) { + Kokkos::abort("Kokkos::Experimental::ViewOffset assignment of LayoutLeft from LayoutStride requires stride == 1" ); + } + } + + //---------------------------------------- + // Subview construction + + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset( + const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs , + const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub ) + : m_dim( sub.range_extent(0), 0, 0, 0, 0, 0, 0, 0 ) + { + static_assert( ( 0 == dimension_type::rank ) || + ( 1 == dimension_type::rank && 1 == dimension_type::rank_dynamic && 1 <= DimRHS::rank ) + , "ViewOffset subview construction requires compatible rank" ); + } +}; + +//---------------------------------------------------------------------------- +// LayoutLeft AND ( 1 < rank AND 0 < rank_dynamic ) : has padding / striding +template < class Dimension > +struct ViewOffset< Dimension , Kokkos::LayoutLeft + , typename std::enable_if<( 1 < Dimension::rank + && + 0 < Dimension::rank_dynamic + )>::type > +{ + using is_mapping_plugin = std::true_type ; + using is_regular = std::true_type ; + + typedef size_t size_type ; + typedef Dimension dimension_type ; + typedef Kokkos::LayoutLeft 
array_layout ; + + dimension_type m_dim ; + size_type m_stride ; + + //---------------------------------------- + + // rank 1 + template< typename I0 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0 ) const { return i0 ; } + + // rank 2 + template < typename I0 , typename I1 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0 , I1 const & i1 ) const + { return i0 + m_stride * i1 ; } + + //rank 3 + template < typename I0, typename I1, typename I2 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const + { + return i0 + m_stride * ( i1 + m_dim.N1 * i2 ); + } + + //rank 4 + template < typename I0, typename I1, typename I2, typename I3 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const + { + return i0 + m_stride * ( + i1 + m_dim.N1 * ( + i2 + m_dim.N2 * i3 )); + } + + //rank 5 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4 ) const + { + return i0 + m_stride * ( + i1 + m_dim.N1 * ( + i2 + m_dim.N2 * ( + i3 + m_dim.N3 * i4 ))); + } + + //rank 6 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5 ) const + { + return i0 + m_stride * ( + i1 + m_dim.N1 * ( + i2 + m_dim.N2 * ( + i3 + m_dim.N3 * ( + i4 + m_dim.N4 * i5 )))); + } + + //rank 7 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5, typename I6 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5, I6 const & i6 ) const + { + return i0 + 
m_stride * ( + i1 + m_dim.N1 * ( + i2 + m_dim.N2 * ( + i3 + m_dim.N3 * ( + i4 + m_dim.N4 * ( + i5 + m_dim.N5 * i6 ))))); + } + + //rank 8 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5, typename I6, typename I7 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const + { + return i0 + m_stride * ( + i1 + m_dim.N1 * ( + i2 + m_dim.N2 * ( + i3 + m_dim.N3 * ( + i4 + m_dim.N4 * ( + i5 + m_dim.N5 * ( + i6 + m_dim.N6 * i7 )))))); + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + constexpr array_layout layout() const + { + return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3 + , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 ); + } + + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; } + + /* Cardinality of the domain index space */ + KOKKOS_INLINE_FUNCTION + constexpr size_type size() const + { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } + + /* Span of the range space */ + KOKKOS_INLINE_FUNCTION + constexpr size_type span() const + { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } + + KOKKOS_INLINE_FUNCTION constexpr bool 
span_is_contiguous() const { return m_stride == m_dim.N0 ; } + + /* Strides of dimensions */ + KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_stride ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_stride * m_dim.N1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_stride * m_dim.N1 * m_dim.N2 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 ; } + + // Stride with [ rank ] value is the total length + template< typename iType > + KOKKOS_INLINE_FUNCTION + void stride( iType * const s ) const + { + s[0] = 1 ; + if ( 0 < dimension_type::rank ) { s[1] = m_stride ; } + if ( 1 < dimension_type::rank ) { s[2] = s[1] * m_dim.N1 ; } + if ( 2 < dimension_type::rank ) { s[3] = s[2] * m_dim.N2 ; } + if ( 3 < dimension_type::rank ) { s[4] = s[3] * m_dim.N3 ; } + if ( 4 < dimension_type::rank ) { s[5] = s[4] * m_dim.N4 ; } + if ( 5 < dimension_type::rank ) { s[6] = s[5] * m_dim.N5 ; } + if ( 6 < dimension_type::rank ) { s[7] = s[6] * m_dim.N6 ; } + if ( 7 < dimension_type::rank ) { s[8] = s[7] * m_dim.N7 ; } + } + + //---------------------------------------- + +private: + + template< unsigned TrivialScalarSize > + struct Padding { + enum { div = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT / ( TrivialScalarSize ? TrivialScalarSize : 1 ) }; + enum { mod = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT % ( TrivialScalarSize ? 
TrivialScalarSize : 1 ) }; + + // If memory alignment is a multiple of the trivial scalar size then attempt to align. + enum { align = 0 != TrivialScalarSize && 0 == mod ? div : 0 }; + enum { div_ok = div ? div : 1 }; // To avoid modulo by zero in constexpr + + // Returns N rounded up to the next multiple of align when align is non-zero , + // N is greater than MEMORY_ALIGNMENT_THRESHOLD * align , and N is not already a multiple ; + // otherwise returns N unchanged. + KOKKOS_INLINE_FUNCTION + static constexpr size_t stride( size_t const N ) + { + return ( align && ( Kokkos::Impl::MEMORY_ALIGNMENT_THRESHOLD * align < N ) && ( N % div_ok ) ) + ? N + align - ( N % div_ok ) : N ; + } + }; + +public: + + ViewOffset() = default ; + ViewOffset( const ViewOffset & ) = default ; + ViewOffset & operator = ( const ViewOffset & ) = default ; + + /* Enable padding for trivial scalar types with non-zero trivial scalar size */ + // The first argument is used only for its type ; m_stride is the padded value of dimension #0. + template< unsigned TrivialScalarSize > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset + ( std::integral_constant<unsigned,TrivialScalarSize> const & padding_type_size + , Kokkos::LayoutLeft const & arg_layout + ) + : m_dim( arg_layout.dimension[0] , arg_layout.dimension[1] + , arg_layout.dimension[2] , arg_layout.dimension[3] + , arg_layout.dimension[4] , arg_layout.dimension[5] + , arg_layout.dimension[6] , arg_layout.dimension[7] + ) + , m_stride( Padding<TrivialScalarSize>::stride( arg_layout.dimension[0] ) ) + {} + + // Copies dimensions and the dimension #1 stride from a LayoutLeft offset of equal rank. + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs ) + : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 + , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 ) + , m_stride( rhs.stride_1() ) + { + static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" ); + // Also requires equal static dimensions ... + } + + //---------------------------------------- + // Subview construction + // This subview must be 2 == rank and 2 == rank_dynamic + // due to only having stride #0. + // The source dimension #0 must be non-zero for stride-one leading dimension. 
+ // At most one subsequent dimension can be non-zero. + // m_stride is the source stride of the source dimension that subview dimension #1 maps to + // ( sub.range_index(1) ) ; it is 0 when that index is not in 1 through 7. + + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset + ( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs , + const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub ) + : m_dim( sub.range_extent(0) + , sub.range_extent(1) + , 0, 0, 0, 0, 0, 0 ) + , m_stride( ( 1 == sub.range_index(1) ? rhs.stride_1() : + ( 2 == sub.range_index(1) ? rhs.stride_2() : + ( 3 == sub.range_index(1) ? rhs.stride_3() : + ( 4 == sub.range_index(1) ? rhs.stride_4() : + ( 5 == sub.range_index(1) ? rhs.stride_5() : + ( 6 == sub.range_index(1) ? rhs.stride_6() : + ( 7 == sub.range_index(1) ? rhs.stride_7() : 0 )))))))) + { + static_assert( ( 2 == dimension_type::rank ) && + ( 2 == dimension_type::rank_dynamic ) && + ( 2 <= DimRHS::rank ) + , "ViewOffset subview construction requires compatible rank" ); + } +}; + +//---------------------------------------------------------------------------- +// LayoutRight AND ( 1 >= rank OR 0 == rank_dynamic ) : no padding / striding +template < class Dimension > +struct ViewOffset< Dimension , Kokkos::LayoutRight + , typename std::enable_if<( 1 >= Dimension::rank + || + 0 == Dimension::rank_dynamic + )>::type > +{ + using is_mapping_plugin = std::true_type ; + using is_regular = std::true_type ; + + typedef size_t size_type ; + typedef Dimension dimension_type ; + typedef Kokkos::LayoutRight array_layout ; + + dimension_type m_dim ; + + //---------------------------------------- + + // rank 1 + template< typename I0 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0 ) const { return i0 ; } + + // rank 2 + template < typename I0 , typename I1 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0 , I1 const & i1 ) const + { return i1 + m_dim.N1 * i0 ; } + + //rank 3 + template < typename I0, typename I1, typename I2 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) 
const + { + return i2 + m_dim.N2 * ( i1 + m_dim.N1 * ( i0 )); + } + + //rank 4 + template < typename I0, typename I1, typename I2, typename I3 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const + { + return i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( + i1 + m_dim.N1 * ( i0 ))); + } + + //rank 5 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4 ) const + { + return i4 + m_dim.N4 * ( + i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( + i1 + m_dim.N1 * ( i0 )))); + } + + //rank 6 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5 ) const + { + return i5 + m_dim.N5 * ( + i4 + m_dim.N4 * ( + i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( + i1 + m_dim.N1 * ( i0 ))))); + } + + //rank 7 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5, typename I6 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5, I6 const & i6 ) const + { + return i6 + m_dim.N6 * ( + i5 + m_dim.N5 * ( + i4 + m_dim.N4 * ( + i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( + i1 + m_dim.N1 * ( i0 )))))); + } + + //rank 8 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5, typename I6, typename I7 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const + { + return i7 + m_dim.N7 * ( + i6 + m_dim.N6 * ( + i5 + m_dim.N5 * ( + i4 + m_dim.N4 * ( + i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( + i1 + m_dim.N1 
* ( i0 ))))))); + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + constexpr array_layout layout() const + { + return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3 + , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 ); + } + + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; } + + /* Cardinality of the domain index space */ + KOKKOS_INLINE_FUNCTION + constexpr size_type size() const + { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } + + /* Span of the range space */ + KOKKOS_INLINE_FUNCTION + constexpr size_type span() const + { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } + + KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return true ; } + + /* Strides of dimensions */ + KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N7 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N7 * m_dim.N6 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 ; } + KOKKOS_INLINE_FUNCTION constexpr 
size_type stride_2() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 * m_dim.N1 ; } + + // Stride with [ rank ] value is the total length + template< typename iType > + KOKKOS_INLINE_FUNCTION + void stride( iType * const s ) const + { + size_type n = 1 ; + if ( 7 < dimension_type::rank ) { s[7] = n ; n *= m_dim.N7 ; } + if ( 6 < dimension_type::rank ) { s[6] = n ; n *= m_dim.N6 ; } + if ( 5 < dimension_type::rank ) { s[5] = n ; n *= m_dim.N5 ; } + if ( 4 < dimension_type::rank ) { s[4] = n ; n *= m_dim.N4 ; } + if ( 3 < dimension_type::rank ) { s[3] = n ; n *= m_dim.N3 ; } + if ( 2 < dimension_type::rank ) { s[2] = n ; n *= m_dim.N2 ; } + if ( 1 < dimension_type::rank ) { s[1] = n ; n *= m_dim.N1 ; } + if ( 0 < dimension_type::rank ) { s[0] = n ; } + s[dimension_type::rank] = n * m_dim.N0 ; + } + + //---------------------------------------- + + ViewOffset() = default ; + ViewOffset( const ViewOffset & ) = default ; + ViewOffset & operator = ( const ViewOffset & ) = default ; + + template< unsigned TrivialScalarSize > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset + ( std::integral_constant<unsigned,TrivialScalarSize> const & + , Kokkos::LayoutRight const & arg_layout + ) + : m_dim( arg_layout.dimension[0], 0, 0, 0, 0, 0, 0, 0 ) + {} + + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs ) + : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 + , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 ) + { + static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" ); + // Also requires equal static dimensions ... 
+ } + + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs ) + : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 ) + { + static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1 + , "ViewOffset LayoutRight and LayoutLeft are only compatible when rank == 1" ); + } + + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs ) + : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 ) + { + static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1 + , "ViewOffset LayoutLeft/Right and LayoutStride are only compatible when rank == 1" ); + if ( rhs.m_stride.S0 != 1 ) { + Kokkos::abort("Kokkos::Experimental::ViewOffset assignment of LayoutLeft/Right from LayoutStride requires stride == 1" ); + } + } + + //---------------------------------------- + // Subview construction + + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset + ( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs + , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub + ) + : m_dim( sub.range_extent(0) , 0, 0, 0, 0, 0, 0, 0 ) + { + static_assert( ( 0 == dimension_type::rank_dynamic ) || + ( 1 == dimension_type::rank && 1 == dimension_type::rank_dynamic && 1 <= DimRHS::rank ) + , "ViewOffset subview construction requires compatible rank" ); + } +}; + +//---------------------------------------------------------------------------- +// LayoutRight AND ( 1 < rank AND 0 < rank_dynamic ) : has padding / striding +template < class Dimension > +struct ViewOffset< Dimension , Kokkos::LayoutRight + , typename std::enable_if<( 1 < Dimension::rank + && + 0 < Dimension::rank_dynamic + )>::type > +{ + using is_mapping_plugin = std::true_type ; + using is_regular = std::true_type ; + + typedef size_t size_type ; + typedef Dimension dimension_type ; + typedef 
Kokkos::LayoutRight array_layout ; + + dimension_type m_dim ; + size_type m_stride ; + + //---------------------------------------- + + // rank 1 + template< typename I0 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0 ) const { return i0 ; } + + // rank 2 + template < typename I0 , typename I1 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0 , I1 const & i1 ) const + { return i1 + i0 * m_stride ; } + + //rank 3 + template < typename I0, typename I1, typename I2 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const + { return i2 + m_dim.N2 * ( i1 ) + i0 * m_stride ; } + + //rank 4 + template < typename I0, typename I1, typename I2, typename I3 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const + { + return i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( i1 )) + + i0 * m_stride ; + } + + //rank 5 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4 ) const + { + return i4 + m_dim.N4 * ( + i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( i1 ))) + + i0 * m_stride ; + } + + //rank 6 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5 ) const + { + return i5 + m_dim.N5 * ( + i4 + m_dim.N4 * ( + i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( i1 )))) + + i0 * m_stride ; + } + + //rank 7 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5, typename I6 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5, I6 const & i6 ) const + 
{ + return i6 + m_dim.N6 * ( + i5 + m_dim.N5 * ( + i4 + m_dim.N4 * ( + i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( i1 ))))) + + i0 * m_stride ; + } + + //rank 8 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5, typename I6, typename I7 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const + { + return i7 + m_dim.N7 * ( + i6 + m_dim.N6 * ( + i5 + m_dim.N5 * ( + i4 + m_dim.N4 * ( + i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( i1 )))))) + + i0 * m_stride ; + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + constexpr array_layout layout() const + { + return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3 + , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 ); + } + + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; } + + /* Cardinality of the domain index space */ + KOKKOS_INLINE_FUNCTION + constexpr size_type size() const + { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } + + /* Span of the range space */ + KOKKOS_INLINE_FUNCTION + constexpr size_type span() const + { return m_dim.N0 * m_stride ; } + + KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const + { return m_stride == 
m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 * m_dim.N1 ; } + + /* Strides of dimensions */ + KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N7 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N7 * m_dim.N6 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_stride ; } + + // Stride with [ rank ] value is the total length + template< typename iType > + KOKKOS_INLINE_FUNCTION + void stride( iType * const s ) const + { + size_type n = 1 ; + if ( 7 < dimension_type::rank ) { s[7] = n ; n *= m_dim.N7 ; } + if ( 6 < dimension_type::rank ) { s[6] = n ; n *= m_dim.N6 ; } + if ( 5 < dimension_type::rank ) { s[5] = n ; n *= m_dim.N5 ; } + if ( 4 < dimension_type::rank ) { s[4] = n ; n *= m_dim.N4 ; } + if ( 3 < dimension_type::rank ) { s[3] = n ; n *= m_dim.N3 ; } + if ( 2 < dimension_type::rank ) { s[2] = n ; n *= m_dim.N2 ; } + if ( 1 < dimension_type::rank ) { s[1] = n ; } + if ( 0 < dimension_type::rank ) { s[0] = m_stride ; } + s[dimension_type::rank] = m_stride * m_dim.N0 ; + } + + //---------------------------------------- + +private: + + template< unsigned TrivialScalarSize > + struct Padding { + enum { div = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT / ( TrivialScalarSize ? TrivialScalarSize : 1 ) }; + enum { mod = TrivialScalarSize == 0 ? 
0 : Kokkos::Impl::MEMORY_ALIGNMENT % ( TrivialScalarSize ? TrivialScalarSize : 1 ) }; + + // If memory alignment is a multiple of the trivial scalar size then attempt to align. + enum { align = 0 != TrivialScalarSize && 0 == mod ? div : 0 }; + enum { div_ok = div ? div : 1 }; // To avoid modulo by zero in constexpr + + KOKKOS_INLINE_FUNCTION + static constexpr size_t stride( size_t const N ) + { + return ( align && ( Kokkos::Impl::MEMORY_ALIGNMENT_THRESHOLD * align < N ) && ( N % div_ok ) ) + ? N + align - ( N % div_ok ) : N ; + } + }; + +public: + + ViewOffset() = default ; + ViewOffset( const ViewOffset & ) = default ; + ViewOffset & operator = ( const ViewOffset & ) = default ; + + /* Enable padding for trivial scalar types with non-zero trivial scalar size. */ + template< unsigned TrivialScalarSize > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset + ( std::integral_constant<unsigned,TrivialScalarSize> const & padding_type_size + , Kokkos::LayoutRight const & arg_layout + ) + : m_dim( arg_layout.dimension[0] , arg_layout.dimension[1] + , arg_layout.dimension[2] , arg_layout.dimension[3] + , arg_layout.dimension[4] , arg_layout.dimension[5] + , arg_layout.dimension[6] , arg_layout.dimension[7] + ) + , m_stride( Padding<TrivialScalarSize>:: + stride( /* 2 <= rank */ + m_dim.N1 * ( dimension_type::rank == 2 ? 1 : + m_dim.N2 * ( dimension_type::rank == 3 ? 1 : + m_dim.N3 * ( dimension_type::rank == 4 ? 1 : + m_dim.N4 * ( dimension_type::rank == 5 ? 1 : + m_dim.N5 * ( dimension_type::rank == 6 ? 1 : + m_dim.N6 * ( dimension_type::rank == 7 ? 
1 : m_dim.N7 )))))) )) + {} + + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs ) + : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 + , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 ) + , m_stride( rhs.stride_0() ) + { + static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" ); + // Also requires equal static dimensions ... + } + + //---------------------------------------- + // Subview construction + // Last dimension must be non-zero + + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset + ( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs + , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub + ) + : m_dim( sub.range_extent(0) + , sub.range_extent(1) + , 0, 0, 0, 0, 0, 0 ) + , m_stride( 0 == sub.range_index(0) ? rhs.stride_0() : ( + 1 == sub.range_index(0) ? rhs.stride_1() : ( + 2 == sub.range_index(0) ? rhs.stride_2() : ( + 3 == sub.range_index(0) ? rhs.stride_3() : ( + 4 == sub.range_index(0) ? rhs.stride_4() : ( + 5 == sub.range_index(0) ? rhs.stride_5() : ( + 6 == sub.range_index(0) ? rhs.stride_6() : 0 ))))))) + { + // This subview must be 2 == rank and 2 == rank_dynamic + // due to only having stride #0. + // The last source dimension must be non-zero for stride-one trailing dimension. + // At most one preceding dimension can be non-zero. 
+ + static_assert( ( 2 == dimension_type::rank ) && + ( 2 <= DimRHS::rank ) + , "ViewOffset subview construction requires compatible rank" ); + } +}; + +//---------------------------------------------------------------------------- +/* Strided array layout only makes sense for 0 < rank */ +/* rank = 0 included for DynRankView case */ + +template< unsigned Rank > +struct ViewStride ; + +template<> +struct ViewStride<0> { + enum { S0 = 0 , S1 = 0 , S2 = 0 , S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 }; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t , size_t , size_t , size_t + , size_t , size_t , size_t , size_t ) + {} +}; + +template<> +struct ViewStride<1> { + size_t S0 ; + enum { S1 = 0 , S2 = 0 , S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 }; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t aS0 , size_t , size_t , size_t + , size_t , size_t , size_t , size_t ) + : S0( aS0 ) + {} +}; + +template<> +struct ViewStride<2> { + size_t S0 , S1 ; + enum { S2 = 0 , S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 }; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t aS0 , size_t aS1 , size_t , size_t + , size_t , size_t , size_t , size_t ) + : S0( aS0 ) , S1( aS1 ) + {} +}; + +template<> +struct ViewStride<3> { + size_t S0 , S1 , S2 ; + enum { S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 }; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t + , size_t , size_t , size_t , 
size_t ) + : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) + {} +}; + +template<> +struct ViewStride<4> { + size_t S0 , S1 , S2 , S3 ; + enum { S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 }; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 + , size_t , size_t , size_t , size_t ) + : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 ) + {} +}; + +template<> +struct ViewStride<5> { + size_t S0 , S1 , S2 , S3 , S4 ; + enum { S5 = 0 , S6 = 0 , S7 = 0 }; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 + , size_t aS4 , size_t , size_t , size_t ) + : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 ) + , S4( aS4 ) + {} +}; + +template<> +struct ViewStride<6> { + size_t S0 , S1 , S2 , S3 , S4 , S5 ; + enum { S6 = 0 , S7 = 0 }; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 + , size_t aS4 , size_t aS5 , size_t , size_t ) + : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 ) + , S4( aS4 ) , S5( aS5 ) + {} +}; + +template<> +struct ViewStride<7> { + size_t S0 , S1 , S2 , S3 , S4 , S5 , S6 ; + enum { S7 = 0 }; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 + , size_t aS4 , size_t aS5 , size_t aS6 , size_t ) + : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 ) + , S4( aS4 ) , S5( aS5 ) , S6( aS6 ) + {} +}; + +template<> +struct ViewStride<8> { + size_t S0 , S1 , S2 , S3 
, S4 , S5 , S6 , S7 ; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 + , size_t aS4 , size_t aS5 , size_t aS6 , size_t aS7 ) + : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 ) + , S4( aS4 ) , S5( aS5 ) , S6( aS6 ) , S7( aS7 ) + {} +}; + +template < class Dimension > +struct ViewOffset< Dimension , Kokkos::LayoutStride + , void > +{ +private: + typedef ViewStride< Dimension::rank > stride_type ; +public: + + using is_mapping_plugin = std::true_type ; + using is_regular = std::true_type ; + + typedef size_t size_type ; + typedef Dimension dimension_type ; + typedef Kokkos::LayoutStride array_layout ; + + dimension_type m_dim ; + stride_type m_stride ; + + //---------------------------------------- + + // rank 1 + template< typename I0 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0 ) const + { + return i0 * m_stride.S0 ; + } + + // rank 2 + template < typename I0 , typename I1 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0 , I1 const & i1 ) const + { + return i0 * m_stride.S0 + + i1 * m_stride.S1 ; + } + + //rank 3 + template < typename I0, typename I1, typename I2 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const + { + return i0 * m_stride.S0 + + i1 * m_stride.S1 + + i2 * m_stride.S2 ; + } + + //rank 4 + template < typename I0, typename I1, typename I2, typename I3 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const + { + return i0 * m_stride.S0 + + i1 * m_stride.S1 + + i2 * m_stride.S2 + + i3 * m_stride.S3 ; + } + + //rank 5 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & 
i1, I2 const & i2, I3 const & i3 + , I4 const & i4 ) const + { + return i0 * m_stride.S0 + + i1 * m_stride.S1 + + i2 * m_stride.S2 + + i3 * m_stride.S3 + + i4 * m_stride.S4 ; + } + + //rank 6 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5 ) const + { + return i0 * m_stride.S0 + + i1 * m_stride.S1 + + i2 * m_stride.S2 + + i3 * m_stride.S3 + + i4 * m_stride.S4 + + i5 * m_stride.S5 ; + } + + //rank 7 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5, typename I6 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5, I6 const & i6 ) const + { + return i0 * m_stride.S0 + + i1 * m_stride.S1 + + i2 * m_stride.S2 + + i3 * m_stride.S3 + + i4 * m_stride.S4 + + i5 * m_stride.S5 + + i6 * m_stride.S6 ; + } + + //rank 8 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5, typename I6, typename I7 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const + { + return i0 * m_stride.S0 + + i1 * m_stride.S1 + + i2 * m_stride.S2 + + i3 * m_stride.S3 + + i4 * m_stride.S4 + + i5 * m_stride.S5 + + i6 * m_stride.S6 + + i7 * m_stride.S7 ; + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + constexpr array_layout layout() const + { + return array_layout( m_dim.N0 , m_stride.S0 + , m_dim.N1 , m_stride.S1 + , m_dim.N2 , m_stride.S2 + , m_dim.N3 , m_stride.S3 + , m_dim.N4 , m_stride.S4 + , m_dim.N5 , m_stride.S5 + , m_dim.N6 , m_stride.S6 + , m_dim.N7 , m_stride.S7 + ); + } + + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; 
} + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; } + + /* Cardinality of the domain index space */ + KOKKOS_INLINE_FUNCTION + constexpr size_type size() const + { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } + +private: + + KOKKOS_INLINE_FUNCTION + static constexpr size_type Max( size_type lhs , size_type rhs ) + { return lhs < rhs ? rhs : lhs ; } + +public: + + /* Span of the range space, largest stride * dimension */ + KOKKOS_INLINE_FUNCTION + constexpr size_type span() const + { + return Max( m_dim.N0 * m_stride.S0 , + Max( m_dim.N1 * m_stride.S1 , + Max( m_dim.N2 * m_stride.S2 , + Max( m_dim.N3 * m_stride.S3 , + Max( m_dim.N4 * m_stride.S4 , + Max( m_dim.N5 * m_stride.S5 , + Max( m_dim.N6 * m_stride.S6 , + m_dim.N7 * m_stride.S7 ))))))); + } + + KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return span() == size(); } + + /* Strides of dimensions */ + KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_stride.S0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_stride.S1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_stride.S2 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_stride.S3 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_stride.S4 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { 
return m_stride.S5 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_stride.S6 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_stride.S7 ; } + + // Stride with [ rank ] value is the total length + template< typename iType > + KOKKOS_INLINE_FUNCTION + void stride( iType * const s ) const + { + if ( 0 < dimension_type::rank ) { s[0] = m_stride.S0 ; } + if ( 1 < dimension_type::rank ) { s[1] = m_stride.S1 ; } + if ( 2 < dimension_type::rank ) { s[2] = m_stride.S2 ; } + if ( 3 < dimension_type::rank ) { s[3] = m_stride.S3 ; } + if ( 4 < dimension_type::rank ) { s[4] = m_stride.S4 ; } + if ( 5 < dimension_type::rank ) { s[5] = m_stride.S5 ; } + if ( 6 < dimension_type::rank ) { s[6] = m_stride.S6 ; } + if ( 7 < dimension_type::rank ) { s[7] = m_stride.S7 ; } + s[dimension_type::rank] = span(); + } + + //---------------------------------------- + + ViewOffset() = default ; + ViewOffset( const ViewOffset & ) = default ; + ViewOffset & operator = ( const ViewOffset & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset( std::integral_constant<unsigned,0> const & + , Kokkos::LayoutStride const & rhs ) + : m_dim( rhs.dimension[0] , rhs.dimension[1] , rhs.dimension[2] , rhs.dimension[3] + , rhs.dimension[4] , rhs.dimension[5] , rhs.dimension[6] , rhs.dimension[7] ) + , m_stride( rhs.stride[0] , rhs.stride[1] , rhs.stride[2] , rhs.stride[3] + , rhs.stride[4] , rhs.stride[5] , rhs.stride[6] , rhs.stride[7] ) + {} + + template< class DimRHS , class LayoutRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset( const ViewOffset< DimRHS , LayoutRHS , void > & rhs ) + : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 + , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 ) + , m_stride( rhs.stride_0() , rhs.stride_1() , rhs.stride_2() , rhs.stride_3() + , rhs.stride_4() , rhs.stride_5() , rhs.stride_6() , rhs.stride_7() ) + { + static_assert( int(DimRHS::rank) == int(dimension_type::rank) , 
"ViewOffset assignment requires equal rank" ); + // Also requires equal static dimensions ... + } + + //---------------------------------------- + // Subview construction + +private: + + template< class DimRHS , class LayoutRHS > + KOKKOS_INLINE_FUNCTION static + constexpr size_t stride + ( unsigned r , const ViewOffset< DimRHS , LayoutRHS , void > & rhs ) + { + return r > 7 ? 0 : ( + r == 0 ? rhs.stride_0() : ( + r == 1 ? rhs.stride_1() : ( + r == 2 ? rhs.stride_2() : ( + r == 3 ? rhs.stride_3() : ( + r == 4 ? rhs.stride_4() : ( + r == 5 ? rhs.stride_5() : ( + r == 6 ? rhs.stride_6() : rhs.stride_7() ))))))); + } + +public: + + template< class DimRHS , class LayoutRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset + ( const ViewOffset< DimRHS , LayoutRHS , void > & rhs + , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub + ) + // range_extent(r) returns 0 when dimension_type::rank <= r + : m_dim( sub.range_extent(0) + , sub.range_extent(1) + , sub.range_extent(2) + , sub.range_extent(3) + , sub.range_extent(4) + , sub.range_extent(5) + , sub.range_extent(6) + , sub.range_extent(7) + ) + // range_index(r) returns ~0u when dimension_type::rank <= r + , m_stride( stride( sub.range_index(0), rhs ) + , stride( sub.range_index(1), rhs ) + , stride( sub.range_index(2), rhs ) + , stride( sub.range_index(3), rhs ) + , stride( sub.range_index(4), rhs ) + , stride( sub.range_index(5), rhs ) + , stride( sub.range_index(6), rhs ) + , stride( sub.range_index(7), rhs ) + ) + {} +}; + +}}} // namespace Kokkos::Experimental::Impl + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +/** \brief ViewDataHandle provides the type of the 'data handle' which the view + * uses to access data with the [] operator. 
It also provides + * an allocate function and a function to extract a raw ptr from the + * data handle. ViewDataHandle also defines an enum ReferenceAble which + * specifies whether references/pointers to elements can be taken and a + * 'return_type' which is what the view operators will give back. + * Specialisation of this object allows three things depending + * on ViewTraits and compiler options: + * (i) Use special allocator (e.g. huge pages/small pages and pinned memory) + * (ii) Use special data handle type (e.g. add Cuda Texture Object) + * (iii) Use special access intrinsics (e.g. texture fetch and non-caching loads) + */ +template< class Traits , class Enable = void > +struct ViewDataHandle { + + typedef typename Traits::value_type value_type ; + typedef typename Traits::value_type * handle_type ; + typedef typename Traits::value_type & return_type ; + typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ; + + KOKKOS_INLINE_FUNCTION + static handle_type assign( value_type * arg_data_ptr + , track_type const & /*arg_tracker*/ ) + { + return handle_type( arg_data_ptr ); + } +}; + +template< class Traits > +struct ViewDataHandle< Traits , + typename std::enable_if<( std::is_same< typename Traits::non_const_value_type + , typename Traits::value_type >::value + && + std::is_same< typename Traits::specialize , void >::value + && + Traits::memory_traits::Atomic + )>::type > +{ + typedef typename Traits::value_type value_type ; + typedef typename Kokkos::Impl::AtomicViewDataHandle< Traits > handle_type ; + typedef typename Kokkos::Impl::AtomicDataElement< Traits > return_type ; + typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ; + + KOKKOS_INLINE_FUNCTION + static handle_type assign( value_type * arg_data_ptr + , track_type const & /*arg_tracker*/ ) + { + return handle_type( arg_data_ptr ); + } +}; + +}}} // namespace Kokkos::Experimental::Impl + 
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+//----------------------------------------------------------------------------
+
+/*
+ *  The construction, assignment to default, and destruction
+ *  are merged into a single functor.
+ *  Primarily to work around an unresolved CUDA back-end bug
+ *  that would lose the destruction cuda device function when
+ *  called from the shared memory tracking destruction.
+ *  Secondarily to have two fewer partial specializations.
+ *
+ *  The third template parameter defaults to std::is_scalar<ValueType>
+ *  and selects one of the two partial specializations below.
+ */
+template< class ExecSpace
+        , class ValueType
+        , bool IsScalar = std::is_scalar< ValueType >::value
+        >
+struct ViewValueFunctor ;
+
+// Non-scalar value types: elements are default-constructed with placement
+// new and destroyed with an explicit destructor call.
+template< class ExecSpace , class ValueType >
+struct ViewValueFunctor< ExecSpace , ValueType , false /* is_scalar */ >
+{
+  typedef Kokkos::RangePolicy< ExecSpace > PolicyType ;
+
+  ExecSpace   space ;    // execution space instance used for dispatch and fence
+  ValueType * ptr ;      // first element of the range operated on
+  size_t      n ;        // number of elements in the range
+  bool        destroy ;  // true: operator() destroys, false: operator() constructs
+
+  // Per-element operation; which one is performed depends on 'destroy'.
+  KOKKOS_INLINE_FUNCTION
+  void operator()( const size_t i ) const
+    {
+      if ( destroy ) { (ptr+i)->~ValueType(); }
+      else           { new (ptr+i) ValueType(); }
+    }
+
+  ViewValueFunctor() = default ;
+  ViewValueFunctor( const ViewValueFunctor & ) = default ;
+  ViewValueFunctor & operator = ( const ViewValueFunctor & ) = default ;
+
+  // Stores the space, pointer and count; starts in "construct" mode.
+  ViewValueFunctor( ExecSpace const & arg_space
+                  , ValueType * const arg_ptr
+                  , size_t      const arg_n )
+    : space( arg_space )
+    , ptr( arg_ptr )
+    , n( arg_n )
+    , destroy( false )
+    {}
+
+  // Records the requested mode in 'destroy' and applies operator() to every
+  // index in [0,n): through a ParallelFor over PolicyType(0,n) followed by
+  // space.fence() when space.in_parallel() is false, otherwise through a
+  // plain serial loop.
+  // NOTE(review): presumably in_parallel() reports that the caller is already
+  // inside a parallel region where a nested dispatch is not wanted -- confirm
+  // against the execution space's definition.
+  void execute( bool arg )
+    {
+      destroy = arg ;
+      if ( ! space.in_parallel() ) {
+        const Kokkos::Impl::ParallelFor< ViewValueFunctor , PolicyType >
+          closure( *this , PolicyType( 0 , n ) );
+        closure.execute();
+        space.fence();
+      }
+      else {
+        for ( size_t i = 0 ; i < n ; ++i ) operator()(i);
+      }
+    }
+
+  // Default-construct all n elements.
+  void construct_shared_allocation()
+    { execute( false ); }
+
+  // Destroy all n elements.
+  void destroy_shared_allocation()
+    { execute( true ); }
+};
+
+
+// Scalar value types: "construction" assigns a value-initialized ValueType()
+// to each element and destruction does nothing.
+template< class ExecSpace , class ValueType >
+struct ViewValueFunctor< ExecSpace , ValueType , true /* is_scalar */ >
+{
+  typedef Kokkos::RangePolicy< ExecSpace > PolicyType ;
+
+  ExecSpace   space ;  // execution space instance used for dispatch and fence
+  ValueType * ptr ;    // first element of the range operated on
+  size_t      n ;      // number of elements in the range
+
+  // Per-element operation: overwrite element i with ValueType().
+  KOKKOS_INLINE_FUNCTION
+  void operator()( const size_t i ) const
+    { ptr[i] = ValueType(); }
+
+  ViewValueFunctor() = default ;
+  ViewValueFunctor( const ViewValueFunctor & ) = default ;
+  ViewValueFunctor & operator = ( const ViewValueFunctor & ) = default ;
+
+  // Stores the space, pointer and count.
+  ViewValueFunctor( ExecSpace const & arg_space
+                  , ValueType * const arg_ptr
+                  , size_t      const arg_n )
+    : space( arg_space )
+    , ptr( arg_ptr )
+    , n( arg_n )
+    {}
+
+  // Applies operator() to every index in [0,n), using the same
+  // ParallelFor-plus-fence / serial-loop choice as the non-scalar
+  // specialization's execute().
+  void construct_shared_allocation()
+    {
+      if ( ! space.in_parallel() ) {
+        const Kokkos::Impl::ParallelFor< ViewValueFunctor , PolicyType >
+          closure( *this , PolicyType( 0 , n ) );
+        closure.execute();
+        space.fence();
+      }
+      else {
+        for ( size_t i = 0 ; i < n ; ++i ) operator()(i);
+      }
+    }
+
+  // Intentionally empty: nothing is done to destroy scalar elements.
+  void destroy_shared_allocation() {}
+};
+
+//----------------------------------------------------------------------------
+/** \brief  View mapping for non-specialized data type and standard layout */
+template< class Traits >
+class ViewMapping< Traits ,
+  typename std::enable_if<(
+    std::is_same< typename Traits::specialize , void >::value
+    &&
+    ViewOffset< typename Traits::dimension
+              , typename Traits::array_layout
+              , void >::is_mapping_plugin::value
+  )>::type >
+{
+private:
+
+  template< class , class ... > friend class ViewMapping ;
+  template< class , class ... 
> friend class Kokkos::Experimental::View ; + + typedef ViewOffset< typename Traits::dimension + , typename Traits::array_layout + , void + > offset_type ; + + typedef typename ViewDataHandle< Traits >::handle_type handle_type ; + + handle_type m_handle ; + offset_type m_offset ; + + KOKKOS_INLINE_FUNCTION + ViewMapping( const handle_type & arg_handle , const offset_type & arg_offset ) + : m_handle( arg_handle ) + , m_offset( arg_offset ) + {} + +public: + + //---------------------------------------- + // Domain dimensions + + enum { Rank = Traits::dimension::rank }; + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr size_t extent( const iType & r ) const + { return m_offset.m_dim.extent(r); } + + KOKKOS_INLINE_FUNCTION constexpr + typename Traits::array_layout layout() const + { return m_offset.layout(); } + + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_offset.dimension_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_offset.dimension_1(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_offset.dimension_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_offset.dimension_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_offset.dimension_4(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_offset.dimension_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_offset.dimension_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_offset.dimension_7(); } + + // Is a regular layout with uniform striding for each index. 
+  using is_regular = typename offset_type::is_regular ;
+
+  // Per-rank strides, forwarded unchanged from the offset mapping.
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_offset.stride_0(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_offset.stride_1(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_offset.stride_2(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_offset.stride_3(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_offset.stride_4(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_offset.stride_5(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_offset.stride_6(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_offset.stride_7(); }
+
+  // Passes the caller's array straight to the offset mapping's stride().
+  // NOTE(review): presumably that call fills s[] with the strides; the
+  // required array length is not visible here -- confirm against
+  // ViewOffset::stride.
+  template< typename iType >
+  KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_offset.stride(s); }
+
+  //----------------------------------------
+  // Range span
+
+  /** \brief  Span of the mapped range */
+  KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_offset.span(); }
+
+  /** \brief  Is the mapped range span contiguous */
+  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_offset.span_is_contiguous(); }
+
+  typedef typename ViewDataHandle< Traits >::return_type  reference_type ;
+  typedef typename Traits::value_type *                   pointer_type ;
+
+  /** \brief  If data references are lvalue references then the pointer to
+   *          memory can be queried; otherwise a null pointer is returned.
+   */
+  KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const
+    {
+      return std::is_lvalue_reference< reference_type >::value
+           ? (pointer_type) m_handle
+           : (pointer_type) 0 ;
+    }
+
+  //----------------------------------------
+  // The View class performs all rank and bounds checking before
+  // calling these element reference methods.
+ + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference() const { return m_handle[0]; } + + template< typename I0 > + KOKKOS_FORCEINLINE_FUNCTION + typename + std::enable_if< std::is_integral<I0>::value && + ! std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value + , reference_type >::type + reference( const I0 & i0 ) const { return m_handle[i0]; } + + template< typename I0 > + KOKKOS_FORCEINLINE_FUNCTION + typename + std::enable_if< std::is_integral<I0>::value && + std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value + , reference_type >::type + reference( const I0 & i0 ) const { return m_handle[ m_offset(i0) ]; } + + template< typename I0 , typename I1 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 ) const + { return m_handle[ m_offset(i0,i1) ]; } + + template< typename I0 , typename I1 , typename I2 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const + { return m_handle[ m_offset(i0,i1,i2) ]; } + + template< typename I0 , typename I1 , typename I2 , typename I3 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const + { return m_handle[ m_offset(i0,i1,i2,i3) ]; } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 ) const + { return m_handle[ m_offset(i0,i1,i2,i3,i4) ]; } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 ) const + { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5) ]; } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , 
typename I5 , typename I6 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6 ) const + { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6) ]; } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 , typename I6 , typename I7 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const + { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; } + + //---------------------------------------- + +private: + + enum { MemorySpanMask = 8 - 1 /* Force alignment on 8 byte boundary */ }; + enum { MemorySpanSize = sizeof(typename Traits::value_type) }; + +public: + + /** \brief Span, in bytes, of the referenced memory */ + KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const + { + return ( m_offset.span() * sizeof(typename Traits::value_type) + MemorySpanMask ) & ~size_t(MemorySpanMask); + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION ~ViewMapping() {} + KOKKOS_INLINE_FUNCTION ViewMapping() : m_handle(), m_offset() {} + KOKKOS_INLINE_FUNCTION ViewMapping( const ViewMapping & rhs ) + : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {} + KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( const ViewMapping & rhs ) + { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; } + + KOKKOS_INLINE_FUNCTION ViewMapping( ViewMapping && rhs ) + : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {} + KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( ViewMapping && rhs ) + { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; } + + //---------------------------------------- + + /**\brief Span, in bytes, of the required memory */ + KOKKOS_INLINE_FUNCTION + static constexpr size_t memory_span( typename Traits::array_layout const & 
arg_layout ) + { + typedef std::integral_constant< unsigned , 0 > padding ; + return ( offset_type( padding(), arg_layout ).span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask); + } + + /**\brief Wrap a span of memory */ + template< class ... P > + KOKKOS_INLINE_FUNCTION + ViewMapping( ViewCtorProp< P ... > const & arg_prop + , typename Traits::array_layout const & arg_layout + ) + : m_handle( ( (ViewCtorProp<void,pointer_type> const &) arg_prop ).value ) + , m_offset( std::integral_constant< unsigned , 0 >() , arg_layout ) + {} + + //---------------------------------------- + /* Allocate and construct mapped array. + * Allocate via shared allocation record and + * return that record for allocation tracking. + */ + template< class ... P > + SharedAllocationRecord<> * + allocate_shared( ViewCtorProp< P... > const & arg_prop + , typename Traits::array_layout const & arg_layout ) + { + typedef ViewCtorProp< P... > alloc_prop ; + + typedef typename alloc_prop::execution_space execution_space ; + typedef typename Traits::memory_space memory_space ; + typedef typename Traits::value_type value_type ; + typedef ViewValueFunctor< execution_space , value_type > functor_type ; + typedef SharedAllocationRecord< memory_space , functor_type > record_type ; + + // Query the mapping for byte-size of allocation. + // If padding is allowed then pass in sizeof value type + // for padding computation. + typedef std::integral_constant + < unsigned + , alloc_prop::allow_padding ? 
sizeof(value_type) : 0 + > padding ; + + m_offset = offset_type( padding(), arg_layout ); + + const size_t alloc_size = + ( m_offset.span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask); + + // Create shared memory tracking record with allocate memory from the memory space + record_type * const record = + record_type::allocate( ( (ViewCtorProp<void,memory_space> const &) arg_prop ).value + , ( (ViewCtorProp<void,std::string> const &) arg_prop ).value + , alloc_size ); + + // Only set the the pointer and initialize if the allocation is non-zero. + // May be zero if one of the dimensions is zero. + if ( alloc_size ) { + + m_handle = handle_type( reinterpret_cast< pointer_type >( record->data() ) ); + + if ( alloc_prop::initialize ) { + // Assume destruction is only required when construction is requested. + // The ViewValueFunctor has both value construction and destruction operators. + record->m_destroy = functor_type( ( (ViewCtorProp<void,execution_space> const &) arg_prop).value + , (value_type *) m_handle + , m_offset.span() + ); + + // Construct values + record->m_destroy.construct_shared_allocation(); + } + } + + return record ; + } +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +/** \brief Assign compatible default mappings */ + +template< class DstTraits , class SrcTraits > +class ViewMapping< DstTraits , SrcTraits , + typename std::enable_if<( + std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value + && + std::is_same< typename DstTraits::specialize , void >::value + && + std::is_same< typename SrcTraits::specialize , void >::value + && + ( + std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value + || + ( + ( + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value || + std::is_same< typename DstTraits::array_layout , 
Kokkos::LayoutRight >::value || + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value + ) + && + ( + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value || + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value || + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value + ) + ) + ) + )>::type > +{ +private: + + enum { is_assignable_value_type = + std::is_same< typename DstTraits::value_type + , typename SrcTraits::value_type >::value || + std::is_same< typename DstTraits::value_type + , typename SrcTraits::const_value_type >::value }; + + enum { is_assignable_dimension = + ViewDimensionAssignable< typename DstTraits::dimension + , typename SrcTraits::dimension >::value }; + + enum { is_assignable_layout = + std::is_same< typename DstTraits::array_layout + , typename SrcTraits::array_layout >::value || + std::is_same< typename DstTraits::array_layout + , Kokkos::LayoutStride >::value || + ( DstTraits::dimension::rank == 0 ) || + ( DstTraits::dimension::rank == 1 && + DstTraits::dimension::rank_dynamic == 1 ) + }; + +public: + + enum { is_assignable = is_assignable_value_type && + is_assignable_dimension && + is_assignable_layout }; + + typedef Kokkos::Experimental::Impl::SharedAllocationTracker TrackType ; + typedef ViewMapping< DstTraits , void > DstType ; + typedef ViewMapping< SrcTraits , void > SrcType ; + + KOKKOS_INLINE_FUNCTION + static void assign( DstType & dst , const SrcType & src , const TrackType & src_track ) + { + static_assert( is_assignable_value_type + , "View assignment must have same value type or const = non-const" ); + + static_assert( is_assignable_dimension + , "View assignment must have compatible dimensions" ); + + static_assert( is_assignable_layout + , "View assignment must have compatible layout or have rank <= 1" ); + + typedef typename DstType::offset_type dst_offset_type ; + + if ( size_t(DstTraits::dimension::rank_dynamic) < 
size_t(SrcTraits::dimension::rank_dynamic) ) { + typedef typename DstTraits::dimension dst_dim; + bool assignable = + ( ( 1 > DstTraits::dimension::rank_dynamic && 1 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN0 == src.dimension_0() : true ) && + ( ( 2 > DstTraits::dimension::rank_dynamic && 2 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN1 == src.dimension_1() : true ) && + ( ( 3 > DstTraits::dimension::rank_dynamic && 3 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN2 == src.dimension_2() : true ) && + ( ( 4 > DstTraits::dimension::rank_dynamic && 4 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN3 == src.dimension_3() : true ) && + ( ( 5 > DstTraits::dimension::rank_dynamic && 5 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN4 == src.dimension_4() : true ) && + ( ( 6 > DstTraits::dimension::rank_dynamic && 6 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN5 == src.dimension_5() : true ) && + ( ( 7 > DstTraits::dimension::rank_dynamic && 7 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN6 == src.dimension_6() : true ) && + ( ( 8 > DstTraits::dimension::rank_dynamic && 8 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN7 == src.dimension_7() : true ) + ; + if(!assignable) + Kokkos::abort("View Assignment: trying to assign runtime dimension to non matching compile time dimension."); + } + dst.m_offset = dst_offset_type( src.m_offset ); + dst.m_handle = Kokkos::Experimental::Impl::ViewDataHandle< DstTraits >::assign( src.m_handle , src_track ); + } +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +// Subview mapping. +// Deduce destination view type from source view traits and subview arguments + +template< class SrcTraits , class ... 
Args > +struct ViewMapping + < typename std::enable_if<( + std::is_same< typename SrcTraits::specialize , void >::value + && + ( + std::is_same< typename SrcTraits::array_layout + , Kokkos::LayoutLeft >::value || + std::is_same< typename SrcTraits::array_layout + , Kokkos::LayoutRight >::value || + std::is_same< typename SrcTraits::array_layout + , Kokkos::LayoutStride >::value + ) + )>::type + , SrcTraits + , Args ... > +{ +private: + + static_assert( SrcTraits::rank == sizeof...(Args) , + "Subview mapping requires one argument for each dimension of source View" ); + + enum + { RZ = false + , R0 = bool(is_integral_extent<0,Args...>::value) + , R1 = bool(is_integral_extent<1,Args...>::value) + , R2 = bool(is_integral_extent<2,Args...>::value) + , R3 = bool(is_integral_extent<3,Args...>::value) + , R4 = bool(is_integral_extent<4,Args...>::value) + , R5 = bool(is_integral_extent<5,Args...>::value) + , R6 = bool(is_integral_extent<6,Args...>::value) + , R7 = bool(is_integral_extent<7,Args...>::value) + }; + + enum { rank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3) + + unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) }; + + // Whether right-most rank is a range. + enum { R0_rev = ( 0 == SrcTraits::rank ? RZ : ( + 1 == SrcTraits::rank ? R0 : ( + 2 == SrcTraits::rank ? R1 : ( + 3 == SrcTraits::rank ? R2 : ( + 4 == SrcTraits::rank ? R3 : ( + 5 == SrcTraits::rank ? R4 : ( + 6 == SrcTraits::rank ? R5 : ( + 7 == SrcTraits::rank ? R6 : R7 )))))))) }; + + // Subview's layout + typedef typename std::conditional< + ( /* Same array layout IF */ + ( rank == 0 ) /* output rank zero */ + || + // OutputRank 1 or 2, InputLayout Left, Interval 0 + // because single stride one or second index has a stride. + ( rank <= 2 && R0 && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ) //replace with input rank + || + // OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1] + // because single stride one or second index has a stride. 
+ ( rank <= 2 && R0_rev && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ) //replace input rank + ), typename SrcTraits::array_layout , Kokkos::LayoutStride + >::type array_layout ; + + typedef typename SrcTraits::value_type value_type ; + + typedef typename std::conditional< rank == 0 , value_type , + typename std::conditional< rank == 1 , value_type * , + typename std::conditional< rank == 2 , value_type ** , + typename std::conditional< rank == 3 , value_type *** , + typename std::conditional< rank == 4 , value_type **** , + typename std::conditional< rank == 5 , value_type ***** , + typename std::conditional< rank == 6 , value_type ****** , + typename std::conditional< rank == 7 , value_type ******* , + value_type ******** + >::type >::type >::type >::type >::type >::type >::type >::type + data_type ; + +public: + + typedef Kokkos::Experimental::ViewTraits + < data_type + , array_layout + , typename SrcTraits::device_type + , typename SrcTraits::memory_traits > traits_type ; + + typedef Kokkos::Experimental::View + < data_type + , array_layout + , typename SrcTraits::device_type + , typename SrcTraits::memory_traits > type ; + + template< class MemoryTraits > + struct apply { + + static_assert( Kokkos::Impl::is_memory_traits< MemoryTraits >::value , "" ); + + typedef Kokkos::Experimental::ViewTraits + < data_type + , array_layout + , typename SrcTraits::device_type + , MemoryTraits > traits_type ; + + typedef Kokkos::Experimental::View + < data_type + , array_layout + , typename SrcTraits::device_type + , MemoryTraits > type ; + }; + + // The presumed type is 'ViewMapping< traits_type , void >' + // However, a compatible ViewMapping is acceptable. + template< class DstTraits > + KOKKOS_INLINE_FUNCTION + static void assign( ViewMapping< DstTraits , void > & dst + , ViewMapping< SrcTraits , void > const & src + , Args ... 
args ) + { + static_assert( + ViewMapping< DstTraits , traits_type , void >::is_assignable , + "Subview destination type must be compatible with subview derived type" ); + + typedef ViewMapping< DstTraits , void > DstType ; + + typedef typename DstType::offset_type dst_offset_type ; + typedef typename DstType::handle_type dst_handle_type ; + + const SubviewExtents< SrcTraits::rank , rank > + extents( src.m_offset.m_dim , args... ); + + dst.m_offset = dst_offset_type( src.m_offset , extents ); + dst.m_handle = dst_handle_type( src.m_handle + + src.m_offset( extents.domain_offset(0) + , extents.domain_offset(1) + , extents.domain_offset(2) + , extents.domain_offset(3) + , extents.domain_offset(4) + , extents.domain_offset(5) + , extents.domain_offset(6) + , extents.domain_offset(7) + ) ); + } +}; + + + +//---------------------------------------------------------------------------- + +}}} // namespace Kokkos::Experimental::Impl + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template< unsigned , class MapType > +KOKKOS_INLINE_FUNCTION +bool view_verify_operator_bounds( const MapType & ) +{ return true ; } + +template< unsigned R , class MapType , class iType , class ... Args > +KOKKOS_INLINE_FUNCTION +bool view_verify_operator_bounds + ( const MapType & map + , const iType & i + , Args ... args + ) +{ + return ( size_t(i) < map.extent(R) ) + && view_verify_operator_bounds<R+1>( map , args ... ); +} + +template< unsigned , class MapType > +inline +void view_error_operator_bounds( char * , int , const MapType & ) +{} + +template< unsigned R , class MapType , class iType , class ... Args > +inline +void view_error_operator_bounds + ( char * buf + , int len + , const MapType & map + , const iType & i + , Args ... 
args
+  )
+{
+  // Appends " <index> < <extent> <sep>" for rank R to the message, where
+  // <sep> is ',' while more indices follow and ')' after the last one, and
+  // then recurses on the remaining indices.
+
+  // No room left, not even for the terminating NUL: stop appending.
+  if ( len <= 0 ) return ;
+
+  // Both values are cast to unsigned long, so the conversion must be %lu.
+  const int n =
+    snprintf(buf,len," %lu < %lu %c"
+            , static_cast<unsigned long>(i)
+            , static_cast<unsigned long>( map.extent(R) )
+            , ( sizeof...(Args) ? ',' : ')' )
+            );
+
+  // snprintf returns the length the complete text would have had, which can
+  // be >= len when the output was truncated, or a negative value on error.
+  // Clamp to the number of characters actually stored so the recursion never
+  // advances past the end of the caller's buffer.
+  const int used = ( n < 0 ) ? 0 : ( ( n < len ) ? n : len - 1 );
+
+  view_error_operator_bounds<R+1>(buf+used,len-used,map,args...);
+}
+
+/** \brief  Check every index against the mapping's extent and report a
+ *          violation.
+ *
+ *  \param map   Mapping whose extents are queried through map.extent(r).
+ *  \param args  One index per rank, in rank order.
+ *
+ *  Nothing happens when the recursive check succeeds.  On failure, if
+ *  KOKKOS_ACTIVE_EXECUTION_SPACE_HOST is defined, a message of the form
+ *  "View bounds error( i0 < N0 , i1 < N1 )" is formatted into a local
+ *  buffer and handed to Kokkos::Impl::throw_runtime_exception; otherwise
+ *  Kokkos::abort is called with a fixed message.
+ */
+template< class MapType , class ... Args >
+KOKKOS_INLINE_FUNCTION
+void view_verify_operator_bounds
+  ( const MapType & map , Args ... args )
+{
+  if ( ! view_verify_operator_bounds<0>( map , args ... ) ) {
+#if defined( KOKKOS_ACTIVE_EXECUTION_SPACE_HOST )
+    enum { LEN = 1024 };
+    char buffer[ LEN ];
+    // Write the prefix into the local array; the per-index text is appended
+    // right after it.
+    int n = snprintf(buffer,LEN,"View bounds error(" );
+    view_error_operator_bounds<0>( buffer + n , LEN - n , map , args ... );
+    Kokkos::Impl::throw_runtime_exception(std::string(buffer));
+#else
+    Kokkos::abort("View bounds error");
+#endif
+  }
+}
+
+
+class Error_view_scalar_reference_to_non_scalar_view ;
+
+} /* namespace Impl */
+} /* namespace Experimental */
+} /* namespace Kokkos */
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#endif /* #ifndef KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP */
+
diff --git a/lib/kokkos/core/src/impl/KokkosExp_ViewTile.hpp b/lib/kokkos/core/src/impl/KokkosExp_ViewTile.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..8b3749e853a85eea341c4ce8462aec755de4bb11
--- /dev/null
+++ b/lib/kokkos/core/src/impl/KokkosExp_ViewTile.hpp
@@ -0,0 +1,227 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXPERIMENTAL_VIEWTILE_HPP +#define KOKKOS_EXPERIMENTAL_VIEWTILE_HPP + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +// View mapping for rank two tiled array + +template< class L > +struct is_layout_tile : public std::false_type {}; + +template< unsigned N0 , unsigned N1 > +struct is_layout_tile< Kokkos::LayoutTileLeft<N0,N1,true> > : public std::true_type {}; + +template< class Dimension , class Layout > +struct ViewOffset< Dimension , Layout , + typename std::enable_if<( + ( Dimension::rank == 2 ) + && + is_layout_tile< Layout >::value + )>::type > +{ +public: + + enum { SHIFT_0 = Kokkos::Impl::integral_power_of_two(Layout::N0) }; + enum { SHIFT_1 = Kokkos::Impl::integral_power_of_two(Layout::N1) }; + enum { SHIFT_T = SHIFT_0 + SHIFT_1 }; + enum { MASK_0 = Layout::N0 - 1 }; + enum { MASK_1 = Layout::N1 - 1 }; + + // Is an irregular layout that does not have uniform striding for each index. 
+ using is_mapping_plugin = std::true_type ; + using is_regular = std::false_type ; + + typedef size_t size_type ; + typedef Dimension dimension_type ; + typedef Layout array_layout ; + + dimension_type m_dim ; + size_type m_tile_N0 ; + + //---------------------------------------- + + // Only instantiated for rank 2 + template< typename I0 , typename I1 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0 , I1 const & i1 + , int = 0 , int = 0 + , int = 0 , int = 0 + , int = 0 , int = 0 + ) const + { + return /* ( ( Tile offset ) * Tile size ) */ + ( ( (i0>>SHIFT_0) + m_tile_N0 * (i1>>SHIFT_1) ) << SHIFT_T) + + /* ( Offset within tile ) */ + ( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) ) ; + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION constexpr + array_layout layout() const + { return array_layout( m_dim.N0 , m_dim.N1 ); } + + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return 1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return 1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return 1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return 1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return 1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return 1 ; } + + KOKKOS_INLINE_FUNCTION constexpr size_type size() const { return m_dim.N0 * m_dim.N1 ; } + + // Strides are meaningless due to irregularity + KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return 0 ; } + 
KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return 0 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return 0 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return 0 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 0 ; }
+
+  // Number of elements spanned by the tiled allocation: the tile count in
+  // dimension 0 times the tile count in dimension 1, scaled by << SHIFT_T.
+  // NOTE(review): assumes MASK_1 == (tile extent 1) - 1 and SHIFT_1 / SHIFT_T
+  // are the matching log2 values; they are defined outside this excerpt.
+  KOKKOS_INLINE_FUNCTION constexpr size_type span() const
+    {
+      // ( TileDim0 * ( TileDim1 ) ) * TileSize
+      return ( m_tile_N0 * ( ( m_dim.N1 + MASK_1 ) >> SHIFT_1 ) ) << SHIFT_T ;
+    }
+
+  // True when neither dimension has low bits set under MASK_0 / MASK_1.
+  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const
+    {
+      // Only if dimensions align with tile size
+      return ( m_dim.N0 & MASK_0 ) == 0 && ( m_dim.N1 & MASK_1 ) == 0 ;
+    }
+
+  //----------------------------------------
+
+  ~ViewOffset() = default ;
+  ViewOffset() = default ;
+  ViewOffset( const ViewOffset & ) = default ;
+  ViewOffset & operator = ( const ViewOffset & ) = default ;
+
+  // Construct from a layout: records dimension[0] and dimension[1] and
+  // pre-computes the number of tiles along dimension 0 (rounded up).
+  // The integral_constant argument is unnamed and unused.
+  template< unsigned TrivialScalarSize >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset( std::integral_constant<unsigned,TrivialScalarSize> const & ,
+                        array_layout const arg_layout )
+    : m_dim( arg_layout.dimension[0], arg_layout.dimension[1], 0, 0, 0, 0, 0, 0 )
+    , m_tile_N0( ( arg_layout.dimension[0] + MASK_0 ) >> SHIFT_0 /* number of tiles in first dimension */ )
+    {}
+};
+
+// Mapping specialization used to extract a single tile from a rank-2
+// LayoutTileLeft<N0,N1,true> view.  The result ('type') is a LayoutLeft view
+// of static extent T[N0][N1].
+template< typename T , unsigned N0 , unsigned N1 , class ... P
+        , typename iType0 , typename iType1
+        >
+struct ViewMapping
+  < void
+  , Kokkos::Experimental::ViewTraits<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...>
+  , Kokkos::LayoutTileLeft<N0,N1,true>
+  , iType0
+  , iType1 >
+{
+  typedef Kokkos::LayoutTileLeft<N0,N1,true>  src_layout ;
+  typedef Kokkos::Experimental::ViewTraits< T** , src_layout , P... > src_traits ;
+  typedef Kokkos::Experimental::ViewTraits< T[N0][N1] , LayoutLeft , P ... > traits ;
+  typedef Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P ... > type ;
+
+  // Point 'dst' at tile ( i_tile0 , i_tile1 ) of 'src'.
+  // The tile ordinal ( i_tile0 + m_tile_N0 * i_tile1 ) is scaled by
+  // << SHIFT_T and added to the source handle; the destination offset is
+  // default constructed.  The layout argument is only an overload selector.
+  // No range check of the tile indices is performed here.
+  KOKKOS_INLINE_FUNCTION static
+  void assign( ViewMapping< traits , void > & dst
+             , const ViewMapping< src_traits , void > & src
+             , const src_layout &
+             , const size_t i_tile0
+             , const size_t i_tile1
+             )
+    {
+      typedef ViewMapping< traits , void >        dst_map_type ;
+      typedef ViewMapping< src_traits , void >    src_map_type ;
+      typedef typename dst_map_type::handle_type  dst_handle_type ;
+      typedef typename dst_map_type::offset_type  dst_offset_type ;
+      typedef typename src_map_type::offset_type  src_offset_type ;
+
+      dst = dst_map_type(
+         dst_handle_type( src.m_handle +
+                        ( ( i_tile0 + src.m_offset.m_tile_N0 * i_tile1 ) << src_offset_type::SHIFT_T ) ) ,
+         dst_offset_type() );
+    }
+};
+
+} /* namespace Impl */
+} /* namespace Experimental */
+} /* namespace Kokkos */
+
+namespace Kokkos {
+namespace Experimental {
+
+// Return a LayoutLeft view of static extent T[N0][N1] built from 'src' and
+// the tile indices ( i_tile0 , i_tile1 ).
+template< typename T , unsigned N0 , unsigned N1 , class ... P >
+KOKKOS_INLINE_FUNCTION
+Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P... >
+tile_subview( const Kokkos::Experimental::View<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...> & src
+            , const size_t i_tile0
+            , const size_t i_tile1
+            )
+{
+  // Force the specialized ViewMapping for extracting a tile
+  // by using the first subview argument as the layout.
+  typedef Kokkos::LayoutTileLeft<N0,N1,true> SrcLayout ;
+
+  return Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P... >
+    ( src , SrcLayout() , i_tile0 , i_tile1 );
+}
+
+} /* namespace Experimental */
+} /* namespace Kokkos */
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#endif /* #ifndef KOKKOS_EXPERIENTAL_VIEWTILE_HPP */
+
diff --git a/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp b/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0246a7b9af8f968fe6295bfad20f765265049906
--- /dev/null
+++ b/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp
@@ -0,0 +1,197 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_IMPL_ANALYZE_POLICY_HPP
+#define KOKKOS_IMPL_ANALYZE_POLICY_HPP
+
+#include <Kokkos_Core_fwd.hpp>
+#include <Kokkos_Concepts.hpp>
+#include <impl/Kokkos_Tags.hpp>
+
+namespace Kokkos { namespace Impl {
+
+// Record of the five policy properties collected so far.
+// 'void' in a slot means "not specified yet".
+template < typename ExecutionSpace   = void
+         , typename Schedule         = void
+         , typename WorkTag          = void
+         , typename IndexType        = void
+         , typename IterationPattern = void
+         >
+struct PolicyTraitsBase
+{
+  using type = PolicyTraitsBase< ExecutionSpace, Schedule, WorkTag, IndexType, IterationPattern>;
+
+  using execution_space   = ExecutionSpace;
+  using schedule_type     = Schedule;
+  using work_tag          = WorkTag;
+  using index_type        = IndexType;
+  using iteration_pattern = IterationPattern;
+};
+
+
+// Each Set* helper below yields ('::type') a copy of PolicyBase with exactly
+// one slot replaced, and static_asserts that the slot was still 'void', i.e.
+// that the property has not been given twice.
+
+// Fill the execution space slot.
+template <typename PolicyBase, typename ExecutionSpace>
+struct SetExecutionSpace
+{
+  static_assert( is_void<typename PolicyBase::execution_space>::value
+               , "Kokkos Error: More than one execution space given" );
+  using type = PolicyTraitsBase< ExecutionSpace
+                               , typename PolicyBase::schedule_type
+                               , typename PolicyBase::work_tag
+                               , typename PolicyBase::index_type
+                               , typename PolicyBase::iteration_pattern
+                               >;
+};
+
+// Fill the schedule slot.
+template <typename PolicyBase, typename Schedule>
+struct SetSchedule
+{
+  static_assert( is_void<typename PolicyBase::schedule_type>::value
+               , "Kokkos Error: More than one schedule type given" );
+  using type = PolicyTraitsBase< typename PolicyBase::execution_space
+                               , Schedule
+                               , typename PolicyBase::work_tag
+                               , typename PolicyBase::index_type
+                               , typename PolicyBase::iteration_pattern
+                               >;
+};
+
+// Fill the work tag slot.
+template <typename PolicyBase, typename WorkTag>
+struct SetWorkTag
+{
+  static_assert( is_void<typename PolicyBase::work_tag>::value
+               , "Kokkos Error: More than one work tag given" );
+  using type = PolicyTraitsBase< typename PolicyBase::execution_space
+                               , typename PolicyBase::schedule_type
+                               , WorkTag
+                               , typename PolicyBase::index_type
+                               , typename PolicyBase::iteration_pattern
+                               >;
+};
+
+// Fill the index type slot.
+template <typename PolicyBase, typename IndexType>
+struct SetIndexType
+{
+  static_assert( is_void<typename PolicyBase::index_type>::value
+               , "Kokkos Error: More than one index type given" );
+  using type = PolicyTraitsBase< typename PolicyBase::execution_space
+                               , typename PolicyBase::schedule_type
+                               , typename PolicyBase::work_tag
+                               , IndexType
+                               , typename PolicyBase::iteration_pattern
+                               >;
+};
+
+
+// Fill the iteration pattern slot.
+template <typename PolicyBase, typename IterationPattern>
+struct SetIterationPattern
+{
+  static_assert( is_void<typename PolicyBase::iteration_pattern>::value
+               , "Kokkos Error: More than one iteration_pattern given" );
+  using type = PolicyTraitsBase< typename PolicyBase::execution_space
+                               , typename PolicyBase::schedule_type
+                               , typename PolicyBase::work_tag
+                               , typename PolicyBase::index_type
+                               , IterationPattern
+                               >;
+};
+
+
+template <typename Base, typename... Traits>
+struct AnalyzePolicy;
+
+// Recursive step: classify the first trait T and continue with the rest.
+// Tests are applied in this order, first match wins:
+//   execution space, schedule type, index type, plain integral type
+//   (wrapped as IndexType<T>), iteration pattern; anything else is
+//   treated as the work tag.
+template <typename Base, typename T, typename... Traits>
+struct AnalyzePolicy<Base, T, Traits...> : public
+  AnalyzePolicy<
+      typename std::conditional< is_execution_space<T>::value  , SetExecutionSpace<Base,T>
+    , typename std::conditional< is_schedule_type<T>::value    , SetSchedule<Base,T>
+    , typename std::conditional< is_index_type<T>::value       , SetIndexType<Base,T>
+    , typename std::conditional< std::is_integral<T>::value    , SetIndexType<Base, IndexType<T> >
+    , typename std::conditional< is_iteration_pattern<T>::value, SetIterationPattern<Base,T>
+    , SetWorkTag<Base,T>
+    >::type >::type >::type >::type>::type::type
+  , Traits...
+  >
+{};
+
+// Terminal step: all traits consumed.  Slots still 'void' receive defaults
+// (DefaultExecutionSpace, Schedule<Static>, the execution space's size_type);
+// work_tag and iteration_pattern are passed through unchanged.
+template <typename Base>
+struct AnalyzePolicy<Base>
+{
+  using execution_space = typename std::conditional< is_void< typename Base::execution_space >::value
+                                                   , DefaultExecutionSpace
+                                                   , typename Base::execution_space
+                                                   >::type;
+
+  using schedule_type = typename std::conditional< is_void< typename Base::schedule_type >::value
+                                                 , Schedule< Static >
+                                                 , typename Base::schedule_type
+                                                 >::type;
+
+  using work_tag = typename Base::work_tag;
+
+  using index_type = typename std::conditional< is_void< typename Base::index_type >::value
+                                              , IndexType< typename execution_space::size_type >
+                                              , typename Base::index_type
+                                              >::type
+                                               ::type // nasty hack to make index_type into an integral_type
+                                              ;        // instead of the wrapped IndexType<T> for backwards compatibility
+
+  using iteration_pattern = typename std::conditional< is_void< typename Base::iteration_pattern >::value
+                                                     , void // TODO set default iteration pattern
+                                                     , typename Base::iteration_pattern
+                                                     >::type;
+  using type = PolicyTraitsBase< execution_space
+                               , schedule_type
+                               , work_tag
+                               , index_type
+                               , iteration_pattern
+                               >;
+};
+
+// Entry point: derives from the fully resolved PolicyTraitsBase for the
+// given list of traits.
+template <typename... Traits>
+struct PolicyTraits
+  : public AnalyzePolicy< PolicyTraitsBase<>, Traits... >::type
+{};
+
+}} // namespace Kokkos::Impl
+
+
+#endif //KOKKOS_IMPL_ANALYZE_POLICY_HPP
diff --git a/lib/kokkos/core/src/impl/Kokkos_AnalyzeShape.hpp b/lib/kokkos/core/src/impl/Kokkos_AnalyzeShape.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2de9df008ee5b42b5d38727ead56bae768869c43
--- /dev/null
+++ b/lib/kokkos/core/src/impl/Kokkos_AnalyzeShape.hpp
@@ -0,0 +1,260 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_ANALYZESHAPE_HPP
+#define KOKKOS_ANALYZESHAPE_HPP
+
+#include <impl/Kokkos_Shape.hpp>
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+//----------------------------------------------------------------------------
+
+/** \brief  Analyze the array shape defined by a Kokkos::View data type.
+ *
+ *  It is presumed that the data type can be mapped down to a multidimensional
+ *  array of an intrinsic scalar numerical type (double, float, int, ... ).
+ *  The 'value_type' of an array may be an embedded aggregate type such
+ *  as a fixed length array 'Array<T,N>'.
+ *  In this case the 'array_intrinsic_type' represents the
+ *  underlying array of intrinsic scalar numerical type.
+ *
+ *  The embedded aggregate type must have an AnalyzeShape specialization
+ *  to map it down to a shape and intrinsic scalar numerical type.
+ *
+ *  Every specialization publishes the same set of members: 'specialize',
+ *  'shape', and the plain / const_ / non_const_ variants of
+ *  'array_intrinsic_type', 'value_type' and 'type'.
+ *
+ *  This primary template is the recursion base case: T itself is the
+ *  scalar, with shape Shape< sizeof(T) , 0 >.
+ */
+template< class T >
+struct AnalyzeShape : public Shape< sizeof(T) , 0 >
+{
+  typedef void specialize ;
+
+  typedef Shape< sizeof(T), 0 >  shape ;
+
+  typedef       T  array_intrinsic_type ;
+  typedef       T  value_type ;
+  typedef       T  type ;
+
+  typedef const T  const_array_intrinsic_type ;
+  typedef const T  const_value_type ;
+  typedef const T  const_type ;
+
+  typedef       T  non_const_array_intrinsic_type ;
+  typedef       T  non_const_value_type ;
+  typedef       T  non_const_type ;
+};
+
+// 'void' has no size: explicit specialization with Shape< 0 , 0 >.
+template<>
+struct AnalyzeShape<void> : public Shape< 0 , 0 >
+{
+  typedef void specialize ;
+
+  typedef Shape< 0 , 0 >  shape ;
+
+  typedef       void  array_intrinsic_type ;
+  typedef       void  value_type ;
+  typedef       void  type ;
+  typedef const void  const_array_intrinsic_type ;
+  typedef const void  const_value_type ;
+  typedef const void  const_type ;
+  typedef       void  non_const_array_intrinsic_type ;
+  typedef       void  non_const_value_type ;
+  typedef       void  non_const_type ;
+};
+
+// const T: same shape as T; the unqualified member names map to the nested
+// const_ variants while the non_const_ names are passed through.
+template< class T >
+struct AnalyzeShape< const T > : public AnalyzeShape<T>::shape
+{
+private:
+  typedef AnalyzeShape<T> nested ;
+public:
+
+  typedef typename nested::specialize specialize ;
+
+  typedef typename nested::shape shape ;
+
+  typedef typename nested::const_array_intrinsic_type  array_intrinsic_type ;
+  typedef typename nested::const_value_type            value_type ;
+  typedef typename nested::const_type                  type ;
+
+  typedef typename nested::const_array_intrinsic_type  const_array_intrinsic_type ;
+  typedef typename nested::const_value_type            const_value_type ;
+  typedef typename nested::const_type                  const_type ;
+
+  typedef typename nested::non_const_array_intrinsic_type  non_const_array_intrinsic_type ;
+  typedef typename nested::non_const_value_type            non_const_value_type ;
+  typedef typename nested::non_const_type                  non_const_type ;
+};
+
+// T*: inserts an extent of 0 into the nested shape via ShapeInsert and adds
+// one pointer level to the array types; value_type is unchanged.
+template< class T >
+struct AnalyzeShape< T * >
+  : public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type
+{
+private:
+  typedef AnalyzeShape<T> nested ;
+public:
+
+  typedef typename nested::specialize specialize ;
+
+  typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ;
+
+  typedef typename nested::array_intrinsic_type * array_intrinsic_type ;
+  typedef typename nested::value_type             value_type ;
+  typedef typename nested::type                 * type ;
+
+  typedef typename nested::const_array_intrinsic_type * const_array_intrinsic_type ;
+  typedef typename nested::const_value_type             const_value_type ;
+  typedef typename nested::const_type                 * const_type ;
+
+  typedef typename nested::non_const_array_intrinsic_type * non_const_array_intrinsic_type ;
+  typedef typename nested::non_const_value_type             non_const_value_type ;
+  typedef typename nested::non_const_type                 * non_const_type ;
+};
+
+// T[]: as T*, but the array types gain an unbounded array dimension.
+template< class T >
+struct AnalyzeShape< T[] >
+  : public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type
+{
+private:
+  typedef AnalyzeShape<T> nested ;
+public:
+
+  typedef typename nested::specialize specialize ;
+
+  typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ;
+
+  typedef typename nested::array_intrinsic_type  array_intrinsic_type [] ;
+  typedef typename nested::value_type            value_type ;
+  typedef typename nested::type                  type [] ;
+
+  typedef typename nested::const_array_intrinsic_type  const_array_intrinsic_type [] ;
+  typedef typename nested::const_value_type            const_value_type ;
+  typedef typename nested::const_type                  const_type [] ;
+
+  typedef typename nested::non_const_array_intrinsic_type  non_const_array_intrinsic_type [] ;
+  typedef typename nested::non_const_value_type            non_const_value_type ;
+  typedef typename nested::non_const_type                  non_const_type [] ;
+};
+
+// const T[]: same as T[] but recursing through AnalyzeShape< const T >.
+template< class T >
+struct AnalyzeShape< const T[] >
+  : public ShapeInsert< typename AnalyzeShape< const T >::shape , 0 >::type
+{
+private:
+  typedef AnalyzeShape< const T > nested ;
+public:
+
+  typedef typename nested::specialize specialize ;
+
+  typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ;
+
+  typedef typename nested::array_intrinsic_type  array_intrinsic_type [] ;
+  typedef typename nested::value_type            value_type ;
+  typedef typename nested::type                  type [] ;
+
+  typedef typename nested::const_array_intrinsic_type  const_array_intrinsic_type [] ;
+  typedef typename nested::const_value_type            const_value_type ;
+  typedef typename nested::const_type                  const_type [] ;
+
+  typedef typename nested::non_const_array_intrinsic_type  non_const_array_intrinsic_type [] ;
+  typedef typename nested::non_const_value_type            non_const_value_type ;
+  typedef typename nested::non_const_type                  non_const_type [] ;
+};
+
+// T[N]: inserts the compile-time extent N into the nested shape and appends
+// [N] to the array types.
+template< class T , unsigned N >
+struct AnalyzeShape< T[N] >
+  : public ShapeInsert< typename AnalyzeShape<T>::shape , N >::type
+{
+private:
+  typedef AnalyzeShape<T> nested ;
+public:
+
+  typedef typename nested::specialize specialize ;
+
+  typedef typename ShapeInsert< typename nested::shape , N >::type shape ;
+
+  typedef typename nested::array_intrinsic_type  array_intrinsic_type [N] ;
+  typedef typename nested::value_type            value_type ;
+  typedef typename nested::type                  type [N] ;
+
+  typedef typename nested::const_array_intrinsic_type  const_array_intrinsic_type [N] ;
+  typedef typename nested::const_value_type            const_value_type ;
+  typedef typename nested::const_type                  const_type [N] ;
+
+  typedef typename nested::non_const_array_intrinsic_type  non_const_array_intrinsic_type [N] ;
+  typedef typename nested::non_const_value_type            non_const_value_type ;
+  typedef typename nested::non_const_type                  non_const_type [N] ;
+};
+
+// const T[N]: same as T[N] but recursing through AnalyzeShape< const T >.
+template< class T , unsigned N >
+struct AnalyzeShape< const T[N] >
+  : public ShapeInsert< typename AnalyzeShape< const T >::shape , N >::type
+{
+private:
+  typedef AnalyzeShape< const T > nested ;
+public:
+
+  typedef typename nested::specialize specialize ;
+
+  typedef typename ShapeInsert< typename nested::shape , N >::type shape ;
+
+  typedef typename nested::array_intrinsic_type  array_intrinsic_type [N] ;
+  typedef typename nested::value_type            value_type ;
+  typedef typename nested::type                  type [N] ;
+
+  typedef typename nested::const_array_intrinsic_type  const_array_intrinsic_type [N] ;
+  typedef typename nested::const_value_type            const_value_type ;
+  typedef typename nested::const_type                  const_type [N] ;
+
+  typedef typename nested::non_const_array_intrinsic_type  non_const_array_intrinsic_type [N] ;
+  typedef typename nested::non_const_value_type            non_const_value_type ;
+  typedef typename nested::non_const_type                  non_const_type [N] ;
+};
+
+} // namespace Impl
+} // namespace Kokkos
+
+#endif /* #ifndef KOKKOS_ANALYZESHAPE_HPP */
+
diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Assembly.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Assembly.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..500e2b5a21f50c34200920946d7e852eb057a16f
--- /dev/null
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Assembly.hpp
@@ -0,0 +1,112 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_ASSEMBLY_HPP )
+#define KOKKOS_ATOMIC_ASSEMBLY_HPP
+namespace Kokkos {
+
+namespace Impl {
+  // Two 64-bit words, 16-byte aligned: the operand type of the 128-bit
+  // compare-and-swap below.
+  struct cas128_t
+  {
+    uint64_t lower;
+    uint64_t upper;
+
+    // Zero-initialize both words.
+    KOKKOS_INLINE_FUNCTION
+    cas128_t () {
+      lower = 0;
+      upper = 0;
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    cas128_t (const cas128_t& a) {
+      lower = a.lower;
+      upper = a.upper;
+    }
+    // Snapshot a volatile instance (two separate word reads; not atomic).
+    KOKKOS_INLINE_FUNCTION
+    cas128_t (volatile cas128_t* a) {
+      lower = a->lower;
+      upper = a->upper;
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    bool operator != (const cas128_t& a) const {
+      return (lower != a.lower) || upper!=a.upper;
+    }
+
+    // Both assignment operators return void, so assignments cannot be chained.
+    KOKKOS_INLINE_FUNCTION
+    void operator = (const cas128_t& a) {
+      lower = a.lower;
+      upper = a.upper;
+    }
+    KOKKOS_INLINE_FUNCTION
+    void operator = (const cas128_t& a) volatile {
+      lower = a.lower;
+      upper = a.upper;
+    }
+  }
+  __attribute__ (( __aligned__( 16 ) ));
+
+
+  #if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
+  // 128-bit compare-and-swap using "lock cmpxchg16b".
+  //
+  //   ptr  : location to update
+  //   cmp  : expected value, passed in RDX:RAX ("d" / "a" constraints)
+  //   swap : replacement value, passed in RCX:RBX ("c" / "b" constraints)
+  //
+  // Returns the value observed at *ptr: the instruction leaves RDX:RAX equal
+  // to 'cmp' on success and loads the current contents on failure, so the
+  // caller detects success by comparing the result with what it passed in.
+  //
+  // The "memory" clobber makes the statement a compiler barrier so ordinary
+  // loads and stores are not moved across the atomic operation; "cc" records
+  // that the flags are modified (cmpxchg16b sets ZF, read by setz).
+  inline cas128_t cas128( volatile cas128_t * ptr, cas128_t cmp,  cas128_t swap )
+  {
+    bool swapped = false;
+    __asm__ __volatile__
+    (
+     "lock cmpxchg16b %1\n\t"
+     "setz %0"
+     : "=q" ( swapped )
+     , "+m" ( *ptr )
+     , "+d" ( cmp.upper )
+     , "+a" ( cmp.lower )
+     : "c" ( swap.upper )
+     , "b" ( swap.lower )
+     , "q" ( swapped )
+     : "memory" , "cc"
+   );
+    return cmp;
+  }
+  #endif
+
+}
+}
+
+#endif
diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..fd7ea845e7633d7415b0b9cd147f1da51ef93632
--- /dev/null
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp
@@ -0,0 +1,271 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP ) +#define KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP + +namespace Kokkos { + +//---------------------------------------------------------------------------- +// Cuda native CAS supports int, unsigned int, and unsigned long long int (non-standard type). +// Must cast-away 'volatile' for the CAS call. 
+
+#if defined( KOKKOS_ATOMICS_USE_CUDA )
+
+// int: forwarded to the native atomicCAS after dropping 'volatile'.
+__inline__ __device__
+int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
+{
+  int * const address = (int*) dest ;
+  return atomicCAS( address , compare , val );
+}
+
+// unsigned int: forwarded to the native atomicCAS.
+__inline__ __device__
+unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val)
+{
+  unsigned int * const address = (unsigned int*) dest ;
+  return atomicCAS( address , compare , val );
+}
+
+// unsigned long long int: forwarded to the native atomicCAS.
+__inline__ __device__
+unsigned long long int atomic_compare_exchange( volatile unsigned long long int * const dest ,
+                                                const unsigned long long int compare ,
+                                                const unsigned long long int val )
+{
+  unsigned long long int * const address = (unsigned long long int*) dest ;
+  return atomicCAS( address , compare , val );
+}
+
+// Any T with sizeof(T) == sizeof(int): the operands are reinterpreted as
+// int, exchanged with the int atomicCAS, and the previous contents are
+// reinterpreted back as T.
+template < typename T >
+__inline__ __device__
+T atomic_compare_exchange( volatile T * const dest , const T & compare ,
+  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
+{
+  const int expected_bits = *((int*)&compare) ;
+  const int desired_bits  = *((int*)&val) ;
+  const int previous_bits = atomicCAS( (int*) dest , expected_bits , desired_bits );
+  return *((T*)&previous_bits);
+}
+
+// Any T with sizeof(T) == sizeof(unsigned long long int) (and different
+// from sizeof(int)): same scheme using the unsigned long long atomicCAS.
+template < typename T >
+__inline__ __device__
+T atomic_compare_exchange( volatile T * const dest , const T & compare ,
+  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
+                                    sizeof(T) == sizeof(unsigned long long int) , const T & >::type val )
+{
+  typedef unsigned long long int word_type ;
+  const word_type expected_bits = *((word_type*)&compare) ;
+  const word_type desired_bits  = *((word_type*)&val) ;
+  const word_type previous_bits = atomicCAS( (word_type*) dest , expected_bits , desired_bits );
+  return *((T*)&previous_bits);
+}
+
+// Every other size (neither 4 nor 8 bytes): no native CAS is used; the
+// compare and the store are done while holding the lock keyed on 'dest'.
+// Returns the value read from *dest under the lock.
+template < typename T >
+__inline__ __device__
+T atomic_compare_exchange( volatile T * const dest , const T & compare ,
+    typename ::Kokkos::Impl::enable_if<
+                  ( sizeof(T) != 4 )
+               && ( sizeof(T) != 8 )
+             , const T >::type& val )
+{
+  void * const lock_key = (void*) dest ;
+  T return_val;
+  // Retry loop shaped this way to (hopefully) avoid deadlock within a warp:
+  // the counter stays positive until this thread has held the lock once.
+  int pending = 1;
+  while ( pending>0 ) {
+    pending++;
+    if( Impl::lock_address_cuda_space( lock_key ) ) {
+      return_val = *dest;
+      if( return_val == compare ) {
+        *dest = val;
+      }
+      Impl::unlock_address_cuda_space( lock_key );
+      pending = 0;
+    }
+  }
+  return return_val;
+}
+
+//----------------------------------------------------------------------------
+// GCC native CAS supports int, long, unsigned int, unsigned long.
+// Intel native CAS support int and long with the same interface as GCC.
+
+#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
+
+// int: forwarded to the __sync_val_compare_and_swap builtin, which returns
+// the value previously stored at dest.
+KOKKOS_INLINE_FUNCTION
+int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
+{ return __sync_val_compare_and_swap(dest,compare,val); }
+
+// long: forwarded to the same builtin.
+KOKKOS_INLINE_FUNCTION
+long atomic_compare_exchange( volatile long * const dest, const long compare, const long val )
+{ return __sync_val_compare_and_swap(dest,compare,val); }
+
+#if defined( KOKKOS_ATOMICS_USE_GCC )
+
+// GCC supports unsigned
+
+KOKKOS_INLINE_FUNCTION
+unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val )
+{ return __sync_val_compare_and_swap(dest,compare,val); }
+
+KOKKOS_INLINE_FUNCTION
+unsigned long atomic_compare_exchange( volatile unsigned long * const dest ,
+                                       const unsigned long compare ,
+                                       const unsigned long val )
+{ return __sync_val_compare_and_swap(dest,compare,val); }
+
+#endif
+
+// Any T with sizeof(T) == sizeof(int): operands are reinterpreted as int for
+// the builtin; the previous contents are converted back to T through a union.
+// With KOKKOS_HAVE_CXX11 the union declares an empty constructor.
+template < typename T >
+KOKKOS_INLINE_FUNCTION
+T atomic_compare_exchange( volatile T * const dest, const T & compare,
+  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
+{
+#ifdef KOKKOS_HAVE_CXX11
+  union U {
+    int i ;
+    T t ;
+    KOKKOS_INLINE_FUNCTION U() {};
+  } tmp ;
+#else
+  union U {
+    int i ;
+    T t ;
+  } tmp ;
+#endif
+
+  tmp.i = __sync_val_compare_and_swap( (int*) dest , *((int*)&compare) , *((int*)&val) );
+  return tmp.t ;
+}
+
+// Any T with sizeof(T) == sizeof(long) (and different from sizeof(int)):
+// same scheme with 'long' as the carrier type.
+template < typename T >
+KOKKOS_INLINE_FUNCTION
+T atomic_compare_exchange( volatile T * const dest, const T & compare,
+  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
+                                    sizeof(T) == sizeof(long) , const T & >::type val )
+{
+#ifdef KOKKOS_HAVE_CXX11
+  union U {
+    long i ;
+    T t ;
+    KOKKOS_INLINE_FUNCTION U() {};
+  } tmp ;
+#else
+  union U {
+    long i ;
+    T t ;
+  } tmp ;
+#endif
+
+  tmp.i = __sync_val_compare_and_swap( (long*) dest , *((long*)&compare) , *((long*)&val) );
+  return tmp.t ;
+}
+
+#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
+// Any T with sizeof(T) == sizeof(Impl::cas128_t): routed through
+// Impl::cas128 with Impl::cas128_t as the carrier type.
+template < typename T >
+KOKKOS_INLINE_FUNCTION
+T atomic_compare_exchange( volatile T * const dest, const T & compare,
+  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
+                                    sizeof(T) != sizeof(long) &&
+                                    sizeof(T) == sizeof(Impl::cas128_t), const T & >::type val )
+{
+  union U {
+    Impl::cas128_t i ;
+    T t ;
+    KOKKOS_INLINE_FUNCTION U() {};
+  } tmp ;
+
+  tmp.i = Impl::cas128( (Impl::cas128_t*) dest , *((Impl::cas128_t*)&compare) , *((Impl::cas128_t*)&val) );
+  return tmp.t ;
+}
+#endif
+
+// Fallback for all remaining sizes: spin until the lock keyed on 'dest' is
+// acquired, then compare and store while holding it.  Returns the value read
+// from *dest under the lock.
+// NOTE(review): the size tests here use the literals 4 / 8 / 16 while the
+// overloads above use sizeof(int) / sizeof(long); confirm these agree on
+// every supported data model.
+template < typename T >
+inline
+T atomic_compare_exchange( volatile T * const dest , const T compare ,
+    typename ::Kokkos::Impl::enable_if<
+                  ( sizeof(T) != 4 )
+               && ( sizeof(T) != 8 )
+            #if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
+               && ( sizeof(T) != 16 )
+            #endif
+             , const T >::type& val )
+{
+  while( !Impl::lock_address_host_space( (void*) dest ) );
+  T return_val = *dest;
+  if( return_val == compare ) {
+    // Don't use the following line of code here:
+    //
+    //const T tmp = *dest = val;
+    //
+    // Instead, put each assignment in its own statement.  This is
+    // because the overload of T::operator= for volatile *this should
+    // return void, not volatile T&.  See Kokkos #177:
+    //
+    // https://github.com/kokkos/kokkos/issues/177
+    *dest = val;
+    const T tmp = *dest;
+    #ifndef KOKKOS_COMPILER_CLANG
+    (void) tmp;
+    #endif
+  }
+  Impl::unlock_address_host_space( (void*) dest );
+  return return_val;
+}
+//----------------------------------------------------------------------------
+
+#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
+
+// OpenMP 3.1 backend: the read, compare and conditional store are wrapped
+// in a single 'omp critical' region.  Returns the value read from dest[0].
+template< typename T >
+KOKKOS_INLINE_FUNCTION
+T atomic_compare_exchange( volatile T * const dest, const T compare, const T val )
+{
+  T retval;
+#pragma omp critical
+  {
+    retval = dest[0];
+    if ( retval == compare )
+        dest[0] = val;
+  }
+  return retval;
+}
+
+#endif
+
+// Returns true when the value returned by atomic_compare_exchange equals
+// 'compare', i.e. when the exchange took place.
+template <typename T>
+KOKKOS_INLINE_FUNCTION
+bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val)
+{
+  return compare == atomic_compare_exchange(dest, compare, val);
+}
+
+//----------------------------------------------------------------------------
+
+} // namespace Kokkos
+
+#endif
+
diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..1438a37e454e556832549e2137202d971b4a09ce
--- /dev/null
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp
@@ -0,0 +1,117 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2.
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_DECREMENT ) +#define KOKKOS_ATOMIC_DECREMENT + +namespace Kokkos { + +// Atomic decrement +template<> +KOKKOS_INLINE_FUNCTION +void atomic_decrement<char>(volatile char* a) { +#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && !
defined(__CUDA_ARCH__) + __asm__ __volatile__( + "lock decb %0" + : /* no output registers */ + : "m" (a[0]) + : "memory" + ); +#else + Kokkos::atomic_fetch_add(a,-1); +#endif +} + +template<> +KOKKOS_INLINE_FUNCTION +void atomic_decrement<short>(volatile short* a) { +#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__) + __asm__ __volatile__( + "lock decw %0" + : /* no output registers */ + : "m" (a[0]) + : "memory" + ); +#else + Kokkos::atomic_fetch_add(a,-1); +#endif +} + +template<> +KOKKOS_INLINE_FUNCTION +void atomic_decrement<int>(volatile int* a) { +#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__) + __asm__ __volatile__( + "lock decl %0" + : /* no output registers */ + : "m" (a[0]) + : "memory" + ); +#else + Kokkos::atomic_fetch_add(a,-1); +#endif +} + +template<> +KOKKOS_INLINE_FUNCTION +void atomic_decrement<long long int>(volatile long long int* a) { +#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__) + __asm__ __volatile__( + "lock decq %0" + : /* no output registers */ + : "m" (a[0]) + : "memory" + ); +#else + Kokkos::atomic_fetch_add(a,-1); +#endif +} + +template<typename T> +KOKKOS_INLINE_FUNCTION +void atomic_decrement(volatile T* a) { + Kokkos::atomic_fetch_add(a,-1); +} + +} // End of namespace Kokkos +#endif diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e8cac4ba3b82ba097016a3ba80b03b010a7df8c3 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp @@ -0,0 +1,359 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined( KOKKOS_ATOMIC_HPP ) && ! 
defined( KOKKOS_ATOMIC_EXCHANGE_HPP ) +#define KOKKOS_ATOMIC_EXCHANGE_HPP + +namespace Kokkos { + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_ATOMICS_USE_CUDA ) + +__inline__ __device__ +int atomic_exchange( volatile int * const dest , const int val ) +{ + // return __iAtomicExch( (int*) dest , val ); + return atomicExch( (int*) dest , val ); +} + +__inline__ __device__ +unsigned int atomic_exchange( volatile unsigned int * const dest , const unsigned int val ) +{ + // return __uAtomicExch( (unsigned int*) dest , val ); + return atomicExch( (unsigned int*) dest , val ); +} + +__inline__ __device__ +unsigned long long int atomic_exchange( volatile unsigned long long int * const dest , const unsigned long long int val ) +{ + // return __ullAtomicExch( (unsigned long long*) dest , val ); + return atomicExch( (unsigned long long*) dest , val ); +} + +/** \brief Atomic exchange for any type with compatible size */ +template< typename T > +__inline__ __device__ +T atomic_exchange( + volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val ) +{ + // int tmp = __ullAtomicExch( (int*) dest , *((int*)&val) ); + int tmp = atomicExch( ((int*)dest) , *((int*)&val) ); + return *((T*)&tmp); +} + +template< typename T > +__inline__ __device__ +T atomic_exchange( + volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(unsigned long long int) , const T & >::type val ) +{ + typedef unsigned long long int type ; + // type tmp = __ullAtomicExch( (type*) dest , *((type*)&val) ); + type tmp = atomicExch( ((type*)dest) , *((type*)&val) ); + return *((T*)&tmp); +} + +template < typename T > +__inline__ __device__ +T atomic_exchange( volatile T * const dest , + typename ::Kokkos::Impl::enable_if< + ( sizeof(T) != 4 ) + && ( sizeof(T) != 8 ) + , const T >::type& val ) +{ + T return_val; + // This is a way to (hopefully) 
avoid deadlock in a warp + int done = 1; + while ( done > 0 ) { + done++; + if( Impl::lock_address_cuda_space( (void*) dest ) ) { + return_val = *dest; + *dest = val; + Impl::unlock_address_cuda_space( (void*) dest ); + done = 0; + } + } + return return_val; +} +/** \brief Atomic assign for any type with compatible size */ +template< typename T > +__inline__ __device__ +void atomic_assign( + volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val ) +{ + // (void) __ullAtomicExch( (int*) dest , *((int*)&val) ); + (void) atomicExch( ((int*)dest) , *((int*)&val) ); +} + +template< typename T > +__inline__ __device__ +void atomic_assign( + volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(unsigned long long int) , const T & >::type val ) +{ + typedef unsigned long long int type ; + // (void) __ullAtomicExch( (type*) dest , *((type*)&val) ); + (void) atomicExch( ((type*)dest) , *((type*)&val) ); +} + +template< typename T > +__inline__ __device__ +void atomic_assign( + volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && + sizeof(T) != sizeof(unsigned long long int) + , const T & >::type val ) +{ + (void) atomic_exchange(dest,val); +} + +//---------------------------------------------------------------------------- + +#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) + +template< typename T > +KOKKOS_INLINE_FUNCTION +T atomic_exchange( volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long) + , const T & >::type val ) +{ + typedef typename Kokkos::Impl::if_c< sizeof(T) == sizeof(int) , int , long >::type type ; + + const type v = *((type*)&val); // Extract to be sure the value doesn't change + + type assumed ; + +#ifdef KOKKOS_HAVE_CXX11 + union U { + T val_T ; + type val_type ; + KOKKOS_INLINE_FUNCTION U() {}; + } old ; +#else +
union { T val_T ; type val_type ; } old ; +#endif + + old.val_T = *dest ; + + do { + assumed = old.val_type ; + old.val_type = __sync_val_compare_and_swap( (volatile type *) dest , assumed , v ); + } while ( assumed != old.val_type ); + + return old.val_T ; +} + +#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 ) +template< typename T > +KOKKOS_INLINE_FUNCTION +T atomic_exchange( volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t) + , const T & >::type val ) +{ + union U { + Impl::cas128_t i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {}; + } assume , oldval , newval ; + + oldval.t = *dest ; + newval.t = val; + + do { + assume.i = oldval.i ; + oldval.i = Impl::cas128( (volatile Impl::cas128_t*) dest , assume.i , newval.i ); + } while ( assume.i != oldval.i ); + + return oldval.t ; +} +#endif + +//---------------------------------------------------------------------------- + +template < typename T > +inline +T atomic_exchange( volatile T * const dest , + typename ::Kokkos::Impl::enable_if< + ( sizeof(T) != 4 ) + && ( sizeof(T) != 8 ) + #if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 ) + && ( sizeof(T) != 16 ) + #endif + , const T >::type& val ) +{ + while( !Impl::lock_address_host_space( (void*) dest ) ); + T return_val = *dest; + // Don't use the following line of code here: + // + //const T tmp = *dest = val; + // + // Instead, put each assignment in its own statement. This is + // because the overload of T::operator= for volatile *this should + // return void, not volatile T&. 
See Kokkos #177: + // + // https://github.com/kokkos/kokkos/issues/177 + *dest = val; + const T tmp = *dest; + #ifndef KOKKOS_COMPILER_CLANG + (void) tmp; + #endif + Impl::unlock_address_host_space( (void*) dest ); + return return_val; +} + +template< typename T > +KOKKOS_INLINE_FUNCTION +void atomic_assign( volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long) + , const T & >::type val ) +{ + typedef typename Kokkos::Impl::if_c< sizeof(T) == sizeof(int) , int , long >::type type ; + + const type v = *((type*)&val); // Extract to be sure the value doesn't change + + type assumed ; + +#ifdef KOKKOS_HAVE_CXX11 + union U { + T val_T ; + type val_type ; + KOKKOS_INLINE_FUNCTION U() {}; + } old ; +#else + union { T val_T ; type val_type ; } old ; +#endif + + old.val_T = *dest ; + + do { + assumed = old.val_type ; + old.val_type = __sync_val_compare_and_swap( (volatile type *) dest , assumed , v ); + } while ( assumed != old.val_type ); +} + +#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 ) +template< typename T > +KOKKOS_INLINE_FUNCTION +void atomic_assign( volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t) + , const T & >::type val ) +{ + union U { + Impl::cas128_t i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {}; + } assume , oldval , newval ; + + oldval.t = *dest ; + newval.t = val; + do { + assume.i = oldval.i ; + oldval.i = Impl::cas128( (volatile Impl::cas128_t*) dest , assume.i , newval.i); + } while ( assume.i != oldval.i ); +} +#endif + +template < typename T > +inline +void atomic_assign( volatile T * const dest , + typename ::Kokkos::Impl::enable_if< + ( sizeof(T) != 4 ) + && ( sizeof(T) != 8 ) + #if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 ) + && ( sizeof(T) != 16 ) + #endif + , const T >::type& val ) +{ + while( !Impl::lock_address_host_space( (void*) dest ) ); + // This is likely an aggregate type with a defined 
+ // 'volatile T & operator = ( const T & ) volatile' + // member. The volatile return value implicitly defines a + // dereference that some compilers (gcc 4.7.2) warn is being ignored. + // Suppress warning by casting return to void. + //(void)( *dest = val ); + *dest = val; + + Impl::unlock_address_host_space( (void*) dest ); +} +//---------------------------------------------------------------------------- + +#elif defined( KOKKOS_ATOMICS_USE_OMP31 ) + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_exchange( volatile T * const dest , const T val ) +{ + T retval; +//#pragma omp atomic capture + #pragma omp critical + { + retval = dest[0]; + dest[0] = val; + } + return retval; +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +void atomic_assign( volatile T * const dest , const T val ) +{ +//#pragma omp atomic + #pragma omp critical + { + dest[0] = val; + } +} + +#endif + +} // namespace Kokkos + +#endif + +//---------------------------------------------------------------------------- + diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp new file mode 100644 index 0000000000000000000000000000000000000000..62dfcdd2f88934f8d48b51e0637e9487d92c9a7e --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp @@ -0,0 +1,340 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined( KOKKOS_ATOMIC_HPP ) && ! 
defined( KOKKOS_ATOMIC_FETCH_ADD_HPP ) +#define KOKKOS_ATOMIC_FETCH_ADD_HPP + +namespace Kokkos { + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_ATOMICS_USE_CUDA ) + +// Support for int, unsigned int, unsigned long long int, and float + +__inline__ __device__ +int atomic_fetch_add( volatile int * const dest , const int val ) +{ return atomicAdd((int*)dest,val); } + +__inline__ __device__ +unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val ) +{ return atomicAdd((unsigned int*)dest,val); } + +__inline__ __device__ +unsigned long long int atomic_fetch_add( volatile unsigned long long int * const dest , + const unsigned long long int val ) +{ return atomicAdd((unsigned long long int*)dest,val); } + +__inline__ __device__ +float atomic_fetch_add( volatile float * const dest , const float val ) +{ return atomicAdd((float*)dest,val); } + +template < typename T > +__inline__ __device__ +T atomic_fetch_add( volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val ) +{ +#ifdef KOKKOS_HAVE_CXX11 + union U { + int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {}; + } assume , oldval , newval ; +#else + union U { + int i ; + T t ; + } assume , oldval , newval ; +#endif + + oldval.t = *dest ; + + do { + assume.i = oldval.i ; + newval.t = assume.t + val ; + oldval.i = atomicCAS( (int*)dest , assume.i , newval.i ); + } while ( assume.i != oldval.i ); + + return oldval.t ; +} + +template < typename T > +__inline__ __device__ +T atomic_fetch_add( volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(unsigned long long int) , const T >::type val ) +{ +#ifdef KOKKOS_HAVE_CXX11 + union U { + unsigned long long int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {}; + } assume , oldval , newval ; +#else + union U { + unsigned long long int i ; + T t ; + } assume , oldval , newval ; +#endif + + 
oldval.t = *dest ; + + do { + assume.i = oldval.i ; + newval.t = assume.t + val ; + oldval.i = atomicCAS( (unsigned long long int*)dest , assume.i , newval.i ); + } while ( assume.i != oldval.i ); + + return oldval.t ; +} + +//---------------------------------------------------------------------------- + +template < typename T > +__inline__ __device__ +T atomic_fetch_add( volatile T * const dest , + typename ::Kokkos::Impl::enable_if< + ( sizeof(T) != 4 ) + && ( sizeof(T) != 8 ) + , const T >::type& val ) +{ + T return_val; + // This is a way to (hopefully) avoid deadlock in a warp + int done = 1; + while ( done>0 ) { + done++; + if( Impl::lock_address_cuda_space( (void*) dest ) ) { + return_val = *dest; + *dest = return_val + val; + Impl::unlock_address_cuda_space( (void*) dest ); + done = 0; + } + } + return return_val; +} +//---------------------------------------------------------------------------- + +#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) + +#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 ) +KOKKOS_INLINE_FUNCTION +int atomic_fetch_add( volatile int * dest , const int val ) +{ + int original = val; + + __asm__ __volatile__( + "lock xadd %1, %0" + : "+m" (*dest), "+r" (original) + : "m" (*dest), "r" (original) + : "memory" + ); + + return original; +} +#else +KOKKOS_INLINE_FUNCTION +int atomic_fetch_add( volatile int * const dest , const int val ) +{ return __sync_fetch_and_add(dest, val); } +#endif + +KOKKOS_INLINE_FUNCTION +long int atomic_fetch_add( volatile long int * const dest , const long int val ) +{ return __sync_fetch_and_add(dest,val); } + +#if defined( KOKKOS_ATOMICS_USE_GCC ) + +KOKKOS_INLINE_FUNCTION +unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val ) +{ return __sync_fetch_and_add(dest,val); } + +KOKKOS_INLINE_FUNCTION +unsigned long int atomic_fetch_add( volatile unsigned long int * const dest , const unsigned long int val ) +{ return
__sync_fetch_and_add(dest,val); } + +#endif + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_add( volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val ) +{ +#ifdef KOKKOS_HAVE_CXX11 + union U { + int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {}; + } assume , oldval , newval ; +#else + union U { + int i ; + T t ; + } assume , oldval , newval ; +#endif + + oldval.t = *dest ; + + do { + assume.i = oldval.i ; + newval.t = assume.t + val ; + oldval.i = __sync_val_compare_and_swap( (int*) dest , assume.i , newval.i ); + } while ( assume.i != oldval.i ); + + return oldval.t ; +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_add( volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(long) , const T >::type val ) +{ +#ifdef KOKKOS_HAVE_CXX11 + union U { + long i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {}; + } assume , oldval , newval ; +#else + union U { + long i ; + T t ; + } assume , oldval , newval ; +#endif + + oldval.t = *dest ; + + do { + assume.i = oldval.i ; + newval.t = assume.t + val ; + oldval.i = __sync_val_compare_and_swap( (long*) dest , assume.i , newval.i ); + } while ( assume.i != oldval.i ); + + return oldval.t ; +} + +#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 ) +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_add( volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && + sizeof(T) != sizeof(long) && + sizeof(T) == sizeof(Impl::cas128_t) , const T >::type val ) +{ + union U { + Impl::cas128_t i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {}; + } assume , oldval , newval ; + + oldval.t = *dest ; + + do { + assume.i = oldval.i ; + newval.t = assume.t + val ; + oldval.i = Impl::cas128( (volatile Impl::cas128_t*) dest , assume.i , newval.i ); + } while ( assume.i != oldval.i ); + + return oldval.t ; +} +#endif + 
+//---------------------------------------------------------------------------- + +template < typename T > +inline +T atomic_fetch_add( volatile T * const dest , + typename ::Kokkos::Impl::enable_if< + ( sizeof(T) != 4 ) + && ( sizeof(T) != 8 ) + #if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 ) + && ( sizeof(T) != 16 ) + #endif + , const T >::type& val ) +{ + while( !Impl::lock_address_host_space( (void*) dest ) ); + T return_val = *dest; + // Don't use the following line of code here: + // + //const T tmp = *dest = return_val + val; + // + // Instead, put each assignment in its own statement. This is + // because the overload of T::operator= for volatile *this should + // return void, not volatile T&. See Kokkos #177: + // + // https://github.com/kokkos/kokkos/issues/177 + *dest = return_val + val; + const T tmp = *dest; + (void) tmp; + Impl::unlock_address_host_space( (void*) dest ); + return return_val; +} +//---------------------------------------------------------------------------- + +#elif defined( KOKKOS_ATOMICS_USE_OMP31 ) + +template< typename T > +T atomic_fetch_add( volatile T * const dest , const T val ) +{ + T retval; +#pragma omp atomic capture + { + retval = dest[0]; + dest[0] += val; + } + return retval; +} + +#endif + +//---------------------------------------------------------------------------- + +// Simpler version of atomic_fetch_add without the fetch +template <typename T> +KOKKOS_INLINE_FUNCTION +void atomic_add(volatile T * const dest, const T src) { + atomic_fetch_add(dest,src); +} + +} +#endif + diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9b7ebae4ac6df12bae659e50aa7da34429ac3187 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp @@ -0,0 +1,125 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined( KOKKOS_ATOMIC_HPP ) && ! 
defined( KOKKOS_ATOMIC_FETCH_AND_HPP ) +#define KOKKOS_ATOMIC_FETCH_AND_HPP + +namespace Kokkos { + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_ATOMICS_USE_CUDA ) + +// Support for int, unsigned int, and unsigned long long int (the latter only when __CUDA_ARCH__ >= 350) + +__inline__ __device__ +int atomic_fetch_and( volatile int * const dest , const int val ) +{ return atomicAnd((int*)dest,val); } + +__inline__ __device__ +unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val ) +{ return atomicAnd((unsigned int*)dest,val); } + +#if defined( __CUDA_ARCH__ ) && ( 350 <= __CUDA_ARCH__ ) +__inline__ __device__ +unsigned long long int atomic_fetch_and( volatile unsigned long long int * const dest , + const unsigned long long int val ) +{ return atomicAnd((unsigned long long int*)dest,val); } +#endif + +//---------------------------------------------------------------------------- + +#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) + +KOKKOS_INLINE_FUNCTION +int atomic_fetch_and( volatile int * const dest , const int val ) +{ return __sync_fetch_and_and(dest,val); } + +KOKKOS_INLINE_FUNCTION +long int atomic_fetch_and( volatile long int * const dest , const long int val ) +{ return __sync_fetch_and_and(dest,val); } + +#if defined( KOKKOS_ATOMICS_USE_GCC ) + +KOKKOS_INLINE_FUNCTION +unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val ) +{ return __sync_fetch_and_and(dest,val); } + +KOKKOS_INLINE_FUNCTION +unsigned long int atomic_fetch_and( volatile unsigned long int * const dest , const unsigned long int val ) +{ return __sync_fetch_and_and(dest,val); } + +#endif + +//---------------------------------------------------------------------------- + +#elif defined( KOKKOS_ATOMICS_USE_OMP31 ) + +template< typename T > +T atomic_fetch_and( volatile T * const dest , const T val ) +{ + T retval; +#pragma omp atomic capture + { + retval = dest[0]; + dest[0]
&= val; + } + return retval; +} + +#endif + +//---------------------------------------------------------------------------- + +// Simpler version of atomic_fetch_and without the fetch +template <typename T> +KOKKOS_INLINE_FUNCTION +void atomic_and(volatile T * const dest, const T src) { + (void)atomic_fetch_and(dest,src); +} + +} + +#endif + + diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f15e61a3aea2ac2e7120d88a7151390cc2bf0b73 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp @@ -0,0 +1,125 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_OR_HPP ) +#define KOKKOS_ATOMIC_FETCH_OR_HPP + +namespace Kokkos { + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_ATOMICS_USE_CUDA ) + +// Support for int, unsigned int, and unsigned long long int (the latter only when __CUDA_ARCH__ >= 350) + +__inline__ __device__ +int atomic_fetch_or( volatile int * const dest , const int val ) +{ return atomicOr((int*)dest,val); } + +__inline__ __device__ +unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val ) +{ return atomicOr((unsigned int*)dest,val); } + +#if defined( __CUDA_ARCH__ ) && ( 350 <= __CUDA_ARCH__ ) +__inline__ __device__ +unsigned long long int atomic_fetch_or( volatile unsigned long long int * const dest , + const unsigned long long int val ) +{ return atomicOr((unsigned long long int*)dest,val); } +#endif + +//---------------------------------------------------------------------------- + +#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) + +KOKKOS_INLINE_FUNCTION +int atomic_fetch_or( volatile int * const dest , const int val ) +{ return __sync_fetch_and_or(dest,val); } + +KOKKOS_INLINE_FUNCTION +long int atomic_fetch_or( volatile long int * const dest , const long
int val ) +{ return __sync_fetch_and_or(dest,val); } + +#if defined( KOKKOS_ATOMICS_USE_GCC ) + +KOKKOS_INLINE_FUNCTION +unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val ) +{ return __sync_fetch_and_or(dest,val); } + +KOKKOS_INLINE_FUNCTION +unsigned long int atomic_fetch_or( volatile unsigned long int * const dest , const unsigned long int val ) +{ return __sync_fetch_and_or(dest,val); } + +#endif + +//---------------------------------------------------------------------------- + +#elif defined( KOKKOS_ATOMICS_USE_OMP31 ) + +template< typename T > +T atomic_fetch_or( volatile T * const dest , const T val ) +{ + T retval; +#pragma omp atomic capture + { + retval = dest[0]; + dest[0] |= val; + } + return retval; +} + +#endif + +//---------------------------------------------------------------------------- + +// Simpler version of atomic_fetch_or without the fetch +template <typename T> +KOKKOS_INLINE_FUNCTION +void atomic_or(volatile T * const dest, const T src) { + (void)atomic_fetch_or(dest,src); +} + +} + +#endif + + diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a3a57aa81c7f303cf74fe5d8d7c6a50dc36eeb2d --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp @@ -0,0 +1,235 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined( KOKKOS_ATOMIC_HPP ) && ! 
defined( KOKKOS_ATOMIC_FETCH_SUB_HPP ) +#define KOKKOS_ATOMIC_FETCH_SUB_HPP + +namespace Kokkos { + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_ATOMICS_USE_CUDA ) + +// Support for int, unsigned int, unsigned long long int, and float + +__inline__ __device__ +int atomic_fetch_sub( volatile int * const dest , const int val ) +{ return atomicSub((int*)dest,val); } + +__inline__ __device__ +unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val ) +{ return atomicSub((unsigned int*)dest,val); } + +// Types with sizeof(T) == sizeof(int): the value is viewed as an int through a union +// and atomicCAS is retried until the value it returns equals the one the new value +// was computed from. Returns the value held before the subtraction. +template < typename T > +__inline__ __device__ +T atomic_fetch_sub( volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val ) +{ + union { int i ; T t ; } oldval , assume , newval ; + + oldval.t = *dest ; + + do { + assume.i = oldval.i ; + newval.t = assume.t - val ; + oldval.i = atomicCAS( (int*)dest , assume.i , newval.i ); + } while ( assume.i != oldval.i ); + + return oldval.t ; +} + +// Same retry loop for types with sizeof(T) == sizeof(unsigned long long int). +template < typename T > +__inline__ __device__ +T atomic_fetch_sub( volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(unsigned long long int) , const T >::type val ) +{ + union { unsigned long long int i ; T t ; } oldval , assume , newval ; + + oldval.t = *dest ; + + do { + assume.i = oldval.i ; + newval.t = assume.t - val ; + oldval.i = atomicCAS( (unsigned long long int*)dest , assume.i , newval.i ); + } while ( assume.i != oldval.i ); + + return oldval.t ; +} + + +//---------------------------------------------------------------------------- + +// Fallback for types whose size is neither 4 nor 8 bytes: the update is done while +// holding the lock obtained from Impl::lock_address_cuda_space(dest). +// Returns the value read from *dest before val was subtracted. +template < typename T > +__inline__ __device__ +T atomic_fetch_sub( volatile T * const dest , + typename ::Kokkos::Impl::enable_if< + ( sizeof(T) != 4 ) + && ( sizeof(T) != 8 ) + , const T >::type& val ) +{ + T return_val; + // This is a way to (hopefully) avoid dead lock in a warp + // 'done' has to start positive: with 0 the loop below would never run, so nothing + // would be subtracted and return_val would be returned uninitialized. + int done = 1; + while ( done>0 ) { + done++; + if(
Impl::lock_address_cuda_space( (void*) dest ) ) { + return_val = *dest; + *dest = return_val - val; + Impl::unlock_address_cuda_space( (void*) dest ); + done = 0; + } + } + return return_val; +} + +//---------------------------------------------------------------------------- + +#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) + +KOKKOS_INLINE_FUNCTION +int atomic_fetch_sub( volatile int * const dest , const int val ) +{ return __sync_fetch_and_sub(dest,val); } + +KOKKOS_INLINE_FUNCTION +long int atomic_fetch_sub( volatile long int * const dest , const long int val ) +{ return __sync_fetch_and_sub(dest,val); } + +#if defined( KOKKOS_ATOMICS_USE_GCC ) + +KOKKOS_INLINE_FUNCTION +unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val ) +{ return __sync_fetch_and_sub(dest,val); } + +KOKKOS_INLINE_FUNCTION +unsigned long int atomic_fetch_sub( volatile unsigned long int * const dest , const unsigned long int val ) +{ return __sync_fetch_and_sub(dest,val); } + +#endif + +// Types with sizeof(T) == sizeof(int): retry __sync_val_compare_and_swap until the +// value it returns equals the one the new value was computed from. +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_sub( volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val ) +{ + union { int i ; T t ; } assume , oldval , newval ; + + oldval.t = *dest ; + + do { + assume.i = oldval.i ; + newval.t = assume.t - val ; + oldval.i = __sync_val_compare_and_swap( (int*) dest , assume.i , newval.i ); + } while ( assume.i != oldval.i ); + + return oldval.t ; +} + +// Same retry loop for types with sizeof(T) == sizeof(long). +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_sub( volatile T * const dest , + typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(long) , const T >::type val ) +{ + union { long i ; T t ; } assume , oldval , newval ; + + oldval.t = *dest ; + + do { + assume.i = oldval.i ; + newval.t = assume.t - val ; + oldval.i = __sync_val_compare_and_swap( (long*) dest , assume.i , newval.i ); + } while ( assume.i != oldval.i ); + + return oldval.t ;
+} + + +//---------------------------------------------------------------------------- + +// Fallback for types whose size is neither 4 nor 8 bytes: spins until +// Impl::lock_address_host_space(dest) succeeds, performs the read-subtract-write, +// releases the lock and returns the value read before the subtraction. +template < typename T > +inline +T atomic_fetch_sub( volatile T * const dest , + typename ::Kokkos::Impl::enable_if< + ( sizeof(T) != 4 ) + && ( sizeof(T) != 8 ) + , const T >::type& val ) +{ + while( !Impl::lock_address_host_space( (void*) dest ) ); + T return_val = *dest; + *dest = return_val - val; + Impl::unlock_address_host_space( (void*) dest ); + return return_val; +} + +//---------------------------------------------------------------------------- + +#elif defined( KOKKOS_ATOMICS_USE_OMP31 ) + +// Reads dest[0] into retval and subtracts val from dest[0] inside one +// 'omp atomic capture' region; returns the value read. +template< typename T > +T atomic_fetch_sub( volatile T * const dest , const T val ) +{ + T retval; +#pragma omp atomic capture + { + retval = dest[0]; + dest[0] -= val; + } + return retval; +} + +#endif + +// Simpler version of atomic_fetch_sub without the fetch: +// calls atomic_fetch_sub(dest,src) and ignores the value it returns. +template <typename T> +KOKKOS_INLINE_FUNCTION +void atomic_sub(volatile T * const dest, const T src) { + atomic_fetch_sub(dest,src); +} + +} + +#include<impl/Kokkos_Atomic_Assembly.hpp> +#endif + + diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp new file mode 100644 index 0000000000000000000000000000000000000000..343e9bf4c48fa499199930ebbf9a1fb893e475da --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp @@ -0,0 +1,419 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2.
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_GENERIC_HPP ) +#define KOKKOS_ATOMIC_GENERIC_HPP +#include <Kokkos_Macros.hpp> + +// Combination operations to be used in a compare-and-exchange based atomic operation. +// Every *Oper<Scalar1,Scalar2> below has a static apply(val1,val2) that combines the +// two arguments with the operator named by the struct and returns the result as Scalar1. +namespace Kokkos { +namespace Impl { + +template<class Scalar1, class Scalar2> +struct MaxOper { + KOKKOS_FORCEINLINE_FUNCTION + static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { + return (val1 > val2 ?
val1 : val2); + } +}; + +template<class Scalar1, class Scalar2> +struct MinOper { + KOKKOS_FORCEINLINE_FUNCTION + static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { + return (val1 < val2 ? val1 : val2); + } +}; + +template<class Scalar1, class Scalar2> +struct AddOper { + KOKKOS_FORCEINLINE_FUNCTION + static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { + return val1+val2; + } +}; + +template<class Scalar1, class Scalar2> +struct SubOper { + KOKKOS_FORCEINLINE_FUNCTION + static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { + return val1-val2; + } +}; + +template<class Scalar1, class Scalar2> +struct MulOper { + KOKKOS_FORCEINLINE_FUNCTION + static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { + return val1*val2; + } +}; + +template<class Scalar1, class Scalar2> +struct DivOper { + KOKKOS_FORCEINLINE_FUNCTION + static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { + return val1/val2; + } +}; + +template<class Scalar1, class Scalar2> +struct ModOper { + KOKKOS_FORCEINLINE_FUNCTION + static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { + return val1%val2; + } +}; + +template<class Scalar1, class Scalar2> +struct AndOper { + KOKKOS_FORCEINLINE_FUNCTION + static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { + return val1&val2; + } +}; + +template<class Scalar1, class Scalar2> +struct OrOper { + KOKKOS_FORCEINLINE_FUNCTION + static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { + return val1|val2; + } +}; + +template<class Scalar1, class Scalar2> +struct XorOper { + KOKKOS_FORCEINLINE_FUNCTION + static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { + return val1^val2; + } +}; + +template<class Scalar1, class Scalar2> +struct LShiftOper { + KOKKOS_FORCEINLINE_FUNCTION + static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { + return val1<<val2; + } +}; + +template<class Scalar1, class Scalar2> +struct RShiftOper { + KOKKOS_FORCEINLINE_FUNCTION + static
Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { + return val1>>val2; + } +}; + +// Compare-and-exchange based implementations. The 'op' argument is not read; only +// the static Oper::apply is called. atomic_fetch_oper returns the value observed +// before the update (oldval), atomic_oper_fetch the value that was stored (newval). +// Each loop repeats until atomic_compare_exchange returns the value the new value +// was computed from. +template < class Oper, typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_oper( const Oper& op, volatile T * const dest , + typename ::Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(unsigned long long int) , const T >::type val ) +{ + union { unsigned long long int i ; T t ; } oldval , assume , newval ; + + oldval.t = *dest ; + + do { + assume.i = oldval.i ; + newval.t = Oper::apply(assume.t, val) ; + oldval.i = ::Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i ); + } while ( assume.i != oldval.i ); + + return oldval.t ; +} + +template < class Oper, typename T > +KOKKOS_INLINE_FUNCTION +T atomic_oper_fetch( const Oper& op, volatile T * const dest , + typename ::Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(unsigned long long int) , const T >::type val ) +{ + union { unsigned long long int i ; T t ; } oldval , assume , newval ; + + oldval.t = *dest ; + + do { + assume.i = oldval.i ; + newval.t = Oper::apply(assume.t, val) ; + oldval.i = ::Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i ); + } while ( assume.i != oldval.i ); + + return newval.t ; +} + +template < class Oper, typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_oper( const Oper& op, volatile T * const dest , + typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val ) +{ + union { int i ; T t ; } oldval , assume , newval ; + + oldval.t = *dest ; + + do { + assume.i = oldval.i ; + newval.t = Oper::apply(assume.t, val) ; + oldval.i = ::Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i ); + } while ( assume.i != oldval.i ); + + return oldval.t ; +} + +template < class Oper, typename T > +KOKKOS_INLINE_FUNCTION +T atomic_oper_fetch( const Oper& op, volatile T * const dest , + typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(int), const T
>::type val ) +{ + union { int i ; T t ; } oldval , assume , newval ; + + oldval.t = *dest ; + + do { + assume.i = oldval.i ; + newval.t = Oper::apply(assume.t, val) ; + oldval.i = ::Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i ); + } while ( assume.i != oldval.i ); + + return newval.t ; +} + +template < class Oper, typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_oper( const Oper& op, volatile T * const dest , + typename ::Kokkos::Impl::enable_if< + ( sizeof(T) != 4 ) + && ( sizeof(T) != 8 ) + #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) + && ( sizeof(T) != 16 ) + #endif + , const T >::type val ) +{ + +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + while( !Impl::lock_address_host_space( (void*) dest ) ); + T return_val = *dest; + *dest = Oper::apply(return_val, val); + Impl::unlock_address_host_space( (void*) dest ); + return return_val; +#else + // This is a way to (hopefully) avoid dead lock in a warp + int done = 1; + while ( done>0 ) { + done++; + if( Impl::lock_address_cuda_space( (void*) dest ) ) { + T return_val = *dest; + *dest = Oper::apply(return_val, val);; + Impl::unlock_address_cuda_space( (void*) dest ); + done=0; + } + } + return return_val; +#endif +} + +template < class Oper, typename T > +KOKKOS_INLINE_FUNCTION +T atomic_oper_fetch( const Oper& op, volatile T * const dest , + typename ::Kokkos::Impl::enable_if< + ( sizeof(T) != 4 ) + && ( sizeof(T) != 8 ) + #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) + && ( sizeof(T) != 16 ) + #endif + , const T >::type& val ) +{ + +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + while( !Impl::lock_address_host_space( (void*) dest ) ); + T return_val = Oper::apply(*dest, val); + *dest = return_val; + Impl::unlock_address_host_space( (void*) dest ); + return return_val; +#else + // This is a way to (hopefully) avoid dead lock in a warp + int done = 1; + while ( done>0 ) { + done++; + if( 
Impl::lock_address_cuda_space( (void*) dest ) ) { + T return_val = Oper::apply(*dest, val); + *dest = return_val; + Impl::unlock_address_cuda_space( (void*) dest ); + done=0; + } + } + return return_val; +#endif +} + +} +} + +namespace Kokkos { + +// Fetch_Oper atomics: return value before operation +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_max(volatile T * const dest, const T val) { + return Impl::atomic_fetch_oper(Impl::MaxOper<T,const T>(),dest,val); +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_min(volatile T * const dest, const T val) { + return Impl::atomic_fetch_oper(Impl::MinOper<T,const T>(),dest,val); +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_mul(volatile T * const dest, const T val) { + return Impl::atomic_fetch_oper(Impl::MulOper<T,const T>(),dest,val); +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_div(volatile T * const dest, const T val) { + return Impl::atomic_fetch_oper(Impl::DivOper<T,const T>(),dest,val); +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_mod(volatile T * const dest, const T val) { + return Impl::atomic_fetch_oper(Impl::ModOper<T,const T>(),dest,val); +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_and(volatile T * const dest, const T val) { + return Impl::atomic_fetch_oper(Impl::AndOper<T,const T>(),dest,val); +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_or(volatile T * const dest, const T val) { + return Impl::atomic_fetch_oper(Impl::OrOper<T,const T>(),dest,val); +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_xor(volatile T * const dest, const T val) { + return Impl::atomic_fetch_oper(Impl::XorOper<T,const T>(),dest,val); +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_lshift(volatile T * const dest, const unsigned int val) { + return Impl::atomic_fetch_oper(Impl::LShiftOper<T,const unsigned int>(),dest,val); +} + 
+template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_fetch_rshift(volatile T * const dest, const unsigned int val) { + return Impl::atomic_fetch_oper(Impl::RShiftOper<T,const unsigned int>(),dest,val); +} + + +// Oper Fetch atomics: return value after operation +// Each wrapper forwards to Impl::atomic_oper_fetch with the matching Impl::*Oper type; +// the two shift variants take the shift count as unsigned int. +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_max_fetch(volatile T * const dest, const T val) { + return Impl::atomic_oper_fetch(Impl::MaxOper<T,const T>(),dest,val); +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_min_fetch(volatile T * const dest, const T val) { + return Impl::atomic_oper_fetch(Impl::MinOper<T,const T>(),dest,val); +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_mul_fetch(volatile T * const dest, const T val) { + return Impl::atomic_oper_fetch(Impl::MulOper<T,const T>(),dest,val); +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_div_fetch(volatile T * const dest, const T val) { + return Impl::atomic_oper_fetch(Impl::DivOper<T,const T>(),dest,val); +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_mod_fetch(volatile T * const dest, const T val) { + return Impl::atomic_oper_fetch(Impl::ModOper<T,const T>(),dest,val); +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_and_fetch(volatile T * const dest, const T val) { + return Impl::atomic_oper_fetch(Impl::AndOper<T,const T>(),dest,val); +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_or_fetch(volatile T * const dest, const T val) { + return Impl::atomic_oper_fetch(Impl::OrOper<T,const T>(),dest,val); +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_xor_fetch(volatile T * const dest, const T val) { + return Impl::atomic_oper_fetch(Impl::XorOper<T,const T>(),dest,val); +} + +template < typename T > +KOKKOS_INLINE_FUNCTION +T atomic_lshift_fetch(volatile T * const dest, const unsigned int val) { + return Impl::atomic_oper_fetch(Impl::LShiftOper<T,const unsigned int>(),dest,val); +} + +template < typename T >
+KOKKOS_INLINE_FUNCTION +T atomic_rshift_fetch(volatile T * const dest, const unsigned int val) { + return Impl::atomic_oper_fetch(Impl::RShiftOper<T,const unsigned int>(),dest,val); +} + + +} +#endif diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Increment.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Increment.hpp new file mode 100644 index 0000000000000000000000000000000000000000..eecda29f1c20524c4ff95acec646417c9160c1a6 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Increment.hpp @@ -0,0 +1,117 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_INCREMENT ) +#define KOKKOS_ATOMIC_INCREMENT + +namespace Kokkos { + +// Atomic increment +// The char, short, int and long long int specializations issue one lock-prefixed +// inc instruction (incb/incw/incl/incq) when KOKKOS_ENABLE_ASM and +// KOKKOS_USE_ISA_X86_64 are defined and neither _WIN32 nor __CUDA_ARCH__ is; +// in every other configuration they call Kokkos::atomic_fetch_add(a,1). +template<> +KOKKOS_INLINE_FUNCTION +void atomic_increment<char>(volatile char* a) { +#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__) + __asm__ __volatile__( + "lock incb %0" + : /* no output registers */ + : "m" (a[0]) + : "memory" + ); +#else + Kokkos::atomic_fetch_add(a,1); +#endif +} + +template<> +KOKKOS_INLINE_FUNCTION +void atomic_increment<short>(volatile short* a) { +#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__) + __asm__ __volatile__( + "lock incw %0" + : /* no output registers */ + : "m" (a[0]) + : "memory" + ); +#else + Kokkos::atomic_fetch_add(a,1); +#endif +} + +template<> +KOKKOS_INLINE_FUNCTION +void atomic_increment<int>(volatile int* a) { +#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && !
defined(__CUDA_ARCH__) + __asm__ __volatile__( + "lock incl %0" + : /* no output registers */ + : "m" (a[0]) + : "memory" + ); +#else + Kokkos::atomic_fetch_add(a,1); +#endif +} + +template<> +KOKKOS_INLINE_FUNCTION +void atomic_increment<long long int>(volatile long long int* a) { +#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__) + __asm__ __volatile__( + "lock incq %0" + : /* no output registers */ + : "m" (a[0]) + : "memory" + ); +#else + Kokkos::atomic_fetch_add(a,1); +#endif +} + +// Generic version for every other type: always calls Kokkos::atomic_fetch_add(a,1). +template<typename T> +KOKKOS_INLINE_FUNCTION +void atomic_increment(volatile T* a) { + Kokkos::atomic_fetch_add(a,1); +} + +} // End of namespace Kokkos +#endif diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_View.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_View.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6e48faa6948e808c3460b4408ebb85a75617d035 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_View.hpp @@ -0,0 +1,430 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3.
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef KOKKOS_ATOMIC_VIEW_HPP +#define KOKKOS_ATOMIC_VIEW_HPP + +#include <Kokkos_Macros.hpp> +#include <Kokkos_Atomic.hpp> + +namespace Kokkos { namespace Impl { + +//The following tag is used to prevent an implicit call of the constructor when trying +//to assign a literal 0 int ( = 0 ); +struct AtomicViewConstTag {}; + +// Holds a pointer to one element (ptr, stored as volatile value_type*). +// operator = writes through ptr with a plain volatile store; inc/dec, ++/-- and the +// compound assignment operators call the corresponding Kokkos::atomic_* function. +template<class ViewTraits> +class AtomicDataElement { +public: + typedef typename ViewTraits::value_type value_type; + typedef typename ViewTraits::const_value_type const_value_type; + typedef typename ViewTraits::non_const_value_type non_const_value_type; + volatile value_type* const ptr; + + KOKKOS_INLINE_FUNCTION + AtomicDataElement(value_type* ptr_, AtomicViewConstTag ):ptr(ptr_){} + + KOKKOS_INLINE_FUNCTION + const_value_type operator = (const_value_type& val) const { + *ptr = val; + return val; + } + KOKKOS_INLINE_FUNCTION + const_value_type operator = (volatile const_value_type& val) const { + *ptr = val; + return val; + } + + KOKKOS_INLINE_FUNCTION + void inc() const { + Kokkos::atomic_increment(ptr); + } + + KOKKOS_INLINE_FUNCTION + void dec() const { + Kokkos::atomic_decrement(ptr); + } + + // Prefix forms return the value after the update (fetched value +/- 1). + KOKKOS_INLINE_FUNCTION + const_value_type operator ++ () const { + const_value_type tmp = Kokkos::atomic_fetch_add(ptr,1); + return tmp+1; + } + + KOKKOS_INLINE_FUNCTION + const_value_type operator -- () const { + const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-1); + return tmp-1; + } + + // Postfix forms return the value fetched before the update. + KOKKOS_INLINE_FUNCTION + const_value_type operator ++ (int) const { + return Kokkos::atomic_fetch_add(ptr,1); + } + + KOKKOS_INLINE_FUNCTION + const_value_type operator -- (int) const { + return Kokkos::atomic_fetch_add(ptr,-1); + } + + KOKKOS_INLINE_FUNCTION + const_value_type operator += (const_value_type& val) const { + const_value_type tmp = Kokkos::atomic_fetch_add(ptr,val); + return tmp+val; + } + KOKKOS_INLINE_FUNCTION +
const_value_type operator += (volatile const_value_type& val) const { + const_value_type tmp = Kokkos::atomic_fetch_add(ptr,val); + return tmp+val; + } + + // Subtraction is implemented as an atomic add of the negated value. + KOKKOS_INLINE_FUNCTION + const_value_type operator -= (const_value_type& val) const { + const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-val); + return tmp-val; + } + KOKKOS_INLINE_FUNCTION + const_value_type operator -= (volatile const_value_type& val) const { + const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-val); + return tmp-val; + } + + // The remaining compound assignments return what the matching + // Kokkos::atomic_<op>_fetch call returns. + KOKKOS_INLINE_FUNCTION + const_value_type operator *= (const_value_type& val) const { + return Kokkos::atomic_mul_fetch(ptr,val); + } + KOKKOS_INLINE_FUNCTION + const_value_type operator *= (volatile const_value_type& val) const { + return Kokkos::atomic_mul_fetch(ptr,val); + } + + KOKKOS_INLINE_FUNCTION + const_value_type operator /= (const_value_type& val) const { + return Kokkos::atomic_div_fetch(ptr,val); + } + KOKKOS_INLINE_FUNCTION + const_value_type operator /= (volatile const_value_type& val) const { + return Kokkos::atomic_div_fetch(ptr,val); + } + + KOKKOS_INLINE_FUNCTION + const_value_type operator %= (const_value_type& val) const { + return Kokkos::atomic_mod_fetch(ptr,val); + } + KOKKOS_INLINE_FUNCTION + const_value_type operator %= (volatile const_value_type& val) const { + return Kokkos::atomic_mod_fetch(ptr,val); + } + + KOKKOS_INLINE_FUNCTION + const_value_type operator &= (const_value_type& val) const { + return Kokkos::atomic_and_fetch(ptr,val); + } + KOKKOS_INLINE_FUNCTION + const_value_type operator &= (volatile const_value_type& val) const { + return Kokkos::atomic_and_fetch(ptr,val); + } + + KOKKOS_INLINE_FUNCTION + const_value_type operator ^= (const_value_type& val) const { + return Kokkos::atomic_xor_fetch(ptr,val); + } + KOKKOS_INLINE_FUNCTION + const_value_type operator ^= (volatile const_value_type& val) const { + return Kokkos::atomic_xor_fetch(ptr,val); + } + + KOKKOS_INLINE_FUNCTION + const_value_type operator |= (const_value_type&
val) const { + return Kokkos::atomic_or_fetch(ptr,val); + } + KOKKOS_INLINE_FUNCTION + const_value_type operator |= (volatile const_value_type& val) const { + return Kokkos::atomic_or_fetch(ptr,val); + } + + KOKKOS_INLINE_FUNCTION + const_value_type operator <<= (const_value_type& val) const { + return Kokkos::atomic_lshift_fetch(ptr,val); + } + KOKKOS_INLINE_FUNCTION + const_value_type operator <<= (volatile const_value_type& val) const { + return Kokkos::atomic_lshift_fetch(ptr,val); + } + + KOKKOS_INLINE_FUNCTION + const_value_type operator >>= (const_value_type& val) const { + return Kokkos::atomic_rshift_fetch(ptr,val); + } + KOKKOS_INLINE_FUNCTION + const_value_type operator >>= (volatile const_value_type& val) const { + return Kokkos::atomic_rshift_fetch(ptr,val); + } + + // Binary arithmetic operators: read *ptr directly (no Kokkos::atomic_* call) + // and return the result by value; the stored element is not modified. + KOKKOS_INLINE_FUNCTION + const_value_type operator + (const_value_type& val) const { + return *ptr+val; + } + KOKKOS_INLINE_FUNCTION + const_value_type operator + (volatile const_value_type& val) const { + return *ptr+val; + } + + KOKKOS_INLINE_FUNCTION + const_value_type operator - (const_value_type& val) const { + return *ptr-val; + } + KOKKOS_INLINE_FUNCTION + const_value_type operator - (volatile const_value_type& val) const { + return *ptr-val; + } + + KOKKOS_INLINE_FUNCTION + const_value_type operator * (const_value_type& val) const { + return *ptr*val; + } + KOKKOS_INLINE_FUNCTION + const_value_type operator * (volatile const_value_type& val) const { + return *ptr*val; + } + + KOKKOS_INLINE_FUNCTION + const_value_type operator / (const_value_type& val) const { + return *ptr/val; + } + KOKKOS_INLINE_FUNCTION + const_value_type operator / (volatile const_value_type& val) const { + return *ptr/val; + } + + // Remainder of the element value divided by val (must use %, not ^). + KOKKOS_INLINE_FUNCTION + const_value_type operator % (const_value_type& val) const { + return *ptr%val; + } + KOKKOS_INLINE_FUNCTION + const_value_type operator % (volatile const_value_type& val) const { + return *ptr%val; + } + + KOKKOS_INLINE_FUNCTION + const_value_type operator !
() const {
+    return !*ptr;
+  }
+  // Logical operators: each dereferences ptr directly and combines its truth value with val.
+  KOKKOS_INLINE_FUNCTION
+  const_value_type operator && (const_value_type& val) const {
+    return *ptr&&val;
+  }
+  KOKKOS_INLINE_FUNCTION
+  const_value_type operator && (volatile const_value_type& val) const {
+    return *ptr&&val;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  const_value_type operator || (const_value_type& val) const {
+    return *ptr||val;  // logical OR (truth value), distinct from the bitwise operator | below
+  }
+  KOKKOS_INLINE_FUNCTION
+  const_value_type operator || (volatile const_value_type& val) const {
+    return *ptr||val;  // logical OR (truth value), distinct from the bitwise operator | below
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  const_value_type operator & (const_value_type& val) const {
+    return *ptr&val;
+  }
+  KOKKOS_INLINE_FUNCTION
+  const_value_type operator & (volatile const_value_type& val) const {
+    return *ptr&val;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  const_value_type operator | (const_value_type& val) const {
+    return *ptr|val;
+  }
+  KOKKOS_INLINE_FUNCTION
+  const_value_type operator | (volatile const_value_type& val) const {
+    return *ptr|val;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  const_value_type operator ^ (const_value_type& val) const {
+    return *ptr^val;
+  }
+  KOKKOS_INLINE_FUNCTION
+  const_value_type operator ^ (volatile const_value_type& val) const {
+    return *ptr^val;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  const_value_type operator ~ () const {
+    return ~*ptr;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  const_value_type operator << (const unsigned int& val) const {
+    return *ptr<<val;
+  }
+  KOKKOS_INLINE_FUNCTION
+  const_value_type operator << (volatile const unsigned int& val) const {
+    return *ptr<<val;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  const_value_type operator >> (const unsigned int& val) const {
+    return *ptr>>val;
+  }
+  KOKKOS_INLINE_FUNCTION
+  const_value_type operator >> (volatile const unsigned int& val) const {
+    return *ptr>>val;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  bool operator == (const_value_type& val) const {
+    return *ptr == val;
+  }
+  KOKKOS_INLINE_FUNCTION
+  bool operator == (volatile const_value_type& val) const {
+    return *ptr == val;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+ bool operator != (const_value_type& val) const { + return *ptr != val; + } + KOKKOS_INLINE_FUNCTION + bool operator != (volatile const_value_type& val) const { + return *ptr != val; + } + + KOKKOS_INLINE_FUNCTION + bool operator >= (const_value_type& val) const { + return *ptr >= val; + } + KOKKOS_INLINE_FUNCTION + bool operator >= (volatile const_value_type& val) const { + return *ptr >= val; + } + + KOKKOS_INLINE_FUNCTION + bool operator <= (const_value_type& val) const { + return *ptr <= val; + } + KOKKOS_INLINE_FUNCTION + bool operator <= (volatile const_value_type& val) const { + return *ptr <= val; + } + + KOKKOS_INLINE_FUNCTION + bool operator < (const_value_type& val) const { + return *ptr < val; + } + KOKKOS_INLINE_FUNCTION + bool operator < (volatile const_value_type& val) const { + return *ptr < val; + } + + KOKKOS_INLINE_FUNCTION + bool operator > (const_value_type& val) const { + return *ptr > val; + } + KOKKOS_INLINE_FUNCTION + bool operator > (volatile const_value_type& val) const { + return *ptr > val; + } + + KOKKOS_INLINE_FUNCTION + operator const_value_type () const { + //return Kokkos::atomic_load(ptr); + return *ptr; + } + + KOKKOS_INLINE_FUNCTION + operator volatile non_const_value_type () volatile const { + //return Kokkos::atomic_load(ptr); + return *ptr; + } +}; + +template<class ViewTraits> +class AtomicViewDataHandle { +public: + typename ViewTraits::value_type* ptr; + + KOKKOS_INLINE_FUNCTION + AtomicViewDataHandle() + : ptr(NULL) + {} + + KOKKOS_INLINE_FUNCTION + AtomicViewDataHandle(typename ViewTraits::value_type* ptr_) + :ptr(ptr_) + {} + + template<class iType> + KOKKOS_INLINE_FUNCTION + AtomicDataElement<ViewTraits> operator[] (const iType& i) const { + return AtomicDataElement<ViewTraits>(ptr+i,AtomicViewConstTag()); + } + + + KOKKOS_INLINE_FUNCTION + operator typename ViewTraits::value_type * () const { return ptr ; } + +}; + +template<unsigned Size> +struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars; + 
+template<> +struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars<4> { + typedef int type; +}; + +template<> +struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars<8> { + typedef int64_t type; +}; + +}} // namespace Kokkos::Impl + +#endif diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp new file mode 100644 index 0000000000000000000000000000000000000000..14066e8be25f42e7a33b2f9261d90769dff6060d --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp @@ -0,0 +1,232 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef KOKKOS_ATOMIC_WINDOWS_HPP +#define KOKKOS_ATOMIC_WINDOWS_HPP +#ifdef _WIN32 + +#define NOMINMAX +#include <winsock2.h> +#include <Windows.h> + +namespace Kokkos { + namespace Impl { + _declspec(align(16)) + struct cas128_t + { + LONGLONG lower; + LONGLONG upper; + KOKKOS_INLINE_FUNCTION + bool operator != (const cas128_t& a) const { + return (lower != a.lower) || upper != a.upper; + } + }; + } + + template < typename T > + KOKKOS_INLINE_FUNCTION + T atomic_compare_exchange(volatile T * const dest, const T & compare, + typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(LONG), const T & >::type val) + { + union U { + LONG i; + T t; + KOKKOS_INLINE_FUNCTION U() {}; + } tmp; + + tmp.i = _InterlockedCompareExchange((LONG*)dest, *((LONG*)&val), *((LONG*)&compare)); + return tmp.t; + } + + template < typename T > + KOKKOS_INLINE_FUNCTION + T atomic_compare_exchange(volatile T * const dest, const T & compare, + typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(LONGLONG), const T & >::type val) + { + union U { + LONGLONG i; + T t; + KOKKOS_INLINE_FUNCTION U() {}; + } tmp; + + tmp.i = _InterlockedCompareExchange64((LONGLONG*)dest, *((LONGLONG*)&val), *((LONGLONG*)&compare)); + return tmp.t; + } + + template < typename T > + KOKKOS_INLINE_FUNCTION + T 
atomic_compare_exchange(volatile T * const dest, const T & compare,
+    typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t), const T & >::type val)
+  {
+    union U {
+      Impl::cas128_t i;
+      T t;
+      KOKKOS_INLINE_FUNCTION U() {};
+    } tmp, newval;
+    newval.t = val;
+    tmp.t = compare;  // the intrinsic's last argument is in/out: expected value in, previous *dest out
+    _InterlockedCompareExchange128((LONGLONG*)dest, newval.i.upper, newval.i.lower, (LONGLONG*)&tmp.i);
+    return tmp.t;     // value *dest held before the call; the caller's 'compare' is left untouched
+  }
+
+  template < typename T >
+  KOKKOS_INLINE_FUNCTION
+  T atomic_compare_exchange_strong(volatile T * const dest, const T & compare, const T & val)
+  {
+    return atomic_compare_exchange(dest,compare,val);
+  }
+  // The fetch_* helpers below retry a compare-exchange until it succeeds and return the value seen before the update.
+  template< typename T >
+  T atomic_fetch_or(volatile T * const dest, const T val) {
+    T oldval = *dest;
+    T assume;
+    do {
+      assume = oldval;
+      T newval = val | oldval;
+      oldval = atomic_compare_exchange(dest, assume, newval);
+    } while (assume != oldval);
+
+    return oldval;
+  }
+
+  template< typename T >
+  T atomic_fetch_and(volatile T * const dest, const T val) {
+    T oldval = *dest;
+    T assume;
+    do {
+      assume = oldval;
+      T newval = val & oldval;
+      oldval = atomic_compare_exchange(dest, assume, newval);
+    } while (assume != oldval);
+
+    return oldval;
+  }
+
+  template< typename T >
+  T atomic_fetch_add(volatile T * const dest, const T val) {
+    T oldval = *dest;
+    T assume;
+    do {
+      assume = oldval;
+      T newval = val + oldval;
+      oldval = atomic_compare_exchange(dest, assume, newval);
+    } while (assume != oldval);
+
+    return oldval;
+  }
+
+  template< typename T >
+  T atomic_fetch_sub(volatile T * const dest, const T val) {
+    T oldval = *dest;
+    T assume;
+    do {
+      assume = oldval;
+      T newval = oldval - val;  // subtract val from the stored value, not the other way round
+      oldval = atomic_compare_exchange(dest, assume, newval);
+    } while (assume != oldval);
+
+    return oldval;
+  }
+
+  template< typename T >
+  T atomic_exchange(volatile T * const dest, const T val) {
+    T oldval = *dest;
+    T assume;
+    do {
+      assume = oldval;
+      oldval = atomic_compare_exchange(dest, assume, val);
+    } while (assume != oldval);
+
+    return
oldval;
+  }
+
+  template< typename T >
+  void atomic_or(volatile T * const dest, const T val) {
+    atomic_fetch_or(dest, val);
+  }
+
+  template< typename T >
+  void atomic_and(volatile T * const dest, const T val) {
+    atomic_fetch_and(dest, val);
+  }
+
+  template< typename T >
+  void atomic_add(volatile T * const dest, const T val) {
+    atomic_fetch_add(dest, val);
+  }
+
+  template< typename T >
+  void atomic_sub(volatile T * const dest, const T val) {
+    atomic_fetch_sub(dest, val);
+  }
+
+  template< typename T >
+  void atomic_assign(volatile T * const dest, const T val) {
+    atomic_exchange(dest, val);  // store val; the previous value is discarded
+  }
+  // Adds one to *dest via a compare-exchange retry loop; returns the value seen before the update.
+  template< typename T >
+  T atomic_increment(volatile T * const dest) {
+    T oldval = *dest;
+    T assume;
+    do {
+      assume = oldval;
+      oldval = atomic_compare_exchange(dest, assume, T(assume + 1));  // 'assume' must stay unmodified: it is the comparand
+    } while (assume != oldval);
+    return oldval;
+  }
+  // Subtracts one from *dest via a compare-exchange retry loop; returns the value seen before the update.
+  template< typename T >
+  T atomic_decrement(volatile T * const dest) {
+    T oldval = *dest;
+    T assume;
+    do {
+      assume = oldval;
+      oldval = atomic_compare_exchange(dest, assume, T(assume - 1));  // 'assume' must stay unmodified: it is the comparand
+    } while (assume != oldval);
+    return oldval;
+  }
+
+}
+#endif
+#endif
+
diff --git a/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp b/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0ffbc0548ab663c9b6afa8799f162e3c7bbd7510
--- /dev/null
+++ b/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp
@@ -0,0 +1,122 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1.
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_BITOPS_HPP
+#define KOKKOS_BITOPS_HPP
+
+#include <Kokkos_Macros.hpp>
+#include <stdint.h>
+#include <climits>
+
+namespace Kokkos {
+namespace Impl {
+/// Zero-based index of the lowest set bit of i.
+KOKKOS_FORCEINLINE_FUNCTION
+int bit_scan_forward( unsigned i )
+{
+#if defined( __CUDA_ARCH__ )
+  return __ffs(i) - 1;
+#elif defined( __GNUC__ ) || defined( __GNUG__ )
+  return __builtin_ffs(i) - 1;
+#elif defined( __INTEL_COMPILER )
+  return _bit_scan_forward(i);
+#else
+  // Portable fallback: slide a one-bit mask upward until it meets a set bit of i.
+  unsigned t = 1u;
+  int r = i ? 0 : -1;  // no bit set: report -1, the same value '__ffs(0) - 1' yields above
+  while ( i && ( ( i & t ) == 0 ) )  // parentheses required: '==' binds tighter than '&'
+  {
+    t = t << 1;
+    ++r;
+  }
+  return r;
+#endif
+}
+/// Zero-based index of the highest set bit of i.
+KOKKOS_FORCEINLINE_FUNCTION
+int bit_scan_reverse( unsigned i )
+{
+  enum { shift = static_cast<int>( sizeof(unsigned) * CHAR_BIT - 1 ) };
+#if defined( __CUDA_ARCH__ )
+  return shift - __clz(i);
+#elif defined( __GNUC__ ) || defined( __GNUG__ )
+  return shift - __builtin_clz(i);
+#elif defined( __INTEL_COMPILER )
+  return _bit_scan_reverse(i);
+#else
+  unsigned t = 1u << shift;  // mask starts at the top bit and slides downward
+  int r = i ? int(shift) : -1;  // start at the top bit's index; -1 when no bit is set
+  while ( i && ( ( i & t ) == 0 ) )  // parentheses required: '==' binds tighter than '&'
+  {
+    t = t >> 1;
+    --r;  // each skipped leading zero lowers the index, matching 'shift - clz(i)' above
+  }
+  return r;
+#endif
+}
+
+/// Count the number of bits set.
+KOKKOS_FORCEINLINE_FUNCTION +int bit_count( unsigned i ) +{ +#if defined( __CUDA_ARCH__ ) + return __popc(i); +#elif defined( __GNUC__ ) || defined( __GNUG__ ) + return __builtin_popcount(i); +#elif defined ( __INTEL_COMPILER ) + return _popcnt32(i); +#else + // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetNaive + i = i - ( ( i >> 1 ) & ~0u / 3u ); // temp + i = ( i & ~0u / 15u * 3u ) + ( ( i >> 2 ) & ~0u / 15u * 3u ); // temp + i = ( i + ( i >> 4 ) ) & ~0u / 255u * 15u; // temp + + // count + return (int)( ( i * ( ~0u / 255u ) ) >> ( sizeof(unsigned) - 1 ) * CHAR_BIT ); +#endif +} + +} // namespace Impl +} // namespace Kokkos + +#endif // KOKKOS_BITOPS_HPP diff --git a/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp b/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b9d23bd815433a0a91c282dd6e787b7d16f8b0e3 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp @@ -0,0 +1,124 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#else +#include <unistd.h> +#endif +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <cerrno> + +namespace Kokkos { +namespace Impl { + +//The following function (processors_per_node) is copied from here: +// https://lists.gnu.org/archive/html/autoconf/2002-08/msg00126.html +// Philip Willoughby + +int processors_per_node() { + int nprocs = -1; + int nprocs_max = -1; +#ifdef _WIN32 +#ifndef _SC_NPROCESSORS_ONLN +SYSTEM_INFO info; +GetSystemInfo(&info); +#define sysconf(a) info.dwNumberOfProcessors +#define _SC_NPROCESSORS_ONLN +#endif +#endif +#ifdef _SC_NPROCESSORS_ONLN + nprocs = sysconf(_SC_NPROCESSORS_ONLN); + if (nprocs < 1) + { + return -1; + } + nprocs_max = sysconf(_SC_NPROCESSORS_CONF); + if (nprocs_max < 1) + { + return -1; + } + return nprocs; +#else + return -1; +#endif +} + +int mpi_ranks_per_node() { + char *str; + int ppn = 1; + if ((str = getenv("SLURM_TASKS_PER_NODE"))) { + ppn = atoi(str); + if(ppn<=0) ppn = 1; + } + if ((str = getenv("MV2_COMM_WORLD_LOCAL_SIZE"))) { + ppn = atoi(str); + if(ppn<=0) ppn = 1; + } + if ((str = getenv("OMPI_COMM_WORLD_LOCAL_SIZE"))) { + ppn = atoi(str); + if(ppn<=0) ppn = 1; + } + return ppn; +} + +int mpi_local_rank_on_node() { + char *str; + int local_rank=0; + if ((str = getenv("SLURM_LOCALID"))) { + local_rank = atoi(str); + } + if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) { + local_rank = atoi(str); + } + if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) { + local_rank = atoi(str); + } + return local_rank; +} + +} +} diff --git a/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.hpp b/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.hpp new file mode 100644 index 0000000000000000000000000000000000000000..af474bc40617968fbc87fb2cf6b70e1bec0d42f5 --- /dev/null +++ 
b/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.hpp @@ -0,0 +1,51 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +namespace Kokkos { +namespace Impl { + +int processors_per_node(); +int mpi_ranks_per_node(); +int mpi_local_rank_on_node(); + +} +} diff --git a/lib/kokkos/core/src/impl/Kokkos_Core.cpp b/lib/kokkos/core/src/impl/Kokkos_Core.cpp new file mode 100644 index 0000000000000000000000000000000000000000..567a2141405719e3331b2327ca40097c24af775a --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Core.cpp @@ -0,0 +1,454 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <Kokkos_Core.hpp>
+#include <impl/Kokkos_Error.hpp>
+#include <cctype>
+#include <cstring>
+#include <iostream>
+#include <cstdlib>
+
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+namespace {
+// True when every character of str is a decimal digit; an empty string also yields true.
+bool is_unsigned_int(const char* str)
+{
+  const size_t len = strlen (str);
+  for (size_t i = 0; i < len; ++i) {
+    if (! isdigit (static_cast<unsigned char>(str[i]))) {  // isdigit is undefined for negative char values
+      return false;
+    }
+  }
+  return true;
+}
+
+void initialize_internal(const InitArguments& args)
+{
+// This is an experimental setting
+// For KNL in Flat mode this variable should be set, so that
+// memkind allocates high bandwidth memory correctly.
+#ifdef KOKKOS_HAVE_HBWSPACE
+setenv("MEMKIND_HBW_NODES", "1", 0);
+#endif
+
+  // Protect declarations, to prevent "unused variable" warnings.
+#if defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD ) + const int num_threads = args.num_threads; + const int use_numa = args.num_numa; +#endif // defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD ) +#if defined( KOKKOS_HAVE_CUDA ) + const int use_gpu = args.device_id; +#endif // defined( KOKKOS_HAVE_CUDA ) + +#if defined( KOKKOS_HAVE_OPENMP ) + if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value || + Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) { + if(num_threads>0) { + if(use_numa>0) { + Kokkos::OpenMP::initialize(num_threads,use_numa); + } + else { + Kokkos::OpenMP::initialize(num_threads); + } + } else { + Kokkos::OpenMP::initialize(); + } + //std::cout << "Kokkos::initialize() fyi: OpenMP enabled and initialized" << std::endl ; + } + else { + //std::cout << "Kokkos::initialize() fyi: OpenMP enabled but not initialized" << std::endl ; + } +#endif + +#if defined( KOKKOS_HAVE_PTHREAD ) + if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value || + Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) { + if(num_threads>0) { + if(use_numa>0) { + Kokkos::Threads::initialize(num_threads,use_numa); + } + else { + Kokkos::Threads::initialize(num_threads); + } + } else { + Kokkos::Threads::initialize(); + } + //std::cout << "Kokkos::initialize() fyi: Pthread enabled and initialized" << std::endl ; + } + else { + //std::cout << "Kokkos::initialize() fyi: Pthread enabled but not initialized" << std::endl ; + } +#endif + +#if defined( KOKKOS_HAVE_SERIAL ) + // Prevent "unused variable" warning for 'args' input struct. If + // Serial::initialize() ever needs to take arguments from the input + // struct, you may remove this line of code. 
+ (void) args; + + if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value || + Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) { + Kokkos::Serial::initialize(); + } +#endif + +#if defined( KOKKOS_HAVE_CUDA ) + if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) { + if (use_gpu > -1) { + Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( use_gpu ) ); + } + else { + Kokkos::Cuda::initialize(); + } + //std::cout << "Kokkos::initialize() fyi: Cuda enabled and initialized" << std::endl ; + } +#endif + +#if (KOKKOS_ENABLE_PROFILING) + Kokkos::Profiling::initialize(); +#endif +} + +void finalize_internal( const bool all_spaces = false ) +{ + +#if defined( KOKKOS_HAVE_CUDA ) + if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || all_spaces ) { + if(Kokkos::Cuda::is_initialized()) + Kokkos::Cuda::finalize(); + } +#endif + +#if defined( KOKKOS_HAVE_OPENMP ) + if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value || + Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value || + all_spaces ) { + if(Kokkos::OpenMP::is_initialized()) + Kokkos::OpenMP::finalize(); + } +#endif + +#if defined( KOKKOS_HAVE_PTHREAD ) + if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value || + Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value || + all_spaces ) { + if(Kokkos::Threads::is_initialized()) + Kokkos::Threads::finalize(); + } +#endif + +#if defined( KOKKOS_HAVE_SERIAL ) + if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value || + Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value || + all_spaces ) { + if(Kokkos::Serial::is_initialized()) + Kokkos::Serial::finalize(); + } +#endif + +#if (KOKKOS_ENABLE_PROFILING) + Kokkos::Profiling::finalize(); +#endif + +} + +void fence_internal() +{ + +#if defined( KOKKOS_HAVE_CUDA ) + if( Impl::is_same< 
Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) { + Kokkos::Cuda::fence(); + } +#endif + +#if defined( KOKKOS_HAVE_OPENMP ) + if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value || + Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) { + Kokkos::OpenMP::fence(); + } +#endif + +#if defined( KOKKOS_HAVE_PTHREAD ) + if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value || + Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) { + Kokkos::Threads::fence(); + } +#endif + +#if defined( KOKKOS_HAVE_SERIAL ) + if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value || + Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) { + Kokkos::Serial::fence(); + } +#endif + +} + +} // namespace +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +void initialize(int& narg, char* arg[]) +{ + int num_threads = -1; + int numa = -1; + int device = -1; + + int kokkos_threads_found = 0; + int kokkos_numa_found = 0; + int kokkos_device_found = 0; + int kokkos_ndevices_found = 0; + + int iarg = 0; + + while (iarg < narg) { + if ((strncmp(arg[iarg],"--kokkos-threads",16) == 0) || (strncmp(arg[iarg],"--threads",9) == 0)) { + //Find the number of threads (expecting --threads=XX) + if (!((strncmp(arg[iarg],"--kokkos-threads=",17) == 0) || (strncmp(arg[iarg],"--threads=",10) == 0))) + Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--threads/--kokkos-threads'. Raised by Kokkos::initialize(int narg, char* argc[])."); + + char* number = strchr(arg[iarg],'=')+1; + + if(!Impl::is_unsigned_int(number) || (strlen(number)==0)) + Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--threads/--kokkos-threads'. 
Raised by Kokkos::initialize(int narg, char* argc[])."); + + if((strncmp(arg[iarg],"--kokkos-threads",16) == 0) || !kokkos_threads_found) + num_threads = atoi(number); + + //Remove the --kokkos-threads argument from the list but leave --threads + if(strncmp(arg[iarg],"--kokkos-threads",16) == 0) { + for(int k=iarg;k<narg-1;k++) { + arg[k] = arg[k+1]; + } + kokkos_threads_found=1; + narg--; + } else { + iarg++; + } + } else if ((strncmp(arg[iarg],"--kokkos-numa",13) == 0) || (strncmp(arg[iarg],"--numa",6) == 0)) { + //Find the number of numa (expecting --numa=XX) + if (!((strncmp(arg[iarg],"--kokkos-numa=",14) == 0) || (strncmp(arg[iarg],"--numa=",7) == 0))) + Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--numa/--kokkos-numa'. Raised by Kokkos::initialize(int narg, char* argc[])."); + + char* number = strchr(arg[iarg],'=')+1; + + if(!Impl::is_unsigned_int(number) || (strlen(number)==0)) + Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--numa/--kokkos-numa'. Raised by Kokkos::initialize(int narg, char* argc[])."); + + if((strncmp(arg[iarg],"--kokkos-numa",13) == 0) || !kokkos_numa_found) + numa = atoi(number); + + //Remove the --kokkos-numa argument from the list but leave --numa + if(strncmp(arg[iarg],"--kokkos-numa",13) == 0) { + for(int k=iarg;k<narg-1;k++) { + arg[k] = arg[k+1]; + } + kokkos_numa_found=1; + narg--; + } else { + iarg++; + } + } else if ((strncmp(arg[iarg],"--kokkos-device",15) == 0) || (strncmp(arg[iarg],"--device",8) == 0)) { + //Find the number of device (expecting --device=XX) + if (!((strncmp(arg[iarg],"--kokkos-device=",16) == 0) || (strncmp(arg[iarg],"--device=",9) == 0))) + Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--device/--kokkos-device'. 
Raised by Kokkos::initialize(int narg, char* argc[])."); + + char* number = strchr(arg[iarg],'=')+1; + + if(!Impl::is_unsigned_int(number) || (strlen(number)==0)) + Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--device/--kokkos-device'. Raised by Kokkos::initialize(int narg, char* argc[])."); + + if((strncmp(arg[iarg],"--kokkos-device",15) == 0) || !kokkos_device_found) + device = atoi(number); + + //Remove the --kokkos-device argument from the list but leave --device + if(strncmp(arg[iarg],"--kokkos-device",15) == 0) { + for(int k=iarg;k<narg-1;k++) { + arg[k] = arg[k+1]; + } + kokkos_device_found=1; + narg--; + } else { + iarg++; + } + } else if ((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || (strncmp(arg[iarg],"--ndevices",10) == 0)) { + + //Find the number of device (expecting --device=XX) + if (!((strncmp(arg[iarg],"--kokkos-ndevices=",18) == 0) || (strncmp(arg[iarg],"--ndevices=",11) == 0))) + Impl::throw_runtime_exception("Error: expecting an '=INT[,INT]' after command line argument '--ndevices/--kokkos-ndevices'. Raised by Kokkos::initialize(int narg, char* argc[])."); + + int ndevices=-1; + int skip_device = 9999; + + char* num1 = strchr(arg[iarg],'=')+1; + char* num2 = strpbrk(num1,","); + int num1_len = num2==NULL?strlen(num1):num2-num1; + char* num1_only = new char[num1_len+1]; + strncpy(num1_only,num1,num1_len); + num1_only[num1_len]=0; + + if(!Impl::is_unsigned_int(num1_only) || (strlen(num1_only)==0)) { + Impl::throw_runtime_exception("Error: expecting an integer number after command line argument '--kokkos-ndevices'. Raised by Kokkos::initialize(int narg, char* argc[])."); + } + if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found) + ndevices = atoi(num1_only); + + if( num2 != NULL ) { + if(( !Impl::is_unsigned_int(num2+1) ) || (strlen(num2)==1) ) + Impl::throw_runtime_exception("Error: expecting an integer number after command line argument '--kokkos-ndevices=XX,'. 
Raised by Kokkos::initialize(int narg, char* argc[])."); + + if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found) + skip_device = atoi(num2+1); + } + + if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found) { + char *str; + if ((str = getenv("SLURM_LOCALID"))) { + int local_rank = atoi(str); + device = local_rank % ndevices; + if (device >= skip_device) device++; + } + if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) { + int local_rank = atoi(str); + device = local_rank % ndevices; + if (device >= skip_device) device++; + } + if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) { + int local_rank = atoi(str); + device = local_rank % ndevices; + if (device >= skip_device) device++; + } + if(device==-1) { + device = 0; + if (device >= skip_device) device++; + } + } + + //Remove the --kokkos-ndevices argument from the list but leave --ndevices + if(strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) { + for(int k=iarg;k<narg-1;k++) { + arg[k] = arg[k+1]; + } + kokkos_ndevices_found=1; + narg--; + } else { + iarg++; + } + } else if ((strcmp(arg[iarg],"--kokkos-help") == 0) || (strcmp(arg[iarg],"--help") == 0)) { + std::cout << std::endl; + std::cout << "--------------------------------------------------------------------------------" << std::endl; + std::cout << "-------------Kokkos command line arguments--------------------------------------" << std::endl; + std::cout << "--------------------------------------------------------------------------------" << std::endl; + std::cout << "The following arguments exist also without prefix 'kokkos' (e.g. --help)." << std::endl; + std::cout << "The prefixed arguments will be removed from the list by Kokkos::initialize()," << std::endl; + std::cout << "the non-prefixed ones are not removed. Prefixed versions take precedence over " << std::endl; + std::cout << "non prefixed ones, and the last occurence of an argument overwrites prior" << std::endl; + std::cout << "settings." 
<< std::endl; + std::cout << std::endl; + std::cout << "--kokkos-help : print this message" << std::endl; + std::cout << "--kokkos-threads=INT : specify total number of threads or" << std::endl; + std::cout << " number of threads per NUMA region if " << std::endl; + std::cout << " used in conjunction with '--numa' option. " << std::endl; + std::cout << "--kokkos-numa=INT : specify number of NUMA regions used by process." << std::endl; + std::cout << "--kokkos-device=INT : specify device id to be used by Kokkos. " << std::endl; + std::cout << "--kokkos-ndevices=INT[,INT] : used when running MPI jobs. Specify number of" << std::endl; + std::cout << " devices per node to be used. Process to device" << std::endl; + std::cout << " mapping happens by obtaining the local MPI rank" << std::endl; + std::cout << " and assigning devices round-robin. The optional" << std::endl; + std::cout << " second argument allows for an existing device" << std::endl; + std::cout << " to be ignored. This is most useful on workstations" << std::endl; + std::cout << " with multiple GPUs of which one is used to drive" << std::endl; + std::cout << " screen output." 
<< std::endl; + std::cout << std::endl; + std::cout << "--------------------------------------------------------------------------------" << std::endl; + std::cout << std::endl; + + //Remove the --kokkos-help argument from the list but leave --ndevices + if(strcmp(arg[iarg],"--kokkos-help") == 0) { + for(int k=iarg;k<narg-1;k++) { + arg[k] = arg[k+1]; + } + narg--; + } else { + iarg++; + } + } else + iarg++; + } + + InitArguments arguments; + arguments.num_threads = num_threads; + arguments.num_numa = numa; + arguments.device_id = device; + Impl::initialize_internal(arguments); +} + +void initialize(const InitArguments& arguments) { + Impl::initialize_internal(arguments); +} + +void finalize() +{ + Impl::finalize_internal(); +} + +void finalize_all() +{ + enum { all_spaces = true }; + Impl::finalize_internal( all_spaces ); +} + +void fence() +{ + Impl::fence_internal(); +} + +} // namespace Kokkos + diff --git a/lib/kokkos/core/src/impl/Kokkos_Error.cpp b/lib/kokkos/core/src/impl/Kokkos_Error.cpp new file mode 100644 index 0000000000000000000000000000000000000000..36224990d048c2e0394889390cfe78cf826a5fdc --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Error.cpp @@ -0,0 +1,193 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
//----------------------------------------------------------------------------
// Call-stack reporting.
//
// Two alternative definitions of Kokkos::Impl::traceback_callstack() are
// provided; the preprocessor selects exactly one of them.
//----------------------------------------------------------------------------

#if defined( __GNUC__ ) && defined( ENABLE_TRACEBACK )

/* This is only known to work with GNU C++
 *   Must be compiled with '-rdynamic'
 *   Must be linked with '-ldl'
 *
 * Prints the call stack into an error stream, so one knows in which
 * function the error occurred.
 *
 * Code copied from:
 *   http://stupefydeveloper.blogspot.com/2008/10/cc-call-stack.html
 *
 * License on this site:
 *   This blog is licensed under a
 *   Creative Commons Attribution-Share Alike 3.0 Unported License.
 *
 *   http://creativecommons.org/licenses/by-sa/3.0/
 *
 * Modified to output to std::ostream.
 */
#include <signal.h>
#include <execinfo.h>
#include <cxxabi.h>
#include <dlfcn.h>
#include <stdlib.h>

namespace Kokkos {
namespace Impl {

/** \brief  Append a "Call stack { ... }" listing to \p msg.
 *
 *  At most MAX_DEPTH frames are captured with backtrace().  Frames that
 *  dladdr() cannot resolve, and frames without a symbol name, produce
 *  no output line.
 */
void traceback_callstack( std::ostream & msg )
{
  enum { MAX_DEPTH = 32 };

  void * frames[ MAX_DEPTH ];
  const int frame_count = backtrace( frames , MAX_DEPTH );

  msg << std::endl << "Call stack {" << std::endl ;

  // The listing starts at index 1, i.e. the first captured frame is skipped.
  for ( int frame = 1 ; frame < frame_count ; ++frame ) {

    Dl_info info ;
    if ( ! dladdr( frames[ frame ] , & info ) ) { continue ; }

    // Prefer the demangled name; fall back to the raw symbol name.
    int demangle_status = 0 ;
    char * const demangled =
      abi::__cxa_demangle( info.dli_sname , NULL , 0 , & demangle_status );

    const char * const name =
      ( demangle_status == 0 && demangled ) ? demangled : info.dli_sname ;

    if ( name && *name != 0 ) {
      msg << " object: " << info.dli_fname
          << " function: " << name
          << std::endl ;
    }

    free( demangled ); // free(NULL) is a no-op, so no guard is required
  }

  msg << "}" ;
}

} // namespace Impl
} // namespace Kokkos

#else

namespace Kokkos {
namespace Impl {

/** \brief  Fallback: report that no traceback can be produced. */
void traceback_callstack( std::ostream & msg )
{
  msg << std::endl ;
  msg << "Traceback functionality not available" << std::endl ;
}

} // namespace Impl
} // namespace Kokkos

#endif

//----------------------------------------------------------------------------
// Error reporting helpers.
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Impl {

/** \brief  Write \p message to stderr, flush stderr, then call ::abort().
 *
 *  \p message is passed to strlen(), so it must be a NUL-terminated string.
 */
void host_abort( const char * const message )
{
  const size_t length = strlen( message );
  fwrite( message , 1 , length , stderr );
  fflush( stderr );
  ::abort();
}

/** \brief  Throw std::runtime_error carrying \p msg followed by whatever
 *          traceback_callstack() appends.
 */
void throw_runtime_exception( const std::string & msg )
{
  std::ostringstream text ;
  text << msg ;
  traceback_callstack( text );
  throw std::runtime_error( text.str() );
}

/** \brief  Format a byte count with a binary-unit suffix.
 *
 *  The value is divided by the largest of 1, 1024, 1024^2, 1024^3 that does
 *  not exceed it, printed with four significant digits, and followed by
 *  " B", " K", " M" or " G" respectively.
 */
std::string human_memory_size(size_t arg_bytes)
{
  struct Unit { double divisor ; const char * suffix ; };

  const double K = 1024 ;
  const Unit units[] = { { 1       , " B" }
                       , { K       , " K" }
                       , { K*K     , " M" }
                       , { K*K*K   , " G" } };
  const int last_unit = 3 ;

  const double bytes = arg_bytes ;

  // Advance while the value reaches the next unit's threshold.
  int chosen = 0 ;
  while ( chosen < last_unit && ! ( bytes < units[ chosen + 1 ].divisor ) ) {
    ++chosen ;
  }

  std::ostringstream out ;
  out << std::setprecision(4)
      << ( bytes / units[ chosen ].divisor )
      << units[ chosen ].suffix ;
  return out.str();
}

} // namespace Impl
} // namespace Kokkos
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
// ---- Kokkos_Error.hpp ------------------------------------------------------
// Declarations of the host-side error reporting helpers.

#ifndef KOKKOS_IMPL_ERROR_HPP
#define KOKKOS_IMPL_ERROR_HPP

#include <string>
#include <iosfwd>
#include <KokkosCore_config.h>
#ifdef KOKKOS_HAVE_CUDA
#include <Cuda/Kokkos_Cuda_abort.hpp>
#endif

namespace Kokkos {
namespace Impl {

/** \brief  Write the message to stderr, flush, and call ::abort(). */
void host_abort( const char * const );

/** \brief  Throw std::runtime_error whose text is the given message followed
 *          by the output of traceback_callstack().
 */
void throw_runtime_exception( const std::string & );

/** \brief  Append a call-stack listing to the stream, or a
 *          "Traceback functionality not available" notice when the
 *          traceback implementation is not compiled in.
 */
void traceback_callstack( std::ostream & );

/** \brief  Format a byte count with four significant digits and a
 *          " B" / " K" / " M" / " G" suffix (powers of 1024).
 */
std::string human_memory_size(size_t arg_bytes);

}
}

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

// Host-only definition of Kokkos::abort: forwards to Impl::host_abort.
// NOTE(review): presumably Cuda/Kokkos_Cuda_abort.hpp supplies the device-side
// counterpart - not visible here, confirm.
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
namespace Kokkos {
inline
void abort( const char * const message ) { Kokkos::Impl::host_abort(message); }
}
#endif /* defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) */

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

#endif /* #ifndef KOKKOS_IMPL_ERROR_HPP */

// ---- Kokkos_ExecPolicy.cpp -------------------------------------------------
// Out-of-line constructors for the per-team / per-thread value wrappers and
// the factory functions that build them.

#include <Kokkos_Core.hpp>
namespace Kokkos {
namespace Impl {
  // Store the given integer in the wrapper's 'value' member.
  PerTeamValue::PerTeamValue(int arg):value(arg) {}

  // Store the given integer in the wrapper's 'value' member.
  PerThreadValue::PerThreadValue(int arg):value(arg) {}
}

/** \brief  Wrap \p arg in an Impl::PerTeamValue. */
Impl::PerTeamValue PerTeam(const int& arg)
{
  return Impl::PerTeamValue(arg);
}

/** \brief  Wrap \p arg in an Impl::PerThreadValue. */
Impl::PerThreadValue PerThread(const int& arg)
{
  return Impl::PerThreadValue(arg);
}

}
index 0000000000000000000000000000000000000000..78b6794491a77b78c1025b10fbe3d214fdc71fdb --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp @@ -0,0 +1,1131 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef KOKKOS_FUNCTORADAPTER_HPP
#define KOKKOS_FUNCTORADAPTER_HPP

#include <cstddef>
#include <Kokkos_Core_fwd.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Tags.hpp>

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Impl {

/** \brief  Detect whether FunctorType declares a nested 'value_type'.
 *
 *  The primary template derives from Impl::false_type.  The partial
 *  specialization below names 'typename FunctorType::value_type' in its
 *  argument list and derives from Impl::true_type.
 *  NOTE(review): relies on Impl::enable_if_type (declared elsewhere) yielding
 *  the default 'void' for any well-formed argument - confirm.
 */
template< class FunctorType , class ArgTag , class Enable = void >
struct FunctorDeclaresValueType : public Impl::false_type {};

template< class FunctorType , class ArgTag >
struct FunctorDeclaresValueType< FunctorType , ArgTag
                               , typename Impl::enable_if_type< typename FunctorType::value_type >::type >
  : public Impl::true_type {};


/** \brief  Query Functor and execution policy argument tag for value type.
 *
 *  'Dec' defaults to FunctorDeclaresValueType<FunctorType,ArgTag>::value and
 *  selects between the specializations below: 'true' uses the declared
 *  FunctorType::value_type, 'false' deduces the type from the signature of
 *  FunctorType::operator().
 *
 *  This primary definition reports "no value": every type is void and both
 *  size queries return 0.
 *  NOTE(review): the partial specializations below appear to cover both
 *  values of 'Dec', which would leave this primary definition unselected -
 *  confirm.
 */
template< class FunctorType , class ArgTag , bool Dec = FunctorDeclaresValueType<FunctorType,ArgTag>::value >
struct FunctorValueTraits
{
  typedef void value_type ;
  typedef void pointer_type ;
  typedef void reference_type ;
  typedef void functor_type ;

  enum { StaticValueSize = 0 };

  KOKKOS_FORCEINLINE_FUNCTION static
  unsigned value_count( const FunctorType & ) { return 0 ; }

  KOKKOS_FORCEINLINE_FUNCTION static
  unsigned value_size( const FunctorType & ) { return 0 ; }
};

/** \brief  FunctorType == void: only the four void typedefs are provided
 *          (no StaticValueSize, value_count or value_size).
 */
template<class ArgTag>
struct FunctorValueTraits<void, ArgTag,false>
{
  typedef void value_type ;
  typedef void pointer_type ;
  typedef void reference_type ;
  typedef void functor_type ;
};

/** \brief  FunctorType::value_type is explicitly declared so use it.
 *
 * Two options for declaration
 *
 *   1) A plain-old-data (POD) type
 *        typedef {pod_type} value_type ;
 *
 *   2) An array of POD of a runtime specified count.
 *        typedef {pod_type} value_type[] ;
 *        const unsigned     value_count ;
 */
template< class FunctorType , class ArgTag >
struct FunctorValueTraits< FunctorType , ArgTag , true /* == exists FunctorType::value_type */ >
{
  // Element type: the declared value_type with one array extent removed.
  typedef typename Impl::remove_extent< typename FunctorType::value_type >::type  value_type ;
  typedef FunctorType functor_type;

  static_assert( 0 == ( sizeof(value_type) % sizeof(int) ) ,
    "Reduction functor's declared value_type requires: 0 == sizeof(value_type) % sizeof(int)" );

  // sizeof(value_type) when the declared type is not an array, 0 when it is
  // an array (the size is then only known at run time via value_count).
  enum { StaticValueSize = Impl::is_array< typename FunctorType::value_type >::value ? 0 : sizeof(value_type) };

  typedef value_type                 * pointer_type ;

  // The reference_type for an array is 'value_type *'
  // The reference_type for a single value is 'value_type &'

  typedef typename Impl::if_c< ! StaticValueSize , value_type *
                                               , value_type & >::type  reference_type ;

  // Number of values if single value (enabled only when StaticValueSize != 0)
  template< class F >
  KOKKOS_FORCEINLINE_FUNCTION static
  typename Impl::enable_if< Impl::is_same<F,FunctorType>::value && StaticValueSize , unsigned >::type
    value_count( const F & ) { return 1 ; }

  // Number of values if an array, protect via templating because 'f.value_count'
  // will only exist when the functor declares the value_type to be an array.
  template< class F >
  KOKKOS_FORCEINLINE_FUNCTION static
  typename Impl::enable_if< Impl::is_same<F,FunctorType>::value && ! StaticValueSize , unsigned >::type
    value_count( const F & f ) { return f.value_count ; }

  // Total size of the value in bytes: value_count(f) * sizeof(value_type)
  KOKKOS_INLINE_FUNCTION static
  unsigned value_size( const FunctorType & f ) { return value_count( f ) * sizeof(value_type) ; }
};


/** \brief  FunctorType::value_type is not declared: deduce the value type
 *          from the signature of FunctorType::operator().
 *
 *  The private 'deduce_reduce_type' overload set is matched against
 *  '& FunctorType::operator()' inside a decltype; the return type of the
 *  selected overload encodes the result:
 *    - VOIDTAG   : operator() has no 'T &' value argument (parallel_for form)
 *    - REJECTTAG : operator() takes a leading tag argument but ArgTag is void
 *    - T         : the type of the 'T &' argument (reduce / scan forms)
 *  VOIDTAG and REJECTTAG are both published as 'void' with size and count 0.
 *
 *  Within this definition the overloads are only named inside that decltype;
 *  their bodies are empty.
 */
template< class FunctorType , class ArgTag >
struct FunctorValueTraits< FunctorType
                         , ArgTag
                         , false  /* == FunctorType::value_type does not exist */
                         >
{
private:

  struct VOIDTAG {};   // Allow declaration of non-matching operator() with void argument tag.
  struct REJECTTAG {}; // Reject tagged operator() when using non-tagged execution policy.

  // ArgTag itself, or VOIDTAG when ArgTag is void (so a tag object can be
  // constructed for overload selection).
  typedef typename
    Impl::if_c< Impl::is_same< ArgTag , void >::value , VOIDTAG , ArgTag >::type tag_type ;

  //----------------------------------------
  // parallel_for operator without a tag:

  template< class ArgMember >
  KOKKOS_INLINE_FUNCTION
  static VOIDTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( ArgMember ) const ) {}

  template< class ArgMember >
  KOKKOS_INLINE_FUNCTION
  static VOIDTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( const ArgMember & ) const ) {}

  template< class TagType , class ArgMember >
  KOKKOS_INLINE_FUNCTION
  static REJECTTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( TagType , ArgMember ) const ) {}

  template< class TagType , class ArgMember >
  KOKKOS_INLINE_FUNCTION
  static REJECTTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( TagType , const ArgMember & ) const ) {}

  template< class TagType , class ArgMember >
  KOKKOS_INLINE_FUNCTION
  static REJECTTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( const TagType & , ArgMember ) const ) {}

  template< class TagType , class ArgMember >
  KOKKOS_INLINE_FUNCTION
  static REJECTTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( const TagType & , const ArgMember & ) const ) {}

  //----------------------------------------
  // parallel_for operator with a tag:

  template< class ArgMember >
  KOKKOS_INLINE_FUNCTION
  static VOIDTAG deduce_reduce_type( tag_type , void (FunctorType::*)( tag_type , ArgMember ) const ) {}

  template< class ArgMember >
  KOKKOS_INLINE_FUNCTION
  static VOIDTAG deduce_reduce_type( tag_type , void (FunctorType::*)( const tag_type & , ArgMember ) const ) {}

  template< class ArgMember >
  KOKKOS_INLINE_FUNCTION
  static VOIDTAG deduce_reduce_type( tag_type , void (FunctorType::*)( tag_type , const ArgMember & ) const ) {}

  template< class ArgMember >
  KOKKOS_INLINE_FUNCTION
  static VOIDTAG deduce_reduce_type( tag_type , void (FunctorType::*)( const tag_type & , const ArgMember & ) const ) {}

  //----------------------------------------
  // parallel_reduce operator without a tag:

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( VOIDTAG , void (FunctorType::*)( ArgMember , T & ) const ) {}

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( VOIDTAG , void (FunctorType::*)( const ArgMember & , T & ) const ) {}

  template< class TagType , class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static REJECTTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( TagType , ArgMember , T & ) const ) {}

  template< class TagType , class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static REJECTTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( TagType , const ArgMember & , T & ) const ) {}

  template< class TagType , class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static REJECTTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( const TagType & , ArgMember , T & ) const ) {}

  template< class TagType , class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static REJECTTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( const TagType & , const ArgMember & , T & ) const ) {}

  //----------------------------------------
  // parallel_reduce operator with a tag:

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( tag_type , void (FunctorType::*)( tag_type , ArgMember , T & ) const ) {}

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( tag_type , void (FunctorType::*)( const tag_type & , ArgMember , T & ) const ) {}

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( tag_type , void (FunctorType::*)( tag_type , const ArgMember & , T & ) const ) {}

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( tag_type , void (FunctorType::*)( const tag_type & , const ArgMember & , T & ) const ) {}

  //----------------------------------------
  // parallel_scan operator without a tag
  // (final flag taken as 'bool', then as 'const bool&'):

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( VOIDTAG , void (FunctorType::*)( ArgMember , T & , bool ) const ) {}

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( VOIDTAG , void (FunctorType::*)( const ArgMember & , T & , bool ) const ) {}

  template< class TagType , class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static REJECTTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( TagType , ArgMember , T & , bool ) const ) {}

  template< class TagType , class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static REJECTTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( TagType , const ArgMember & , T & , bool ) const ) {}

  template< class TagType , class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static REJECTTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( const TagType & , ArgMember , T & , bool ) const ) {}

  template< class TagType , class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static REJECTTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( const TagType & , const ArgMember & , T & , bool ) const ) {}

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( VOIDTAG , void (FunctorType::*)( ArgMember , T & , const bool& ) const ) {}

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( VOIDTAG , void (FunctorType::*)( const ArgMember & , T & , const bool& ) const ) {}

  template< class TagType , class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static REJECTTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( TagType , ArgMember , T & , const bool& ) const ) {}

  template< class TagType , class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static REJECTTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( TagType , const ArgMember & , T & , const bool& ) const ) {}

  template< class TagType , class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static REJECTTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( const TagType & , ArgMember , T & , const bool& ) const ) {}

  template< class TagType , class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static REJECTTAG deduce_reduce_type( VOIDTAG , void (FunctorType::*)( const TagType & , const ArgMember & , T & , const bool& ) const ) {}
  //----------------------------------------
  // parallel_scan operator with a tag
  // (final flag taken as 'bool', then as 'const bool&'):

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( tag_type , void (FunctorType::*)( tag_type , ArgMember , T & , bool ) const ) {}

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( tag_type , void (FunctorType::*)( const tag_type & , ArgMember , T & , bool ) const ) {}

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( tag_type , void (FunctorType::*)( tag_type , const ArgMember& , T & , bool ) const ) {}

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( tag_type , void (FunctorType::*)( const tag_type & , const ArgMember& , T & , bool ) const ) {}

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( tag_type , void (FunctorType::*)( tag_type , ArgMember , T & , const bool& ) const ) {}

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( tag_type , void (FunctorType::*)( const tag_type & , ArgMember , T & , const bool& ) const ) {}

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( tag_type , void (FunctorType::*)( tag_type , const ArgMember& , T & , const bool& ) const ) {}

  template< class ArgMember , class T >
  KOKKOS_INLINE_FUNCTION
  static T deduce_reduce_type( tag_type , void (FunctorType::*)( const tag_type & , const ArgMember& , T & , const bool& ) const ) {}
  //----------------------------------------

  // Result of matching operator() against the overload set above:
  // VOIDTAG, REJECTTAG, or the deduced value type T.
  typedef decltype( deduce_reduce_type( tag_type() , & FunctorType::operator() ) ) ValueType ;

  enum { IS_VOID   = Impl::is_same<VOIDTAG  ,ValueType>::value };
  enum { IS_REJECT = Impl::is_same<REJECTTAG,ValueType>::value };

public:

  // Both marker results are published as 'void'.
  typedef typename Impl::if_c< IS_VOID || IS_REJECT , void , ValueType   >::type  value_type ;
  typedef typename Impl::if_c< IS_VOID || IS_REJECT , void , ValueType * >::type  pointer_type ;
  typedef typename Impl::if_c< IS_VOID || IS_REJECT , void , ValueType & >::type  reference_type ;
  typedef FunctorType functor_type;

  static_assert( IS_VOID || IS_REJECT || 0 == ( sizeof(ValueType) % sizeof(int) ) ,
    "Reduction functor's value_type deduced from functor::operator() requires: 0 == sizeof(value_type) % sizeof(int)" );

  // 0 for the marker results, otherwise sizeof the deduced type.
  enum { StaticValueSize = IS_VOID || IS_REJECT ? 0 : sizeof(ValueType) };

  KOKKOS_FORCEINLINE_FUNCTION static
  unsigned value_size( const FunctorType & ) { return StaticValueSize ; }

  // A deduced value is always a single value (never a runtime-sized array).
  KOKKOS_FORCEINLINE_FUNCTION static
  unsigned value_count( const FunctorType & ) { return IS_VOID || IS_REJECT ? 0 : 1 ; }
};

} // namespace Impl
} // namespace Kokkos

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Impl {

/** Function signatures for FunctorType::init function with a tag.
 *  reference_type is 'value_type &' for scalar and 'value_type *' for array.
 *
 *  Only declarations of the 'enable_if' overloads are given here: const
 *  member functions and free/static functions, each taking the tag by value
 *  or by const reference.
 *  NOTE(review): presumably used to test whether '& FunctorType::init'
 *  matches one of these signatures - confirm against the adapter that uses
 *  it.
 */
template< class FunctorType , class ArgTag >
struct FunctorValueInitFunction {

  typedef typename FunctorValueTraits<FunctorType,ArgTag>::reference_type
    reference_type ;

  KOKKOS_INLINE_FUNCTION static void
    enable_if( void (FunctorType::*)( ArgTag         , reference_type ) const );
  KOKKOS_INLINE_FUNCTION static void
    enable_if( void (FunctorType::*)( ArgTag const & , reference_type ) const );
  KOKKOS_INLINE_FUNCTION static void
    enable_if( void (             *)( ArgTag         , reference_type ) );
  KOKKOS_INLINE_FUNCTION static void
    enable_if( void (             *)( ArgTag const & , reference_type ) );

};

/** Function signatures for FunctorType::init function without a tag.
 *  reference_type is 'value_type &' for scalar and 'value_type *' for array.
 *
 *  Same scheme as the tagged version, with the tag parameter dropped.
 */
template< class FunctorType >
struct FunctorValueInitFunction< FunctorType , void > {

  typedef typename FunctorValueTraits<FunctorType,void>::reference_type
    reference_type ;

  KOKKOS_INLINE_FUNCTION static void
    enable_if( void (FunctorType::*)( reference_type ) const );
  KOKKOS_INLINE_FUNCTION static void
    enable_if( void (             *)( reference_type ) );
};

// Adapter for value initialization function.
// If a proper FunctorType::init is declared then use it,
// otherwise use default constructor.
+template< class FunctorType , class ArgTag + , class T = typename FunctorValueTraits<FunctorType,ArgTag>::reference_type + , class Enable = void > +struct FunctorValueInit ; + +/* No 'init' function provided for single value */ +template< class FunctorType , class ArgTag , class T , class Enable > +struct FunctorValueInit< FunctorType , ArgTag , T & , Enable > +{ + KOKKOS_FORCEINLINE_FUNCTION static + T & init( const FunctorType & f , void * p ) + { return *( new(p) T() ); }; +}; + +/* No 'init' function provided for array value */ +template< class FunctorType , class ArgTag , class T , class Enable > +struct FunctorValueInit< FunctorType , ArgTag , T * , Enable > +{ + KOKKOS_FORCEINLINE_FUNCTION static + T * init( const FunctorType & f , void * p ) + { + const int n = FunctorValueTraits< FunctorType , ArgTag >::value_count(f); + for ( int i = 0 ; i < n ; ++i ) { new( ((T*)p) + i ) T(); } + return (T*)p ; + } +}; + +/* 'init' function provided for single value */ +template< class FunctorType , class T > +struct FunctorValueInit + < FunctorType + , void + , T & + // First substitution failure when FunctorType::init does not exist. + // Second substitution failure when FunctorType::init is not compatible. + , decltype( FunctorValueInitFunction< FunctorType , void >::enable_if( & FunctorType::init ) ) + > +{ + KOKKOS_FORCEINLINE_FUNCTION static + T & init( const FunctorType & f , void * p ) + { f.init( *((T*)p) ); return *((T*)p) ; } +}; + +/* 'init' function provided for array value */ +template< class FunctorType , class T > +struct FunctorValueInit + < FunctorType + , void + , T * + // First substitution failure when FunctorType::init does not exist. 
+ // Second substitution failure when FunctorType::init is not compatible + , decltype( FunctorValueInitFunction< FunctorType , void >::enable_if( & FunctorType::init ) ) + > +{ + KOKKOS_FORCEINLINE_FUNCTION static + T * init( const FunctorType & f , void * p ) + { f.init( (T*)p ); return (T*)p ; } +}; + +/* 'init' function provided for single value */ +template< class FunctorType , class ArgTag , class T > +struct FunctorValueInit + < FunctorType + , ArgTag + , T & + // First substitution failure when FunctorType::init does not exist. + // Second substitution failure when FunctorType::init is not compatible. + , decltype( FunctorValueInitFunction< FunctorType , ArgTag >::enable_if( & FunctorType::init ) ) + > +{ + KOKKOS_FORCEINLINE_FUNCTION static + T & init( const FunctorType & f , void * p ) + { f.init( ArgTag() , *((T*)p) ); return *((T*)p) ; } +}; + +/* 'init' function provided for array value */ +template< class FunctorType , class ArgTag , class T > +struct FunctorValueInit + < FunctorType + , ArgTag + , T * + // First substitution failure when FunctorType::init does not exist. 
+ // Second substitution failure when FunctorType::init is not compatible + , decltype( FunctorValueInitFunction< FunctorType , ArgTag >::enable_if( & FunctorType::init ) ) + > +{ + KOKKOS_FORCEINLINE_FUNCTION static + T * init( const FunctorType & f , void * p ) + { f.init( ArgTag() , (T*)p ); return (T*)p ; } +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +// Signatures for compatible FunctorType::join with tag and not an array +template< class FunctorType , class ArgTag , bool IsArray = 0 == FunctorValueTraits<FunctorType,ArgTag>::StaticValueSize > +struct FunctorValueJoinFunction { + + typedef typename FunctorValueTraits<FunctorType,ArgTag>::value_type value_type ; + + typedef volatile value_type & vref_type ; + typedef const volatile value_type & cvref_type ; + + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , vref_type , cvref_type ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , vref_type , cvref_type ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag , vref_type , cvref_type ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag const & , vref_type , cvref_type ) ); +}; + +// Signatures for compatible FunctorType::join with tag and is an array +template< class FunctorType , class ArgTag > +struct FunctorValueJoinFunction< FunctorType , ArgTag , true > { + + typedef typename FunctorValueTraits<FunctorType,ArgTag>::value_type value_type ; + + typedef volatile value_type * vptr_type ; + typedef const volatile value_type * cvptr_type ; + + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , vptr_type , cvptr_type ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , 
vptr_type , cvptr_type ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag , vptr_type , cvptr_type ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag const & , vptr_type , cvptr_type ) ); +}; + +// Signatures for compatible FunctorType::join without tag and not an array +template< class FunctorType > +struct FunctorValueJoinFunction< FunctorType , void , false > { + + typedef typename FunctorValueTraits<FunctorType,void>::value_type value_type ; + + typedef volatile value_type & vref_type ; + typedef const volatile value_type & cvref_type ; + + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( vref_type , cvref_type ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( vref_type , cvref_type ) ); +}; + +// Signatures for compatible FunctorType::join without tag and is an array +template< class FunctorType > +struct FunctorValueJoinFunction< FunctorType , void , true > { + + typedef typename FunctorValueTraits<FunctorType,void>::value_type value_type ; + + typedef volatile value_type * vptr_type ; + typedef const volatile value_type * cvptr_type ; + + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( vptr_type , cvptr_type ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( vptr_type , cvptr_type ) ); +}; + + +template< class FunctorType , class ArgTag + , class T = typename FunctorValueTraits<FunctorType,ArgTag>::reference_type + , class Enable = void > +struct FunctorValueJoin ; + +/* No 'join' function provided, single value */ +template< class FunctorType , class ArgTag , class T , class Enable > +struct FunctorValueJoin< FunctorType , ArgTag , T & , Enable > +{ + KOKKOS_FORCEINLINE_FUNCTION + FunctorValueJoin(const FunctorType& ){} + + KOKKOS_FORCEINLINE_FUNCTION static + void join( const FunctorType & f , volatile void * const lhs , const volatile void * const rhs ) + { + *((volatile T*)lhs) += *((const volatile T*)rhs); + } + 
KOKKOS_FORCEINLINE_FUNCTION + void operator()( volatile T& lhs , const volatile T& rhs ) const + { + lhs += rhs; + } + KOKKOS_FORCEINLINE_FUNCTION + void operator() ( T& lhs , const T& rhs ) const + { + lhs += rhs; + } +}; + +/* No 'join' function provided, array of values */ +template< class FunctorType , class ArgTag , class T , class Enable > +struct FunctorValueJoin< FunctorType , ArgTag , T * , Enable > +{ + const FunctorType& f; + + KOKKOS_FORCEINLINE_FUNCTION + FunctorValueJoin(const FunctorType& f_):f(f_){} + + KOKKOS_FORCEINLINE_FUNCTION static + void join( const FunctorType & f_ , volatile void * const lhs , const volatile void * const rhs ) + { + const int n = FunctorValueTraits<FunctorType,ArgTag>::value_count(f_); + + for ( int i = 0 ; i < n ; ++i ) { ((volatile T*)lhs)[i] += ((const volatile T*)rhs)[i]; } + } + KOKKOS_FORCEINLINE_FUNCTION + void operator()( volatile T* const lhs , const volatile T* const rhs ) const + { + const int n = FunctorValueTraits<FunctorType,ArgTag>::value_count(f); + + for ( int i = 0 ; i < n ; ++i ) { lhs[i] += rhs[i]; } + } + KOKKOS_FORCEINLINE_FUNCTION + void operator() ( T* lhs , const T* rhs ) const + { + const int n = FunctorValueTraits<FunctorType,ArgTag>::value_count(f); + + for ( int i = 0 ; i < n ; ++i ) { lhs[i] += rhs[i]; } + } +}; + +/* 'join' function provided, single value */ +template< class FunctorType , class ArgTag , class T > +struct FunctorValueJoin + < FunctorType + , ArgTag + , T & + // First substitution failure when FunctorType::join does not exist. 
+ // Second substitution failure when enable_if( & Functor::join ) does not exist + , decltype( FunctorValueJoinFunction< FunctorType , ArgTag >::enable_if( & FunctorType::join ) ) + > +{ + const FunctorType& f; + + KOKKOS_FORCEINLINE_FUNCTION + FunctorValueJoin(const FunctorType& f_):f(f_){} + + KOKKOS_FORCEINLINE_FUNCTION static + void join( const FunctorType & f_ , volatile void * const lhs , const volatile void * const rhs ) + { + f_.join( ArgTag() , *((volatile T *)lhs) , *((const volatile T *)rhs) ); + } + KOKKOS_FORCEINLINE_FUNCTION + void operator()( volatile T& lhs , const volatile T& rhs ) const + { + f.join( ArgTag() , lhs , rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION + void operator() ( T& lhs , const T& rhs ) const + { + f.join( ArgTag(), lhs , rhs ); + } +}; + +/* 'join' function provided, no tag, single value */ +template< class FunctorType , class T > +struct FunctorValueJoin + < FunctorType + , void + , T & + // First substitution failure when FunctorType::join does not exist. + // Second substitution failure when enable_if( & Functor::join ) does not exist + , decltype( FunctorValueJoinFunction< FunctorType , void >::enable_if( & FunctorType::join ) ) + > +{ + const FunctorType& f; + + KOKKOS_FORCEINLINE_FUNCTION + FunctorValueJoin(const FunctorType& f_):f(f_){} + + KOKKOS_FORCEINLINE_FUNCTION static + void join( const FunctorType & f_ , volatile void * const lhs , const volatile void * const rhs ) + { + f_.join( *((volatile T *)lhs) , *((const volatile T *)rhs) ); + } + KOKKOS_FORCEINLINE_FUNCTION + void operator()( volatile T& lhs , const volatile T& rhs ) const + { + f.join( lhs , rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION + void operator() ( T& lhs , const T& rhs ) const + { + f.join( lhs , rhs ); + } +}; + +/* 'join' function provided for array value */ +template< class FunctorType , class ArgTag , class T > +struct FunctorValueJoin + < FunctorType + , ArgTag + , T * + // First substitution failure when FunctorType::join does not exist. 
+ // Second substitution failure when enable_if( & Functor::join ) does not exist + , decltype( FunctorValueJoinFunction< FunctorType , ArgTag >::enable_if( & FunctorType::join ) ) + > +{ + const FunctorType& f; + + KOKKOS_FORCEINLINE_FUNCTION + FunctorValueJoin(const FunctorType& f_):f(f_){} + + KOKKOS_FORCEINLINE_FUNCTION static + void join( const FunctorType & f_ , volatile void * const lhs , const volatile void * const rhs ) + { + f_.join( ArgTag() , (volatile T *)lhs , (const volatile T *)rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION + void operator()( volatile T* const lhs , const volatile T* const rhs ) const + { + f.join( ArgTag() , lhs , rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION + void operator() ( T* lhs , const T* rhs ) const + { + f.join( ArgTag(), lhs , rhs ); + } +}; + +/* 'join' function provided, no tag, array value */ +template< class FunctorType , class T > +struct FunctorValueJoin + < FunctorType + , void + , T * + // First substitution failure when FunctorType::join does not exist. 
+ // Second substitution failure when enable_if( & Functor::join ) does not exist + , decltype( FunctorValueJoinFunction< FunctorType , void >::enable_if( & FunctorType::join ) ) + > +{ + const FunctorType& f; + + KOKKOS_FORCEINLINE_FUNCTION + FunctorValueJoin(const FunctorType& f_):f(f_){} + + KOKKOS_FORCEINLINE_FUNCTION static + void join( const FunctorType & f_ , volatile void * const lhs , const volatile void * const rhs ) + { + f_.join( (volatile T *)lhs , (const volatile T *)rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION + void operator() ( volatile T* const lhs , const volatile T* const rhs ) const + { + f.join( lhs , rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION + void operator() ( T* lhs , const T* rhs ) const + { + f.join( lhs , rhs ); + } +}; + +} // namespace Impl +} // namespace Kokkos + +namespace Kokkos { + +namespace Impl { + + template<typename ValueType, class JoinOp, class Enable = void> + struct JoinLambdaAdapter { + typedef ValueType value_type; + const JoinOp& lambda; + KOKKOS_INLINE_FUNCTION + JoinLambdaAdapter(const JoinOp& lambda_):lambda(lambda_) {} + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& dst, const volatile value_type& src) const { + lambda(dst,src); + } + + KOKKOS_INLINE_FUNCTION + void join(value_type& dst, const value_type& src) const { + lambda(dst,src); + } + + KOKKOS_INLINE_FUNCTION + void operator() (volatile value_type& dst, const volatile value_type& src) const { + lambda(dst,src); + } + + KOKKOS_INLINE_FUNCTION + void operator() (value_type& dst, const value_type& src) const { + lambda(dst,src); + } + }; + + template<typename ValueType, class JoinOp> + struct JoinLambdaAdapter<ValueType, JoinOp, decltype( FunctorValueJoinFunction< JoinOp , void >::enable_if( & JoinOp::join ) )> { + typedef ValueType value_type; + typedef StaticAssertSame<ValueType,typename JoinOp::value_type> assert_value_types_match; + const JoinOp& lambda; + KOKKOS_INLINE_FUNCTION + JoinLambdaAdapter(const JoinOp& lambda_):lambda(lambda_) {} + + 
KOKKOS_INLINE_FUNCTION + void join(volatile value_type& dst, const volatile value_type& src) const { + lambda.join(dst,src); + } + + KOKKOS_INLINE_FUNCTION + void join(value_type& dst, const value_type& src) const { + lambda.join(dst,src); + } + + KOKKOS_INLINE_FUNCTION + void operator() (volatile value_type& dst, const volatile value_type& src) const { + lambda.join(dst,src); + } + + KOKKOS_INLINE_FUNCTION + void operator() (value_type& dst, const value_type& src) const { + lambda.join(dst,src); + } + }; + + template<typename ValueType> + struct JoinAdd { + typedef ValueType value_type; + + KOKKOS_INLINE_FUNCTION + JoinAdd() {} + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& dst, const volatile value_type& src) const { + dst+=src; + } + KOKKOS_INLINE_FUNCTION + void operator() (value_type& dst, const value_type& src) const { + dst+=src; + } + KOKKOS_INLINE_FUNCTION + void operator() (volatile value_type& dst, const volatile value_type& src) const { + dst+=src; + } + }; + +} +} + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class FunctorType , class ArgTag + , class T = typename FunctorValueTraits<FunctorType,ArgTag>::reference_type > +struct FunctorValueOps ; + +template< class FunctorType , class ArgTag , class T > +struct FunctorValueOps< FunctorType , ArgTag , T & > +{ + KOKKOS_FORCEINLINE_FUNCTION static + T * pointer( T & r ) { return & r ; } + + KOKKOS_FORCEINLINE_FUNCTION static + T & reference( void * p ) { return *((T*)p); } + + KOKKOS_FORCEINLINE_FUNCTION static + void copy( const FunctorType & , void * const lhs , const void * const rhs ) + { *((T*)lhs) = *((const T*)rhs); } +}; + +/* No 'join' function provided, array of values */ +template< class FunctorType , class ArgTag , class T > +struct FunctorValueOps< FunctorType , ArgTag , T * > +{ + KOKKOS_FORCEINLINE_FUNCTION 
static + T * pointer( T * p ) { return p ; } + + KOKKOS_FORCEINLINE_FUNCTION static + T * reference( void * p ) { return ((T*)p); } + + KOKKOS_FORCEINLINE_FUNCTION static + void copy( const FunctorType & f , void * const lhs , const void * const rhs ) + { + const int n = FunctorValueTraits<FunctorType,ArgTag>::value_count(f); + for ( int i = 0 ; i < n ; ++i ) { ((T*)lhs)[i] = ((const T*)rhs)[i]; } + } +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +// Compatible functions for 'final' function and value_type not an array +template< class FunctorType , class ArgTag , bool IsArray = 0 == FunctorValueTraits<FunctorType,ArgTag>::StaticValueSize > +struct FunctorFinalFunction { + + typedef typename FunctorValueTraits<FunctorType,ArgTag>::value_type value_type ; + + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , value_type & ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , value_type & ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , value_type & ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , value_type & ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag , value_type & ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag const & , value_type & ) ); + + // KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , value_type volatile & ) const ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , value_type volatile & ) const ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , value_type volatile & ) ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void 
(FunctorType::*)( ArgTag const & , value_type volatile & ) ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag , value_type volatile & ) ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag const & , value_type volatile & ) ); + + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , value_type const & ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , value_type const & ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , value_type const & ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , value_type const & ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag , value_type const & ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag const & , value_type const & ) ); + + // KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , value_type const volatile & ) const ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , value_type const volatile & ) const ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , value_type const volatile & ) ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , value_type const volatile & ) ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag , value_type const volatile & ) ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag const & , value_type const volatile & ) ); +}; + +// Compatible functions for 'final' function and value_type is an array +template< class FunctorType , class ArgTag > +struct FunctorFinalFunction< FunctorType , ArgTag , true > { + + typedef typename FunctorValueTraits<FunctorType,ArgTag>::value_type value_type ; + + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , value_type * ) const ); + 
KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , value_type * ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , value_type * ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , value_type * ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag , value_type * ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag const & , value_type * ) ); + + // KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , value_type volatile * ) const ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , value_type volatile * ) const ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , value_type volatile * ) ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , value_type volatile * ) ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag , value_type volatile * ) ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag const & , value_type volatile * ) ); + + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , value_type const * ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , value_type const * ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , value_type const * ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , value_type const * ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag , value_type const * ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag const & , value_type const * ) ); + + // KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , value_type const volatile * ) const ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void 
(FunctorType::*)( ArgTag const & , value_type const volatile * ) const ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , value_type const volatile * ) ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , value_type const volatile * ) ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag , value_type const volatile * ) ); + // KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag const & , value_type const volatile * ) ); +}; + +template< class FunctorType > +struct FunctorFinalFunction< FunctorType , void , false > { + + typedef typename FunctorValueTraits<FunctorType,void>::value_type value_type ; + + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( value_type & ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( value_type & ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( value_type & ) ); + + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( const value_type & ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( const value_type & ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( const value_type & ) ); +}; + +template< class FunctorType > +struct FunctorFinalFunction< FunctorType , void , true > { + + typedef typename FunctorValueTraits<FunctorType,void>::value_type value_type ; + + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( value_type * ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( value_type * ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( value_type * ) ); + + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( const value_type * ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( const value_type * ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( const value_type * ) ); +}; + +/* No 'final' 
function provided */ +template< class FunctorType , class ArgTag + , class ResultType = typename FunctorValueTraits<FunctorType,ArgTag>::reference_type + , class Enable = void > +struct FunctorFinal +{ + KOKKOS_FORCEINLINE_FUNCTION static + void final( const FunctorType & , void * ) {} +}; + +/* 'final' function provided */ +template< class FunctorType , class ArgTag , class T > +struct FunctorFinal + < FunctorType + , ArgTag + , T & + // First substitution failure when FunctorType::final does not exist. + // Second substitution failure when enable_if( & Functor::final ) does not exist + , decltype( FunctorFinalFunction< FunctorType , ArgTag >::enable_if( & FunctorType::final ) ) + > +{ + KOKKOS_FORCEINLINE_FUNCTION static + void final( const FunctorType & f , void * p ) { f.final( *((T*)p) ); } + + KOKKOS_FORCEINLINE_FUNCTION static + void final( FunctorType & f , void * p ) { f.final( *((T*)p) ); } +}; + +/* 'final' function provided for array value */ +template< class FunctorType , class ArgTag , class T > +struct FunctorFinal + < FunctorType + , ArgTag + , T * + // First substitution failure when FunctorType::final does not exist. 
+ // Second substitution failure when enable_if( & Functor::final ) does not exist + , decltype( FunctorFinalFunction< FunctorType , ArgTag >::enable_if( & FunctorType::final ) ) + > +{ + KOKKOS_FORCEINLINE_FUNCTION static + void final( const FunctorType & f , void * p ) { f.final( (T*)p ); } + + KOKKOS_FORCEINLINE_FUNCTION static + void final( FunctorType & f , void * p ) { f.final( (T*)p ); } +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class FunctorType , class ArgTag + , class ReferenceType = typename FunctorValueTraits<FunctorType,ArgTag>::reference_type > +struct FunctorApplyFunction { + + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , ReferenceType ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , ReferenceType ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag , ReferenceType ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ArgTag const & , ReferenceType ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag , ReferenceType ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ArgTag const & , ReferenceType ) ); +}; + +template< class FunctorType , class ReferenceType > +struct FunctorApplyFunction< FunctorType , void , ReferenceType > { + + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ReferenceType ) const ); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)( ReferenceType ) ); + KOKKOS_INLINE_FUNCTION static void enable_if( void ( *)( ReferenceType ) ); +}; + +template< class FunctorType > +struct FunctorApplyFunction< FunctorType , void , void > { + + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)() const 
); + KOKKOS_INLINE_FUNCTION static void enable_if( void (FunctorType::*)() ); +}; + +template< class FunctorType , class ArgTag , class ReferenceType + , class Enable = void > +struct FunctorApply +{ + KOKKOS_FORCEINLINE_FUNCTION static + void apply( const FunctorType & , void * ) {} +}; + +/* 'apply' function provided for void value */ +template< class FunctorType , class ArgTag > +struct FunctorApply + < FunctorType + , ArgTag + , void + // First substitution failure when FunctorType::apply does not exist. + // Second substitution failure when enable_if( & Functor::apply ) does not exist + , decltype( FunctorApplyFunction< FunctorType , ArgTag , void >::enable_if( & FunctorType::apply ) ) + > +{ + KOKKOS_FORCEINLINE_FUNCTION static + void apply( FunctorType & f ) { f.apply(); } + + KOKKOS_FORCEINLINE_FUNCTION static + void apply( const FunctorType & f ) { f.apply(); } +}; + +/* 'apply' function provided for single value */ +template< class FunctorType , class ArgTag , class T > +struct FunctorApply + < FunctorType + , ArgTag + , T & + // First substitution failure when FunctorType::apply does not exist. 
+ // Second substitution failure when enable_if( & Functor::apply ) does not exist + , decltype( FunctorApplyFunction< FunctorType , ArgTag >::enable_if( & FunctorType::apply ) ) + > +{ + KOKKOS_FORCEINLINE_FUNCTION static + void apply( const FunctorType & f , void * p ) { f.apply( *((T*)p) ); } + + KOKKOS_FORCEINLINE_FUNCTION static + void apply( FunctorType & f , void * p ) { f.apply( *((T*)p) ); } +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* KOKKOS_FUNCTORADAPTER_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.cpp b/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4eb80d03f1fa0c26a2ba9524b16719dcf2a72e99 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.cpp @@ -0,0 +1,108 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_HostSpace.hpp> + +#include <impl/Kokkos_HBWAllocators.hpp> +#include <impl/Kokkos_Error.hpp> + + +#include <stdint.h> // uintptr_t +#include <cstdlib> // for malloc, realloc, and free +#include <cstring> // for memcpy + +#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE) +#include <sys/mman.h> // for mmap, munmap, MAP_ANON, etc +#include <unistd.h> // for sysconf, _SC_PAGE_SIZE, _SC_PHYS_PAGES +#endif + +#include <sstream> +#include <iostream> + +#ifdef KOKKOS_HAVE_HBWSPACE +#include <memkind.h> + +namespace Kokkos { +namespace Experimental { +namespace Impl { +#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB) +/*--------------------------------------------------------------------------*/ + +void* HBWMallocAllocator::allocate( size_t size ) +{ + std::cout<< "Allocate HBW: " << 1.0e-6*size << "MB" << std::endl; + void * ptr = NULL; + if (size) { + ptr = memkind_malloc(MEMKIND_TYPE,size); + + if (!ptr) + { + std::ostringstream msg ; + msg << name() << ": allocate(" << size << ") FAILED"; + 
Kokkos::Impl::throw_runtime_exception( msg.str() ); + } + } + return ptr; +} + +void HBWMallocAllocator::deallocate( void * ptr, size_t /*size*/ ) +{ + if (ptr) { + memkind_free(MEMKIND_TYPE,ptr); + } +} + +void * HBWMallocAllocator::reallocate(void * old_ptr, size_t /*old_size*/, size_t new_size) +{ + void * ptr = memkind_realloc(MEMKIND_TYPE, old_ptr, new_size); + + if (new_size > 0u && ptr == NULL) { + Kokkos::Impl::throw_runtime_exception("Error: Malloc Allocator could not reallocate memory"); + } + return ptr; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos +#endif diff --git a/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.hpp b/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.hpp new file mode 100644 index 0000000000000000000000000000000000000000..be0134460b279f0cbb5f0bc1efda36863c0342ca --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.hpp @@ -0,0 +1,75 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HBW_ALLOCATORS_HPP +#define KOKKOS_HBW_ALLOCATORS_HPP + +#ifdef KOKKOS_HAVE_HBWSPACE + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +/// class HBWMallocAllocator: static-only allocator interface; the definitions in Kokkos_HBWAllocators.cpp forward to memkind_malloc / memkind_free / memkind_realloc with MEMKIND_HBW. +class HBWMallocAllocator +{ +public: + static const char * name() ///< Label that allocate() puts in front of its failure message. + { + return "HBW Malloc Allocator"; + } + /// Returns memory for \c size bytes, or NULL when \c size is zero; a failed allocation is reported via Kokkos::Impl::throw_runtime_exception. + static void* allocate(size_t size); + /// Frees \c ptr when it is non-null; \c size is not used by the implementation. + static void deallocate(void * ptr, size_t size); + /// Resizes \c old_ptr to \c new_size bytes; \c old_size is not used. Throws when \c new_size is non-zero and the result is NULL. + static void * reallocate(void * old_ptr, size_t old_size, size_t new_size); +}; + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos +#endif //KOKKOS_HAVE_HBWSPACE +#endif //KOKKOS_HBW_ALLOCATORS_HPP + + diff --git a/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp new file mode 100644 index 0000000000000000000000000000000000000000..11cc120212b25804df0afb9f660ff8b165e0f217 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp @@ -0,0 +1,379 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v.
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#include <Kokkos_Macros.hpp> + + +#include <stddef.h> +#include <stdlib.h> +#include <stdint.h> +#include <memory.h> + +#include <iostream> +#include <sstream> +#include <cstring> +#include <algorithm> + +#include <Kokkos_HBWSpace.hpp> +#include <impl/Kokkos_Error.hpp> +#include <Kokkos_Atomic.hpp> +#ifdef KOKKOS_HAVE_HBWSPACE +#include <memkind.h> +#endif + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +#ifdef KOKKOS_HAVE_HBWSPACE +#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB) + +namespace Kokkos { +namespace Experimental { +namespace { + +static const int QUERY_SPACE_IN_PARALLEL_MAX = 16 ; + +typedef int (* QuerySpaceInParallelPtr )(); + +QuerySpaceInParallelPtr s_in_parallel_query[ QUERY_SPACE_IN_PARALLEL_MAX ] ; +int s_in_parallel_query_count = 0 ; + +} // namespace <empty> + +void HBWSpace::register_in_parallel( int (*device_in_parallel)() ) +{ + if ( 0 == device_in_parallel ) { + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel ERROR : given NULL" ) ); + } + + int i = -1 ; + + if ( ! (device_in_parallel)() ) { + for ( i = 0 ; i < s_in_parallel_query_count && ! 
(*(s_in_parallel_query[i]))() ; ++i ); + } + + if ( i < s_in_parallel_query_count ) { + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel_query ERROR : called in_parallel" ) ); + + } + + if ( QUERY_SPACE_IN_PARALLEL_MAX <= i ) { + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel_query ERROR : exceeded maximum" ) ); + + } + + for ( i = 0 ; i < s_in_parallel_query_count && s_in_parallel_query[i] != device_in_parallel ; ++i ); + + if ( i == s_in_parallel_query_count ) { + s_in_parallel_query[s_in_parallel_query_count++] = device_in_parallel ; + } +} + +int HBWSpace::in_parallel() +{ + const int n = s_in_parallel_query_count ; + + int i = 0 ; + + while ( i < n && ! (*(s_in_parallel_query[i]))() ) { ++i ; } + + return i < n ; +} + +} // namespace Experiemtal +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Experimental { + +/* Default allocation mechanism */ +HBWSpace::HBWSpace() + : m_alloc_mech( + HBWSpace::STD_MALLOC + ) +{ +printf("Init\n"); +setenv("MEMKIND_HBW_NODES", "1", 0); +} + +/* Default allocation mechanism */ +HBWSpace::HBWSpace( const HBWSpace::AllocationMechanism & arg_alloc_mech ) + : m_alloc_mech( HBWSpace::STD_MALLOC ) +{ +printf("Init2\n"); +setenv("MEMKIND_HBW_NODES", "1", 0); + if ( arg_alloc_mech == STD_MALLOC ) { + m_alloc_mech = HBWSpace::STD_MALLOC ; + } +} + +void * HBWSpace::allocate( const size_t arg_alloc_size ) const +{ + static_assert( sizeof(void*) == sizeof(uintptr_t) + , "Error sizeof(void*) != sizeof(uintptr_t)" ); + + static_assert( Kokkos::Impl::power_of_two< Kokkos::Impl::MEMORY_ALIGNMENT >::value + , "Memory alignment must be power of two" ); + + constexpr uintptr_t alignment = Kokkos::Impl::MEMORY_ALIGNMENT ; + constexpr uintptr_t alignment_mask = alignment - 1 ; + + void * ptr = 0 ; + + if ( arg_alloc_size ) { + + if ( m_alloc_mech 
== STD_MALLOC ) { + // Over-allocate to and round up to guarantee proper alignment. + size_t size_padded = arg_alloc_size + sizeof(void*) + alignment ; + + void * alloc_ptr = memkind_malloc(MEMKIND_TYPE, size_padded ); + + if (alloc_ptr) { + uintptr_t address = reinterpret_cast<uintptr_t>(alloc_ptr); + + // offset enough to record the alloc_ptr + address += sizeof(void *); + uintptr_t rem = address % alignment; + uintptr_t offset = rem ? (alignment - rem) : 0u; + address += offset; + ptr = reinterpret_cast<void *>(address); + // record the alloc'd pointer + address -= sizeof(void *); + *reinterpret_cast<void **>(address) = alloc_ptr; + } + } + } + + if ( ( ptr == 0 ) || ( reinterpret_cast<uintptr_t>(ptr) == ~uintptr_t(0) ) + || ( reinterpret_cast<uintptr_t>(ptr) & alignment_mask ) ) { + std::ostringstream msg ; + msg << "Kokkos::Experimental::HBWSpace::allocate[ " ; + switch( m_alloc_mech ) { + case STD_MALLOC: msg << "STD_MALLOC" ; break ; + } + msg << " ]( " << arg_alloc_size << " ) FAILED" ; + if ( ptr == NULL ) { msg << " NULL" ; } + else { msg << " NOT ALIGNED " << ptr ; } + + std::cerr << msg.str() << std::endl ; + std::cerr.flush(); + + Kokkos::Impl::throw_runtime_exception( msg.str() ); + } + + return ptr; +} + + +void HBWSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const +{ + if ( arg_alloc_ptr ) { + + if ( m_alloc_mech == STD_MALLOC ) { + void * alloc_ptr = *(reinterpret_cast<void **>(arg_alloc_ptr) -1); + memkind_free(MEMKIND_TYPE, alloc_ptr ); + } + + } +} + +} // namespace Experimental +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +SharedAllocationRecord< void , void > +SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::s_root_record ; + +void +SharedAllocationRecord< Kokkos::Experimental::HBWSpace , 
void >:: +deallocate( SharedAllocationRecord< void , void > * arg_rec ) +{ + delete static_cast<SharedAllocationRecord*>(arg_rec); +} + +SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >:: +~SharedAllocationRecord() +{ + m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr + , SharedAllocationRecord< void , void >::m_alloc_size + ); +} + +SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >:: +SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size + , const SharedAllocationRecord< void , void >::function_type arg_dealloc + ) + // Pass through allocated [ SharedAllocationHeader , user_memory ] + // Pass through deallocation function + : SharedAllocationRecord< void , void > + ( & SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::s_root_record + , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) ) + , sizeof(SharedAllocationHeader) + arg_alloc_size + , arg_dealloc + ) + , m_space( arg_space ) +{ + // Fill in the Header information + RecordBase::m_alloc_ptr->m_record = static_cast< SharedAllocationRecord< void , void > * >( this ); + + strncpy( RecordBase::m_alloc_ptr->m_label + , arg_label.c_str() + , SharedAllocationHeader::maximum_label_length + ); +} + +//---------------------------------------------------------------------------- + +void * SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >:: +allocate_tracked( const Kokkos::Experimental::HBWSpace & arg_space + , const std::string & arg_alloc_label + , const size_t arg_alloc_size ) +{ + if ( ! 
arg_alloc_size ) return (void *) 0 ; + + SharedAllocationRecord * const r = + allocate( arg_space , arg_alloc_label , arg_alloc_size ); + + RecordBase::increment( r ); + + return r->data(); +} + +void SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >:: +deallocate_tracked( void * const arg_alloc_ptr ) +{ + if ( arg_alloc_ptr != 0 ) { + SharedAllocationRecord * const r = get_record( arg_alloc_ptr ); + + RecordBase::decrement( r ); + } +} + +void * SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >:: +reallocate_tracked( void * const arg_alloc_ptr + , const size_t arg_alloc_size ) +{ + SharedAllocationRecord * const r_old = get_record( arg_alloc_ptr ); + SharedAllocationRecord * const r_new = allocate( r_old->m_space , r_old->get_label() , arg_alloc_size ); + + Kokkos::Impl::DeepCopy<HBWSpace,HBWSpace>( r_new->data() , r_old->data() + , std::min( r_old->size() , r_new->size() ) ); + + RecordBase::increment( r_new ); + RecordBase::decrement( r_old ); + + return r_new->data(); +} + +SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > * +SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record( void * alloc_ptr ) +{ + typedef SharedAllocationHeader Header ; + typedef SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > RecordHost ; + + SharedAllocationHeader const * const head = alloc_ptr ? Header::get_header( alloc_ptr ) : (SharedAllocationHeader *)0 ; + RecordHost * const record = head ? static_cast< RecordHost * >( head->m_record ) : (RecordHost *) 0 ; + + if ( ! alloc_ptr || record->m_alloc_ptr != head ) { + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record ERROR" ) ); + } + + return record ; +} + +// Iterate records to print orphaned memory ... 
+void SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >:: +print_records( std::ostream & s , const Kokkos::Experimental::HBWSpace & space , bool detail ) // NOTE: 'space' is not used; the records are reached through the static s_root_record. +{ + SharedAllocationRecord< void , void >::print_host_accessible_records( s , "HBWSpace" , & s_root_record , detail ); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Experimental { +namespace { + const unsigned HBW_SPACE_ATOMIC_MASK = 0xFFFF; + const unsigned HBW_SPACE_ATOMIC_XOR_MASK = 0x5A39; + static int HBW_SPACE_ATOMIC_LOCKS[HBW_SPACE_ATOMIC_MASK+1]; +} +// Address-hashed lock table (0 = free, 1 = held). Slot = ((address >> 2) & MASK) ^ XOR_MASK, which stays within [0, MASK] because both operands only use the low 16 bits. +namespace Impl { +void init_lock_array_hbw_space() { // Zero the lock table on the first call only; later calls return immediately. + static int is_initialized = 0; + if( is_initialized ) return ; // re-zeroing on a later call would silently release locks that are currently held + for(int i = 0; i < static_cast<int> (HBW_SPACE_ATOMIC_MASK+1); i++) HBW_SPACE_ATOMIC_LOCKS[i] = 0; + is_initialized = 1; +} +// Single attempt to take the lock for ptr's slot (compare-exchange 0 -> 1); returns true when atomic_compare_exchange reports a previous value of 0. +bool lock_address_hbw_space(void* ptr) { + return 0 == atomic_compare_exchange( &HBW_SPACE_ATOMIC_LOCKS[ + (( size_t(ptr) >> 2 ) & HBW_SPACE_ATOMIC_MASK) ^ HBW_SPACE_ATOMIC_XOR_MASK] , + 0 , 1); +} +// Release the lock for ptr's slot by atomically storing 0. +void unlock_address_hbw_space(void* ptr) { + atomic_exchange( &HBW_SPACE_ATOMIC_LOCKS[ + (( size_t(ptr) >> 2 ) & HBW_SPACE_ATOMIC_MASK) ^ HBW_SPACE_ATOMIC_XOR_MASK] , + 0); +} + +} +} +} +#endif diff --git a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b52f4591ef0b8c0b71445f6e33b4d913822e5446 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp @@ -0,0 +1,537 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <algorithm> +#include <Kokkos_Macros.hpp> + +/*--------------------------------------------------------------------------*/ + +#if defined( __INTEL_COMPILER ) && ! 
defined ( KOKKOS_HAVE_CUDA ) + +// Intel specialized allocator does not interoperate with CUDA memory allocation + +#define KOKKOS_INTEL_MM_ALLOC_AVAILABLE + +#endif + +/*--------------------------------------------------------------------------*/ + +#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE) + +#include <unistd.h> +#include <sys/mman.h> + +/* mmap flags for private anonymous memory allocation */ + +#if defined( MAP_ANONYMOUS ) && defined( MAP_PRIVATE ) + #define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS) +#elif defined( MAP_ANON ) && defined( MAP_PRIVATE ) + #define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANON) +#endif + +// mmap flags for huge page tables +// the Cuda driver does not interoperate with MAP_HUGETLB +#if defined( KOKKOS_POSIX_MMAP_FLAGS ) + #if defined( MAP_HUGETLB ) && ! defined( KOKKOS_HAVE_CUDA ) + #define KOKKOS_POSIX_MMAP_FLAGS_HUGE (KOKKOS_POSIX_MMAP_FLAGS | MAP_HUGETLB ) + #else + #define KOKKOS_POSIX_MMAP_FLAGS_HUGE KOKKOS_POSIX_MMAP_FLAGS + #endif +#endif + +#endif + +/*--------------------------------------------------------------------------*/ + +#include <stddef.h> +#include <stdlib.h> +#include <stdint.h> +#include <memory.h> + +#include <iostream> +#include <sstream> +#include <cstring> + +#include <Kokkos_HostSpace.hpp> +#include <impl/Kokkos_Error.hpp> +#include <Kokkos_Atomic.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace { + +static const int QUERY_SPACE_IN_PARALLEL_MAX = 16 ; + +typedef int (* QuerySpaceInParallelPtr )(); + +QuerySpaceInParallelPtr s_in_parallel_query[ QUERY_SPACE_IN_PARALLEL_MAX ] ; +int s_in_parallel_query_count = 0 ; + +} // namespace <empty> + +void HostSpace::register_in_parallel( int (*device_in_parallel)() ) +{ + if ( 0 == device_in_parallel ) { + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel 
ERROR : given NULL" ) ); + } + + int i = -1 ; + + if ( ! (device_in_parallel)() ) { + for ( i = 0 ; i < s_in_parallel_query_count && ! (*(s_in_parallel_query[i]))() ; ++i ); + } + + if ( i < s_in_parallel_query_count ) { + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : called in_parallel" ) ); + + } + + if ( QUERY_SPACE_IN_PARALLEL_MAX <= i ) { + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : exceeded maximum" ) ); + + } + + for ( i = 0 ; i < s_in_parallel_query_count && s_in_parallel_query[i] != device_in_parallel ; ++i ); + + if ( i == s_in_parallel_query_count ) { + s_in_parallel_query[s_in_parallel_query_count++] = device_in_parallel ; + } +} + +int HostSpace::in_parallel() +{ + const int n = s_in_parallel_query_count ; + + int i = 0 ; + + while ( i < n && ! (*(s_in_parallel_query[i]))() ) { ++i ; } + + return i < n ; +} + +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { + +/* Default allocation mechanism */ +HostSpace::HostSpace() + : m_alloc_mech( +#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE ) + HostSpace::INTEL_MM_ALLOC +#elif defined( KOKKOS_POSIX_MMAP_FLAGS ) + HostSpace::POSIX_MMAP +#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE ) + HostSpace::POSIX_MEMALIGN +#else + HostSpace::STD_MALLOC +#endif + ) +{} + +/* Default allocation mechanism */ +HostSpace::HostSpace( const HostSpace::AllocationMechanism & arg_alloc_mech ) + : m_alloc_mech( HostSpace::STD_MALLOC ) +{ + if ( arg_alloc_mech == STD_MALLOC ) { + m_alloc_mech = HostSpace::STD_MALLOC ; + } +#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE ) + else if ( arg_alloc_mech == HostSpace::INTEL_MM_ALLOC ) { + m_alloc_mech = HostSpace::INTEL_MM_ALLOC ; + } +#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE ) + else if ( arg_alloc_mech == HostSpace::POSIX_MEMALIGN ) { + m_alloc_mech = HostSpace::POSIX_MEMALIGN ; + } 
+#elif defined( KOKKOS_POSIX_MMAP_FLAGS ) + else if ( arg_alloc_mech == HostSpace::POSIX_MMAP ) { + m_alloc_mech = HostSpace::POSIX_MMAP ; + } +#endif + else { + const char * const mech = + ( arg_alloc_mech == HostSpace::INTEL_MM_ALLOC ) ? "INTEL_MM_ALLOC" : ( + ( arg_alloc_mech == HostSpace::POSIX_MEMALIGN ) ? "POSIX_MEMALIGN" : ( + ( arg_alloc_mech == HostSpace::POSIX_MMAP ) ? "POSIX_MMAP" : "" )); + + std::string msg ; + msg.append("Kokkos::HostSpace "); + msg.append(mech); + msg.append(" is not available" ); + Kokkos::Impl::throw_runtime_exception( msg ); + } +} + +void * HostSpace::allocate( const size_t arg_alloc_size ) const +{ + static_assert( sizeof(void*) == sizeof(uintptr_t) + , "Error sizeof(void*) != sizeof(uintptr_t)" ); + + static_assert( Kokkos::Impl::is_integral_power_of_two( Kokkos::Impl::MEMORY_ALIGNMENT ) + , "Memory alignment must be power of two" ); + + constexpr uintptr_t alignment = Kokkos::Impl::MEMORY_ALIGNMENT ; + constexpr uintptr_t alignment_mask = alignment - 1 ; + + void * ptr = 0 ; + + if ( arg_alloc_size ) { + + if ( m_alloc_mech == STD_MALLOC ) { + // Over-allocate to and round up to guarantee proper alignment. + size_t size_padded = arg_alloc_size + sizeof(void*) + alignment ; + + void * alloc_ptr = malloc( size_padded ); + + if (alloc_ptr) { + uintptr_t address = reinterpret_cast<uintptr_t>(alloc_ptr); + + // offset enough to record the alloc_ptr + address += sizeof(void *); + uintptr_t rem = address % alignment; + uintptr_t offset = rem ? 
(alignment - rem) : 0u; + address += offset; + ptr = reinterpret_cast<void *>(address); + // record the alloc'd pointer + address -= sizeof(void *); + *reinterpret_cast<void **>(address) = alloc_ptr; + } + } + +#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE ) + else if ( m_alloc_mech == INTEL_MM_ALLOC ) { + ptr = _mm_malloc( arg_alloc_size , alignment ); + } +#endif + +#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE ) + else if ( m_alloc_mech == POSIX_MEMALIGN ) { + posix_memalign( & ptr, alignment , arg_alloc_size ); + } +#endif + +#if defined( KOKKOS_POSIX_MMAP_FLAGS ) + else if ( m_alloc_mech == POSIX_MMAP ) { + constexpr size_t use_huge_pages = (1u << 27); + constexpr int prot = PROT_READ | PROT_WRITE ; + const int flags = arg_alloc_size < use_huge_pages + ? KOKKOS_POSIX_MMAP_FLAGS + : KOKKOS_POSIX_MMAP_FLAGS_HUGE ; + + // read write access to private memory + + ptr = mmap( NULL /* address hint, if NULL OS kernel chooses address */ + , arg_alloc_size /* size in bytes */ + , prot /* memory protection */ + , flags /* visibility of updates */ + , -1 /* file descriptor */ + , 0 /* offset */ + ); + +/* Associated reallocation: + ptr = mremap( old_ptr , old_size , new_size , MREMAP_MAYMOVE ); +*/ + } +#endif + } + + if ( ( ptr == 0 ) || ( reinterpret_cast<uintptr_t>(ptr) == ~uintptr_t(0) ) + || ( reinterpret_cast<uintptr_t>(ptr) & alignment_mask ) ) { + std::ostringstream msg ; + msg << "Kokkos::HostSpace::allocate[ " ; + switch( m_alloc_mech ) { + case STD_MALLOC: msg << "STD_MALLOC" ; break ; + case POSIX_MEMALIGN: msg << "POSIX_MEMALIGN" ; break ; + case POSIX_MMAP: msg << "POSIX_MMAP" ; break ; + case INTEL_MM_ALLOC: msg << "INTEL_MM_ALLOC" ; break ; + } + msg << " ]( " << arg_alloc_size << " ) FAILED" ; + if ( ptr == NULL ) { msg << " NULL" ; } + else { msg << " NOT ALIGNED " << ptr ; } + + std::cerr << msg.str() << std::endl ; + std::cerr.flush(); + + Kokkos::Impl::throw_runtime_exception( msg.str() ); + } + + return ptr; +} + + +void HostSpace::deallocate( void * 
const arg_alloc_ptr , const size_t arg_alloc_size ) const +{ + if ( arg_alloc_ptr ) { + + if ( m_alloc_mech == STD_MALLOC ) { + void * alloc_ptr = *(reinterpret_cast<void **>(arg_alloc_ptr) -1); + free( alloc_ptr ); + } + +#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE ) + else if ( m_alloc_mech == INTEL_MM_ALLOC ) { + _mm_free( arg_alloc_ptr ); + } +#endif + +#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE ) + else if ( m_alloc_mech == POSIX_MEMALIGN ) { + free( arg_alloc_ptr ); + } +#endif + +#if defined( KOKKOS_POSIX_MMAP_FLAGS ) + else if ( m_alloc_mech == POSIX_MMAP ) { + munmap( arg_alloc_ptr , arg_alloc_size ); + } +#endif + + } +} + +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +SharedAllocationRecord< void , void > +SharedAllocationRecord< Kokkos::HostSpace , void >::s_root_record ; + +void +SharedAllocationRecord< Kokkos::HostSpace , void >:: +deallocate( SharedAllocationRecord< void , void > * arg_rec ) +{ + delete static_cast<SharedAllocationRecord*>(arg_rec); +} + +SharedAllocationRecord< Kokkos::HostSpace , void >:: +~SharedAllocationRecord() +{ + m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr + , SharedAllocationRecord< void , void >::m_alloc_size + ); +} + +SharedAllocationRecord< Kokkos::HostSpace , void >:: +SharedAllocationRecord( const Kokkos::HostSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size + , const SharedAllocationRecord< void , void >::function_type arg_dealloc + ) + // Pass through allocated [ SharedAllocationHeader , user_memory ] + // Pass through deallocation function + : SharedAllocationRecord< void , void > + ( & SharedAllocationRecord< Kokkos::HostSpace , void >::s_root_record + , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + 
arg_alloc_size ) ) + , sizeof(SharedAllocationHeader) + arg_alloc_size + , arg_dealloc + ) + , m_space( arg_space ) +{ + // Fill in the Header information + RecordBase::m_alloc_ptr->m_record = static_cast< SharedAllocationRecord< void , void > * >( this ); + + strncpy( RecordBase::m_alloc_ptr->m_label + , arg_label.c_str() + , SharedAllocationHeader::maximum_label_length + ); +} + +//---------------------------------------------------------------------------- + +void * SharedAllocationRecord< Kokkos::HostSpace , void >:: +allocate_tracked( const Kokkos::HostSpace & arg_space + , const std::string & arg_alloc_label + , const size_t arg_alloc_size ) +{ + if ( ! arg_alloc_size ) return (void *) 0 ; + + SharedAllocationRecord * const r = + allocate( arg_space , arg_alloc_label , arg_alloc_size ); + + RecordBase::increment( r ); + + return r->data(); +} + +void SharedAllocationRecord< Kokkos::HostSpace , void >:: +deallocate_tracked( void * const arg_alloc_ptr ) +{ + if ( arg_alloc_ptr != 0 ) { + SharedAllocationRecord * const r = get_record( arg_alloc_ptr ); + + RecordBase::decrement( r ); + } +} + +void * SharedAllocationRecord< Kokkos::HostSpace , void >:: +reallocate_tracked( void * const arg_alloc_ptr + , const size_t arg_alloc_size ) +{ + SharedAllocationRecord * const r_old = get_record( arg_alloc_ptr ); + SharedAllocationRecord * const r_new = allocate( r_old->m_space , r_old->get_label() , arg_alloc_size ); + + Kokkos::Impl::DeepCopy<HostSpace,HostSpace>( r_new->data() , r_old->data() + , std::min( r_old->size() , r_new->size() ) ); + + RecordBase::increment( r_new ); + RecordBase::decrement( r_old ); + + return r_new->data(); +} + +SharedAllocationRecord< Kokkos::HostSpace , void > * +SharedAllocationRecord< Kokkos::HostSpace , void >::get_record( void * alloc_ptr ) +{ + typedef SharedAllocationHeader Header ; + typedef SharedAllocationRecord< Kokkos::HostSpace , void > RecordHost ; + + SharedAllocationHeader const * const head = alloc_ptr ? 
Header::get_header( alloc_ptr ) : (SharedAllocationHeader *)0 ; + RecordHost * const record = head ? static_cast< RecordHost * >( head->m_record ) : (RecordHost *) 0 ; + + if ( ! alloc_ptr || record->m_alloc_ptr != head ) { + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void >::get_record ERROR" ) ); + } + + return record ; +} + +// Iterate records to print orphaned memory ... +void SharedAllocationRecord< Kokkos::HostSpace , void >:: +print_records( std::ostream & s , const Kokkos::HostSpace & space , bool detail ) +{ + SharedAllocationRecord< void , void >::print_host_accessible_records( s , "HostSpace" , & s_root_record , detail ); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template< class > +struct ViewOperatorBoundsErrorAbort ; + +template<> +struct ViewOperatorBoundsErrorAbort< Kokkos::HostSpace > { + static void apply( const size_t rank + , const size_t n0 , const size_t n1 + , const size_t n2 , const size_t n3 + , const size_t n4 , const size_t n5 + , const size_t n6 , const size_t n7 + , const size_t i0 , const size_t i1 + , const size_t i2 , const size_t i3 + , const size_t i4 , const size_t i5 + , const size_t i6 , const size_t i7 ); +}; + +void ViewOperatorBoundsErrorAbort< Kokkos::HostSpace >:: +apply( const size_t rank + , const size_t n0 , const size_t n1 + , const size_t n2 , const size_t n3 + , const size_t n4 , const size_t n5 + , const size_t n6 , const size_t n7 + , const size_t i0 , const size_t i1 + , const size_t i2 , const size_t i3 + , const size_t i4 , const size_t i5 + , const size_t i6 , const size_t i7 ) +{ + char buffer[512]; + + snprintf( buffer , sizeof(buffer) + , "View operator bounds error : 
rank(%lu) dim(%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu) index(%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu)" + , rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 + , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 ); + + Kokkos::Impl::throw_runtime_exception( buffer ); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace { + const unsigned HOST_SPACE_ATOMIC_MASK = 0xFFFF; + const unsigned HOST_SPACE_ATOMIC_XOR_MASK = 0x5A39; + static int HOST_SPACE_ATOMIC_LOCKS[HOST_SPACE_ATOMIC_MASK+1]; +} + +namespace Impl { +void init_lock_array_host_space() { + static int is_initialized = 0; + if(! is_initialized) + for(int i = 0; i < static_cast<int> (HOST_SPACE_ATOMIC_MASK+1); i++) + HOST_SPACE_ATOMIC_LOCKS[i] = 0; +} + +bool lock_address_host_space(void* ptr) { + return 0 == atomic_compare_exchange( &HOST_SPACE_ATOMIC_LOCKS[ + (( size_t(ptr) >> 2 ) & HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK] , + 0 , 1); +} + +void unlock_address_host_space(void* ptr) { + atomic_exchange( &HOST_SPACE_ATOMIC_LOCKS[ + (( size_t(ptr) >> 2 ) & HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK] , + 0); +} + +} +} diff --git a/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp b/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp new file mode 100644 index 0000000000000000000000000000000000000000..eb3da7501ebeeda048e0e8c78e81f20fb60060fa --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp @@ -0,0 +1,107 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined( KOKKOS_ATOMIC_HPP ) && ! 
namespace Kokkos {

//----------------------------------------------------------------------------
// memory_fence()
//
// Full memory fence.  The implementation is picked by the atomics backend
// macro that is defined; the first matching branch wins:
//   KOKKOS_ATOMICS_USE_CUDA                 -> __threadfence()
//   KOKKOS_ATOMICS_USE_GCC, or NVCC
//     together with KOKKOS_ATOMICS_USE_INTEL -> __sync_synchronize()
//   KOKKOS_ATOMICS_USE_INTEL                -> _mm_mfence()
//   KOKKOS_ATOMICS_USE_OMP31                -> #pragma omp flush
//   KOKKOS_ATOMICS_USE_WINDOWS              -> MemoryBarrier()
// When none of these is defined, compilation stops with an #error.

KOKKOS_FORCEINLINE_FUNCTION
void memory_fence()
{
#if defined( KOKKOS_ATOMICS_USE_CUDA )
  __threadfence();
#elif defined( KOKKOS_ATOMICS_USE_GCC ) || \
      ( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ATOMICS_USE_INTEL ) )
  __sync_synchronize();
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
  _mm_mfence();
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
  #pragma omp flush
#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
  MemoryBarrier();
#else
  #error "Error: memory_fence() not defined"
#endif
}

//////////////////////////////////////////////////////
// store_fence()
//
// If possible use a store fence on the architecture, if not run a full memory fence
//
// The dedicated instruction (x86-64 "sfence") is used only when both
// KOKKOS_ENABLE_ASM and KOKKOS_USE_ISA_X86_64 are defined; the "memory"
// clobber also keeps the compiler from moving memory accesses across it.

KOKKOS_FORCEINLINE_FUNCTION
void store_fence()
{
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 )
  asm volatile (
	"sfence" ::: "memory"
  	);
#else
  memory_fence();
#endif
}

//////////////////////////////////////////////////////
// load_fence()
//
// If possible use a load fence on the architecture, if not run a full memory fence
//
// The dedicated instruction (x86-64 "lfence") is used only when both
// KOKKOS_ENABLE_ASM and KOKKOS_USE_ISA_X86_64 are defined; the "memory"
// clobber also keeps the compiler from moving memory accesses across it.

KOKKOS_FORCEINLINE_FUNCTION
void load_fence()
{
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 )
  asm volatile (
	"lfence" ::: "memory"
  	);
#else
  memory_fence();
#endif
}

} // namespace Kokkos
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_PHYSICAL_LAYOUT_HPP +#define KOKKOS_PHYSICAL_LAYOUT_HPP + + +#include <Kokkos_View.hpp> +namespace Kokkos { +namespace Impl { + + + +struct PhysicalLayout { + enum LayoutType {Left,Right,Scalar,Error}; + LayoutType layout_type; + int rank; + long long int stride[8]; //distance between two neighboring elements in a given dimension + + template< class T , class L , class D , class M > + PhysicalLayout( const View<T,L,D,M> & view ) + : layout_type( is_same< typename View<T,L,D,M>::array_layout , LayoutLeft >::value ? Left : ( + is_same< typename View<T,L,D,M>::array_layout , LayoutRight >::value ? Right : Error )) + , rank( view.Rank ) + { + for(int i=0;i<8;i++) stride[i] = 0; + view.stride( stride ); + } +}; + +} +} +#endif diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_DeviceInfo.hpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_DeviceInfo.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8ea1e816cdab4fc29679ee8df8800cf2a59f026e --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_DeviceInfo.hpp @@ -0,0 +1,57 @@ +/* + //@HEADER + // ************************************************************************ + // + // Kokkos v. 2.0 + // Copyright (2014) Sandia Corporation + // + // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, + // the U.S. Government retains certain rights in this software. + // + // Redistribution and use in source and binary forms, with or without + // modification, are permitted provided that the following conditions are + // met: + // + // 1. Redistributions of source code must retain the above copyright + // notice, this list of conditions and the following disclaimer. + // + // 2. 
Redistributions in binary form must reproduce the above copyright + // notice, this list of conditions and the following disclaimer in the + // documentation and/or other materials provided with the distribution. + // + // 3. Neither the name of the Corporation nor the names of the + // contributors may be used to endorse or promote products derived from + // this software without specific prior written permission. + // + // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY + // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE + // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + // + // Questions? Contact H. 
namespace Kokkos {
namespace Profiling {

    // Device description record for the profiling interface; it currently
    // carries only a numeric device identifier.
    // NOTE(review): this header uses uint32_t without including <stdint.h>
    // or <cstdint> itself, so it relies on the including file having made
    // the type visible first -- confirm that every includer does.
    struct KokkosPDeviceInfo {
        uint32_t deviceID;  // identifier of the device
    };

}
}
IN NO EVENT SHALL SANDIA CORPORATION OR THE + // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + // + // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) + // + // ************************************************************************ + //@HEADER + */ + +#include <impl/Kokkos_Profiling_Interface.hpp> + +#if (KOKKOS_ENABLE_PROFILING) +#include <string.h> + +namespace Kokkos { + namespace Profiling { + bool profileLibraryLoaded() { + return (NULL != initProfileLibrary); + } + + void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) { + if(NULL != beginForCallee) { + Kokkos::fence(); + (*beginForCallee)(kernelPrefix.c_str(), devID, kernelID); + } + } + + void endParallelFor(const uint64_t kernelID) { + if(NULL != endForCallee) { + Kokkos::fence(); + (*endForCallee)(kernelID); + } + } + + void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) { + if(NULL != beginScanCallee) { + Kokkos::fence(); + (*beginScanCallee)(kernelPrefix.c_str(), devID, kernelID); + } + } + + void endParallelScan(const uint64_t kernelID) { + if(NULL != endScanCallee) { + Kokkos::fence(); + (*endScanCallee)(kernelID); + } + } + + void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) { + if(NULL != beginReduceCallee) { + Kokkos::fence(); + (*beginReduceCallee)(kernelPrefix.c_str(), devID, kernelID); + } + } + + void endParallelReduce(const uint64_t kernelID) { + if(NULL != endReduceCallee) { + Kokkos::fence(); 
+ (*endReduceCallee)(kernelID); + } + } + + void initialize() { + + // Make sure initialize calls happens only once + static int is_initialized = 0; + if(is_initialized) return; + is_initialized = 1; + + void* firstProfileLibrary; + + char* envProfileLibrary = getenv("KOKKOS_PROFILE_LIBRARY"); + + // If we do not find a profiling library in the environment then exit + // early. + if( NULL == envProfileLibrary ) { + return ; + } + + char* envProfileCopy = (char*) malloc(sizeof(char) * (strlen(envProfileLibrary) + 1)); + sprintf(envProfileCopy, "%s", envProfileLibrary); + + char* profileLibraryName = strtok(envProfileCopy, ";"); + + if( (NULL != profileLibraryName) && (strcmp(profileLibraryName, "") != 0) ) { + firstProfileLibrary = dlopen(profileLibraryName, RTLD_NOW | RTLD_GLOBAL); + + if(NULL == firstProfileLibrary) { + std::cerr << "Error: Unable to load KokkosP library: " << + profileLibraryName << std::endl; + } else { + std::cout << "KokkosP: Library Loaded: " << profileLibraryName << std::endl; + + // dlsym returns a pointer to an object, while we want to assign to pointer to function + // A direct cast will give warnings hence, we have to workaround the issue by casting pointer to pointers. 
+ auto p1 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_for"); + beginForCallee = *((beginFunction*) &p1); + auto p2 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_scan"); + beginScanCallee = *((beginFunction*) &p2); + auto p3 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_reduce"); + beginReduceCallee = *((beginFunction*) &p3); + + auto p4 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_scan"); + endScanCallee = *((endFunction*) &p4); + auto p5 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_for"); + endForCallee = *((endFunction*) &p5); + auto p6 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_reduce"); + endReduceCallee = *((endFunction*) &p6); + + auto p7 = dlsym(firstProfileLibrary, "kokkosp_init_library"); + initProfileLibrary = *((initFunction*) &p7); + auto p8 = dlsym(firstProfileLibrary, "kokkosp_finalize_library"); + finalizeProfileLibrary = *((finalizeFunction*) &p8); + } + } + + if(NULL != initProfileLibrary) { + (*initProfileLibrary)(0, + (uint64_t) KOKKOSP_INTERFACE_VERSION, + (uint32_t) 0, + NULL); + } + + free(envProfileCopy); + } + + void finalize() { + // Make sure finalize calls happens only once + static int is_finalized = 0; + if(is_finalized) return; + is_finalized = 1; + + if(NULL != finalizeProfileLibrary) { + (*finalizeProfileLibrary)(); + + // Set all profile hooks to NULL to prevent + // any additional calls. 
Once we are told to + // finalize, we mean it + beginForCallee = NULL; + beginScanCallee = NULL; + beginReduceCallee = NULL; + endScanCallee = NULL; + endForCallee = NULL; + endReduceCallee = NULL; + initProfileLibrary = NULL; + finalizeProfileLibrary = NULL; + } + } + } +} + +#endif diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4f01256335cd82962d1744a9895374c170a5cb8b --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp @@ -0,0 +1,118 @@ +/* + //@HEADER + // ************************************************************************ + // + // Kokkos v. 2.0 + // Copyright (2014) Sandia Corporation + // + // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, + // the U.S. Government retains certain rights in this software. + // + // Redistribution and use in source and binary forms, with or without + // modification, are permitted provided that the following conditions are + // met: + // + // 1. Redistributions of source code must retain the above copyright + // notice, this list of conditions and the following disclaimer. + // + // 2. Redistributions in binary form must reproduce the above copyright + // notice, this list of conditions and the following disclaimer in the + // documentation and/or other materials provided with the distribution. + // + // 3. Neither the name of the Corporation nor the names of the + // contributors may be used to endorse or promote products derived from + // this software without specific prior written permission. + // + // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY + // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + // PURPOSE ARE DISCLAIMED. 
namespace Kokkos {
  namespace Profiling {

    // Signatures of the entry points looked up in a profiling library.
    // init:     called with (0, KOKKOSP_INTERFACE_VERSION, 0, NULL) by initialize()
    // finalize: called with no arguments by finalize()
    // begin:    kernel name, device id, location for the kernel id
    // end:      kernel id
    typedef void (*initFunction)(const int,
	const uint64_t,
	const uint32_t,
	KokkosPDeviceInfo*);
    typedef void (*finalizeFunction)();
    typedef void (*beginFunction)(const char*, const uint32_t, uint64_t*);
    typedef void (*endFunction)(uint64_t);

    // Hook pointers; NULL means "no hook installed".
    // NOTE(review): these are declared static in a header, so every
    // translation unit that includes it gets its own private copy.  Hooks
    // installed by initialize() are therefore only visible inside the
    // translation unit that defines it -- presumably fine as long as all
    // access goes through the functions declared below; confirm.
    static initFunction initProfileLibrary = NULL;
    static finalizeFunction finalizeProfileLibrary = NULL;
    static beginFunction beginForCallee = NULL;
    static beginFunction beginScanCallee = NULL;
    static beginFunction beginReduceCallee = NULL;
    static endFunction endForCallee = NULL;
    static endFunction endScanCallee = NULL;
    static endFunction endReduceCallee = NULL;

    // True when a profiling library's init hook is installed.
    bool profileLibraryLoaded();

    // Begin/end notifications for the three kinds of parallel kernels.
    void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID);
    void endParallelFor(const uint64_t kernelID);
    void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID);
    void endParallelScan(const uint64_t kernelID);
    void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID);
    void endParallelReduce(const uint64_t kernelID);

    // Load / shut down the profiling library.
    void initialize();
    void finalize();

    //Define finalize_fake inline to get rid of warnings for unused static variables
    // (it references every hook pointer above; it operates on the including
    //  translation unit's own copies of them).
    inline void finalize_fake() {
      if(NULL != finalizeProfileLibrary) {
        (*finalizeProfileLibrary)();

        // Set all profile hooks to NULL to prevent
        // any additional calls. Once we are told to
        // finalize, we mean it
        beginForCallee = NULL;
        beginScanCallee = NULL;
        beginReduceCallee = NULL;
        endScanCallee = NULL;
        endForCallee = NULL;
        endReduceCallee = NULL;
        initProfileLibrary = NULL;
        finalizeProfileLibrary = NULL;
      }
    }


  }
}
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <stdlib.h> +#include <sstream> +#include <Kokkos_Serial.hpp> +#include <impl/Kokkos_Traits.hpp> +#include <impl/Kokkos_Error.hpp> + +#if defined( KOKKOS_HAVE_SERIAL ) + +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { +namespace SerialImpl { + +Sentinel::Sentinel() : m_scratch(0), m_reduce_end(0), m_shared_end(0) {} + +Sentinel::~Sentinel() +{ + if ( m_scratch ) { free( m_scratch ); } + m_scratch = 0 ; + m_reduce_end = 0 ; + m_shared_end = 0 ; +} + +Sentinel & Sentinel::singleton() +{ + static Sentinel s ; return s ; +} + +inline +unsigned align( unsigned n ) +{ + enum { ALIGN = 0x0100 /* 256 */ , MASK = ALIGN - 1 }; + return ( n + MASK ) & ~MASK ; +} + +} // namespace + +SerialTeamMember::SerialTeamMember( int arg_league_rank + , int arg_league_size + , int arg_shared_size + ) + : m_space( ((char *) SerialImpl::Sentinel::singleton().m_scratch) + SerialImpl::Sentinel::singleton().m_reduce_end + , arg_shared_size ) + , m_league_rank( arg_league_rank ) + , m_league_size( arg_league_size ) +{} + +} // namespace Impl + +void * Serial::scratch_memory_resize( unsigned reduce_size , unsigned shared_size ) +{ + static Impl::SerialImpl::Sentinel & s = Impl::SerialImpl::Sentinel::singleton(); + + reduce_size = Impl::SerialImpl::align( reduce_size ); + shared_size = Impl::SerialImpl::align( shared_size ); + + if ( ( s.m_reduce_end < reduce_size ) || + ( s.m_shared_end < s.m_reduce_end + shared_size ) ) { + + if ( s.m_scratch ) { free( s.m_scratch ); } + + if ( s.m_reduce_end < reduce_size ) s.m_reduce_end = reduce_size ; + if ( s.m_shared_end < s.m_reduce_end + shared_size ) s.m_shared_end = s.m_reduce_end + shared_size ; + + s.m_scratch = malloc( s.m_shared_end ); + } + + return s.m_scratch ; +} + +} // namespace Kokkos + +#endif // defined( KOKKOS_HAVE_SERIAL ) + 
+ diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e8bdbde6c60f182f588617dda2a9c2f32530694c --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp @@ -0,0 +1,147 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> + +#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY ) + +#include <impl/Kokkos_TaskQueue_impl.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template class TaskQueue< Kokkos::Serial > ; + +void TaskQueueSpecialization< Kokkos::Serial >::execute + ( TaskQueue< Kokkos::Serial > * const queue ) +{ + using execution_space = Kokkos::Serial ; + using queue_type = TaskQueue< execution_space > ; + using task_root_type = TaskBase< execution_space , void , void > ; + using Member = TaskExec< execution_space > ; + + task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + + Member exec ; + + // Loop until all queues are empty + while ( 0 < queue->m_ready_count ) { + + task_root_type * task = end ; + + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_task( & queue->m_ready[i][j] ); + } + } + + if ( end != task ) { + + // pop_task resulted in lock == task->m_next + // In the executing state + + (*task->m_apply)( task , & exec ); + +#if 0 + printf( 
"TaskQueue<Serial>::executed: 0x%lx { 0x%lx 0x%lx %d %d %d }\n" + , uintptr_t(task) + , uintptr_t(task->m_wait) + , uintptr_t(task->m_next) + , task->m_task_type + , task->m_priority + , task->m_ref_count ); +#endif + + // If a respawn then re-enqueue otherwise the task is complete + // and all tasks waiting on this task are updated. + queue->complete( task ); + } + else if ( 0 != queue->m_ready_count ) { + Kokkos::abort("TaskQueue<Serial>::execute ERROR: ready_count"); + } + } +} + +void TaskQueueSpecialization< Kokkos::Serial > :: + iff_single_thread_recursive_execute( + TaskQueue< Kokkos::Serial > * const queue ) +{ + using execution_space = Kokkos::Serial ; + using queue_type = TaskQueue< execution_space > ; + using task_root_type = TaskBase< execution_space , void , void > ; + using Member = TaskExec< execution_space > ; + + task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + + Member exec ; + + // Loop until no runnable task + + task_root_type * task = end ; + + do { + + task = end ; + + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_task( & queue->m_ready[i][j] ); + } + } + + if ( end == task ) break ; + + (*task->m_apply)( task , & exec ); + + queue->complete( task ); + + } while(1); +} + +}} /* namespace Kokkos::Impl */ + +#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp new file mode 100644 index 0000000000000000000000000000000000000000..48a110c5f1583cd4943a011f3d33bd25e3cd00f2 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp @@ -0,0 +1,271 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_SERIAL_TASK_HPP +#define KOKKOS_IMPL_SERIAL_TASK_HPP + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- + +template<> +class TaskQueueSpecialization< Kokkos::Serial > +{ +public: + + using execution_space = Kokkos::Serial ; + using memory_space = Kokkos::HostSpace ; + using queue_type = Kokkos::Impl::TaskQueue< execution_space > ; + using task_base_type = Kokkos::Impl::TaskBase< execution_space , void , void > ; + + static + void iff_single_thread_recursive_execute( queue_type * const ); + + static + void execute( queue_type * const ); + + template< typename FunctorType > + static + void proc_set_apply( task_base_type::function_type * ptr ) + { + using TaskType = TaskBase< Kokkos::Serial + , typename FunctorType::value_type + , FunctorType + > ; + *ptr = TaskType::apply ; + } +}; + +extern template class TaskQueue< Kokkos::Serial > ; + +//---------------------------------------------------------------------------- + +template<> +class TaskExec< Kokkos::Serial > +{ +public: + + KOKKOS_INLINE_FUNCTION void team_barrier() const {} + KOKKOS_INLINE_FUNCTION int team_rank() const { return 0 ; } + KOKKOS_INLINE_FUNCTION int team_size() const { return 1 ; } +}; + +template<typename iType> +struct TeamThreadRangeBoundariesStruct<iType, TaskExec< Kokkos::Serial > > +{ + typedef iType index_type; + const iType start ; + const iType end ; + enum {increment = 1}; + //const TaskExec< Kokkos::Serial > & thread; + TaskExec< Kokkos::Serial > & thread; + + KOKKOS_INLINE_FUNCTION + TeamThreadRangeBoundariesStruct + //( const TaskExec< Kokkos::Serial > & 
arg_thread, const iType& arg_count) + ( TaskExec< Kokkos::Serial > & arg_thread, const iType& arg_count) + : start(0) + , end(arg_count) + , thread(arg_thread) + {} + + KOKKOS_INLINE_FUNCTION + TeamThreadRangeBoundariesStruct + //( const TaskExec< Kokkos::Serial > & arg_thread + ( TaskExec< Kokkos::Serial > & arg_thread + , const iType& arg_start + , const iType & arg_end + ) + : start( arg_start ) + , end( arg_end) + , thread( arg_thread ) + {} +}; + +}} /* namespace Kokkos::Impl */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +/* +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > > +TeamThreadRange( const Impl::TaskExec< Kokkos::Serial > & thread + , const iType & count ) +{ + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >(thread,count); +} +*/ +//TODO const issue omp +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > > +TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread + , const iType & count ) +{ + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >(thread,count); +} +/* +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > > +TeamThreadRange( const Impl:: TaskExec< Kokkos::Serial > & thread, const iType & start , const iType & end ) +{ + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >(thread,start,end); +} +*/ +//TODO const issue omp +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > > +TeamThreadRange( Impl:: TaskExec< Kokkos::Serial > & thread, const iType & start , const iType & end ) +{ + return 
Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >(thread,start,end); +} + + /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the the calling thread team. + * This functionality requires C++11 support.*/ +template<typename iType, class Lambda> +KOKKOS_INLINE_FUNCTION +void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >& loop_boundaries, const Lambda& lambda) { + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) + lambda(i); +} + +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries, + const Lambda & lambda, + ValueType& initialized_result) +{ + + ValueType result = initialized_result; + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) + lambda(i, result); + + initialized_result = result; +} + +template< typename iType, class Lambda, typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries, + const Lambda & lambda, + const JoinType & join, + ValueType& initialized_result) +{ + ValueType result = initialized_result; + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) + lambda(i, result); + + initialized_result = result; +} +// placeholder for future function +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries, + const Lambda & lambda, + ValueType& initialized_result) +{ +} +// placeholder for future function +template< 
typename iType, class Lambda, typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries, + const Lambda & lambda, + const JoinType & join, + ValueType& initialized_result) +{ +} + +template< typename ValueType, typename iType, class Lambda > +KOKKOS_INLINE_FUNCTION +void parallel_scan + (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries, + const Lambda & lambda) +{ + ValueType accum = 0 ; + ValueType val, local_total; + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + local_total = 0; + lambda(i,local_total,false); + val = accum; + lambda(i,val,true); + accum += local_total; + } + +} + +// placeholder for future function +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_scan + (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries, + const Lambda & lambda) +{ +} + +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #ifndef KOKKOS_IMPL_SERIAL_TASK_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1577df07cd74f2634f9f98cc94d3825062ad3ff6 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.cpp @@ -0,0 +1,348 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#include <impl/Kokkos_Serial_TaskPolicy.hpp> + +#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY ) + +#include <stdlib.h> +#include <stdexcept> +#include <iostream> +#include <sstream> +#include <string> + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { + +TaskPolicy< Kokkos::Serial >::member_type & +TaskPolicy< Kokkos::Serial >::member_single() +{ + static member_type s(0,1,0); + return s ; +} + +} // namespace Experimental +} // namespace Kokkos + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +typedef TaskMember< Kokkos::Serial , void , void > Task ; + +//---------------------------------------------------------------------------- + +namespace { + +inline +unsigned padded_sizeof_derived( unsigned sizeof_derived ) +{ + return sizeof_derived + + ( sizeof_derived % sizeof(Task*) ? 
sizeof(Task*) - sizeof_derived % sizeof(Task*) : 0 ); +} + +} // namespace + +void Task::deallocate( void * ptr ) +{ + free( ptr ); +} + +void * Task::allocate( const unsigned arg_sizeof_derived + , const unsigned arg_dependence_capacity ) +{ + return malloc( padded_sizeof_derived( arg_sizeof_derived ) + arg_dependence_capacity * sizeof(Task*) ); +} + +Task::~TaskMember() +{ + +} + +Task::TaskMember( const Task::function_verify_type arg_verify + , const Task::function_dealloc_type arg_dealloc + , const Task::function_apply_type arg_apply + , const unsigned arg_sizeof_derived + , const unsigned arg_dependence_capacity + ) + : m_dealloc( arg_dealloc ) + , m_verify( arg_verify ) + , m_apply( arg_apply ) + , m_dep( (Task **)( ((unsigned char *) this) + padded_sizeof_derived( arg_sizeof_derived ) ) ) + , m_wait( 0 ) + , m_next( 0 ) + , m_dep_capacity( arg_dependence_capacity ) + , m_dep_size( 0 ) + , m_ref_count( 0 ) + , m_state( TASK_STATE_CONSTRUCTING ) +{ + for ( unsigned i = 0 ; i < arg_dependence_capacity ; ++i ) m_dep[i] = 0 ; +} + +Task::TaskMember( const Task::function_dealloc_type arg_dealloc + , const Task::function_apply_type arg_apply + , const unsigned arg_sizeof_derived + , const unsigned arg_dependence_capacity + ) + : m_dealloc( arg_dealloc ) + , m_verify( & Task::verify_type<void> ) + , m_apply( arg_apply ) + , m_dep( (Task **)( ((unsigned char *) this) + padded_sizeof_derived( arg_sizeof_derived ) ) ) + , m_wait( 0 ) + , m_next( 0 ) + , m_dep_capacity( arg_dependence_capacity ) + , m_dep_size( 0 ) + , m_ref_count( 0 ) + , m_state( TASK_STATE_CONSTRUCTING ) +{ + for ( unsigned i = 0 ; i < arg_dependence_capacity ; ++i ) m_dep[i] = 0 ; +} + +//---------------------------------------------------------------------------- + +void Task::throw_error_add_dependence() const +{ + std::cerr << "TaskMember< Serial >::add_dependence ERROR" + << " state(" << m_state << ")" + << " dep_size(" << m_dep_size << ")" + << std::endl ; + throw 
std::runtime_error("TaskMember< Serial >::add_dependence ERROR"); +} + +void Task::throw_error_verify_type() +{ + throw std::runtime_error("TaskMember< Serial >::verify_type ERROR"); +} + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + +void Task::assign( Task ** const lhs , Task * rhs , const bool no_throw ) +{ + static const char msg_error_header[] = "Kokkos::Experimental::Impl::TaskManager<Kokkos::Serial>::assign ERROR" ; + static const char msg_error_count[] = ": negative reference count" ; + static const char msg_error_complete[] = ": destroy task that is not complete" ; + static const char msg_error_dependences[] = ": destroy task that has dependences" ; + static const char msg_error_exception[] = ": caught internal exception" ; + + const char * msg_error = 0 ; + + try { + + if ( *lhs ) { + + const int count = --((**lhs).m_ref_count); + + if ( 0 == count ) { + + // Reference count at zero, delete it + + // Should only be deallocating a completed task + if ( (**lhs).m_state == Kokkos::Experimental::TASK_STATE_COMPLETE ) { + + // A completed task should not have dependences... + for ( int i = 0 ; i < (**lhs).m_dep_size && 0 == msg_error ; ++i ) { + if ( (**lhs).m_dep[i] ) msg_error = msg_error_dependences ; + } + } + else { + msg_error = msg_error_complete ; + } + + if ( 0 == msg_error ) { + // Get deletion function and apply it + const Task::function_dealloc_type d = (**lhs).m_dealloc ; + + (*d)( *lhs ); + } + } + else if ( count <= 0 ) { + msg_error = msg_error_count ; + } + } + + if ( 0 == msg_error && rhs ) { ++( rhs->m_ref_count ); } + + *lhs = rhs ; + } + catch( ... 
) { + if ( 0 == msg_error ) msg_error = msg_error_exception ; + } + + if ( 0 != msg_error ) { + if ( no_throw ) { + std::cerr << msg_error_header << msg_error << std::endl ; + std::cerr.flush(); + } + else { + std::string msg(msg_error_header); + msg.append(msg_error); + throw std::runtime_error( msg ); + } + } +} +#endif + +namespace { + +Task * s_ready = 0 ; +Task * s_denied = reinterpret_cast<Task*>( ~((uintptr_t)0) ); + +} + +void Task::schedule() +{ + // Execute ready tasks in case the task being scheduled + // is dependent upon a waiting and ready task. + + Task::execute_ready_tasks(); + + // spawning : Constructing -> Waiting + // respawning : Executing -> Waiting + // updating : Waiting -> Waiting + + // Must not be in a dependence linked list: 0 == t->m_next + + const bool ok_state = TASK_STATE_COMPLETE != m_state ; + const bool ok_list = 0 == m_next ; + + if ( ok_state && ok_list ) { + + if ( TASK_STATE_CONSTRUCTING == m_state ) { + // Initial scheduling increment, + // matched by decrement when task is complete. + ++m_ref_count ; + } + + // Will be waiting for execution upon return from this function + + m_state = Kokkos::Experimental::TASK_STATE_WAITING ; + + // Insert this task into another dependence that is not complete + + int i = 0 ; + for ( ; i < m_dep_size ; ++i ) { + Task * const y = m_dep[i] ; + if ( y && s_denied != ( m_next = y->m_wait ) ) { + y->m_wait = this ; // CAS( & y->m_wait , m_next , this ); + break ; + } + } + if ( i == m_dep_size ) { + // All dependences are complete, insert into the ready list + m_next = s_ready ; + s_ready = this ; // CAS( & s_ready , m_next = s_ready , this ); + } + } + else { + throw std::runtime_error(std::string("Kokkos::Experimental::Impl::Task spawn or respawn state error")); + } +} + +void Task::execute_ready_tasks() +{ + while ( s_ready ) { + + // Remove this task from the ready list + + // Task * task ; + // while ( ! 
CAS( & s_ready , task = s_ready , s_ready->m_next ) ); + + Task * task = s_ready ; + + s_ready = task->m_next ; + + task->m_next = 0 ; + + // precondition: task->m_state = TASK_STATE_WAITING + // precondition: task->m_dep[i]->m_state == TASK_STATE_COMPLETE for all i + // precondition: does not exist T such that T->m_wait = task + // precondition: does not exist T such that T->m_next = task + + task->m_state = Kokkos::Experimental::TASK_STATE_EXECUTING ; + + (*task->m_apply)( task ); + + if ( task->m_state == Kokkos::Experimental::TASK_STATE_EXECUTING ) { + // task did not respawn itself + task->m_state = Kokkos::Experimental::TASK_STATE_COMPLETE ; + + // release dependences: + for ( int i = 0 ; i < task->m_dep_size ; ++i ) { + assign( task->m_dep + i , 0 ); + } + + // Stop other tasks from adding themselves to 'task->m_wait' ; + + Task * x ; + // CAS( & task->m_wait , x = task->m_wait , s_denied ); + x = task->m_wait ; task->m_wait = s_denied ; + + // update tasks waiting on this task + while ( x ) { + Task * const next = x->m_next ; + + x->m_next = 0 ; + + x->schedule(); // could happen concurrently + + x = next ; + } + + // Decrement to match the initial scheduling increment + assign( & task , 0 ); + } + } +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.hpp b/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a333f948ae18e3e3622d06551dd935aff0d77707 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.hpp @@ -0,0 +1,677 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_EXPERIMENTAL_SERIAL_TASKPOLICY_HPP +#define KOKKOS_EXPERIMENTAL_SERIAL_TASKPOLICY_HPP + +#include <Kokkos_Macros.hpp> + +#if defined( KOKKOS_HAVE_SERIAL ) + +#include <string> +#include <typeinfo> +#include <stdexcept> + +#include <Kokkos_Serial.hpp> +#include <Kokkos_TaskPolicy.hpp> +#include <Kokkos_View.hpp> + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +#include <impl/Kokkos_FunctorAdapter.hpp> + +//---------------------------------------------------------------------------- +/* Inheritance structure to allow static_cast from the task root type + * and a task's FunctorType. + * + * task_root_type == TaskMember< Space , void , void > + * + * TaskMember< PolicyType , ResultType , FunctorType > + * : TaskMember< PolicyType::Space , ResultType , FunctorType > + * { ... }; + * + * TaskMember< Space , ResultType , FunctorType > + * : TaskMember< Space , ResultType , void > + * , FunctorType + * { ... }; + * + * when ResultType != void + * + * TaskMember< Space , ResultType , void > + * : TaskMember< Space , void , void > + * { ... 
}; + * + */ +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +/** \brief Base class for all tasks in the Serial execution space */ +template<> +class TaskMember< Kokkos::Serial , void , void > +{ +public: + + typedef void (* function_apply_type) ( TaskMember * ); + typedef void (* function_dealloc_type)( TaskMember * ); + typedef TaskMember * (* function_verify_type) ( TaskMember * ); + +private: + + const function_dealloc_type m_dealloc ; ///< Deallocation + const function_verify_type m_verify ; ///< Result type verification + const function_apply_type m_apply ; ///< Apply function + TaskMember ** const m_dep ; ///< Dependences + TaskMember * m_wait ; ///< Linked list of tasks waiting on this task + TaskMember * m_next ; ///< Linked list of tasks waiting on a different task + const int m_dep_capacity ; ///< Capacity of dependences + int m_dep_size ; ///< Actual count of dependences + int m_ref_count ; ///< Reference count + int m_state ; ///< State of the task + + // size = 6 Pointers + 4 ints + + TaskMember() /* = delete */ ; + TaskMember( const TaskMember & ) /* = delete */ ; + TaskMember & operator = ( const TaskMember & ) /* = delete */ ; + + static void * allocate( const unsigned arg_sizeof_derived , const unsigned arg_dependence_capacity ); + static void deallocate( void * ); + + void throw_error_add_dependence() const ; + static void throw_error_verify_type(); + + template < class DerivedTaskType > + static + void deallocate( TaskMember * t ) + { + DerivedTaskType * ptr = static_cast< DerivedTaskType * >(t); + ptr->~DerivedTaskType(); + deallocate( (void *) ptr ); + } + +protected : + + ~TaskMember(); + + // Used by TaskMember< Serial , ResultType , void > + TaskMember( const function_verify_type arg_verify + , const function_dealloc_type arg_dealloc + , const function_apply_type arg_apply + , const unsigned arg_sizeof_derived + , const unsigned 
arg_dependence_capacity + ); + + // Used for TaskMember< Serial , void , void > + TaskMember( const function_dealloc_type arg_dealloc + , const function_apply_type arg_apply + , const unsigned arg_sizeof_derived + , const unsigned arg_dependence_capacity + ); + +public: + + template< typename ResultType > + KOKKOS_FUNCTION static + TaskMember * verify_type( TaskMember * t ) + { + enum { check_type = ! Kokkos::Impl::is_same< ResultType , void >::value }; + + if ( check_type && t != 0 ) { + + // Verify that t->m_verify is this function + const function_verify_type self = & TaskMember::template verify_type< ResultType > ; + + if ( t->m_verify != self ) { + t = 0 ; +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + throw_error_verify_type(); +#endif + } + } + return t ; + } + + //---------------------------------------- + /* Inheritence Requirements on task types: + * typedef FunctorType::value_type value_type ; + * class DerivedTaskType + * : public TaskMember< Serial , value_type , FunctorType > + * { ... }; + * class TaskMember< Serial , value_type , FunctorType > + * : public TaskMember< Serial , value_type , void > + * , public Functor + * { ... 
}; + * If value_type != void + * class TaskMember< Serial , value_type , void > + * : public TaskMember< Serial , void , void > + * + * Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ] + * + */ + + /** \brief Allocate and construct a single-thread task */ + template< class DerivedTaskType > + static + TaskMember * create( const typename DerivedTaskType::functor_type & arg_functor + , const unsigned arg_dependence_capacity + ) + { + typedef typename DerivedTaskType::functor_type functor_type ; + typedef typename functor_type::value_type value_type ; + + DerivedTaskType * const task = + new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) ) + DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType > + , & TaskMember::template apply_single< functor_type , value_type > + , sizeof(DerivedTaskType) + , arg_dependence_capacity + , arg_functor ); + + return static_cast< TaskMember * >( task ); + } + + /** \brief Allocate and construct a data parallel task */ + template< class DerivedTaskType > + static + TaskMember * create( const typename DerivedTaskType::policy_type & arg_policy + , const typename DerivedTaskType::functor_type & arg_functor + , const unsigned arg_dependence_capacity + ) + { + DerivedTaskType * const task = + new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) ) + DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType > + , sizeof(DerivedTaskType) + , arg_dependence_capacity + , arg_policy + , arg_functor + ); + + return static_cast< TaskMember * >( task ); + } + + /** \brief Allocate and construct a thread-team task */ + template< class DerivedTaskType > + static + TaskMember * create_team( const typename DerivedTaskType::functor_type & arg_functor + , const unsigned arg_dependence_capacity + ) + { + typedef typename DerivedTaskType::functor_type functor_type ; + typedef typename functor_type::value_type value_type ; + + DerivedTaskType * const task = + new( allocate( 
sizeof(DerivedTaskType) , arg_dependence_capacity ) ) + DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType > + , & TaskMember::template apply_team< functor_type , value_type > + , sizeof(DerivedTaskType) + , arg_dependence_capacity + , arg_functor ); + + return static_cast< TaskMember * >( task ); + } + + void schedule(); + static void execute_ready_tasks(); + + //---------------------------------------- + + typedef FutureValueTypeIsVoidError get_result_type ; + + KOKKOS_INLINE_FUNCTION + get_result_type get() const { return get_result_type() ; } + + KOKKOS_INLINE_FUNCTION + Kokkos::Experimental::TaskState get_state() const { return Kokkos::Experimental::TaskState( m_state ); } + + //---------------------------------------- + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + static + void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false ); +#else + KOKKOS_INLINE_FUNCTION static + void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false ) {} +#endif + + KOKKOS_INLINE_FUNCTION + TaskMember * get_dependence( int i ) const + { return ( Kokkos::Experimental::TASK_STATE_EXECUTING == m_state && 0 <= i && i < m_dep_size ) ? 
m_dep[i] : (TaskMember*) 0 ; } + + KOKKOS_INLINE_FUNCTION + int get_dependence() const + { return m_dep_size ; } + + KOKKOS_INLINE_FUNCTION + void clear_dependence() + { + for ( int i = 0 ; i < m_dep_size ; ++i ) assign( m_dep + i , 0 ); + m_dep_size = 0 ; + } + + KOKKOS_INLINE_FUNCTION + void add_dependence( TaskMember * before ) + { + if ( ( Kokkos::Experimental::TASK_STATE_CONSTRUCTING == m_state || + Kokkos::Experimental::TASK_STATE_EXECUTING == m_state ) && + m_dep_size < m_dep_capacity ) { + assign( m_dep + m_dep_size , before ); + ++m_dep_size ; + } + else { + throw_error_add_dependence(); + } + } + + //---------------------------------------- + + template< class FunctorType , class ResultType > + KOKKOS_INLINE_FUNCTION static + void apply_single( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t ) + { + typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ; + + // TaskMember< Kokkos::Serial , ResultType , FunctorType > + // : public TaskMember< Kokkos::Serial , ResultType , void > + // , public FunctorType + // { ... }; + + derived_type & m = * static_cast< derived_type * >( t ); + + Kokkos::Impl::FunctorApply< FunctorType , void , ResultType & >::apply( (FunctorType &) m , & m.m_result ); + } + + template< class FunctorType , class ResultType > + KOKKOS_INLINE_FUNCTION static + void apply_single( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t ) + { + typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ; + + // TaskMember< Kokkos::Serial , ResultType , FunctorType > + // : public TaskMember< Kokkos::Serial , ResultType , void > + // , public FunctorType + // { ... 
}; + + derived_type & m = * static_cast< derived_type * >( t ); + + Kokkos::Impl::FunctorApply< FunctorType , void , void >::apply( (FunctorType &) m ); + } + + //---------------------------------------- + + template< class FunctorType , class ResultType > + static + void apply_team( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t ) + { + typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ; + typedef Kokkos::Impl::SerialTeamMember member_type ; + + // TaskMember< Kokkos::Serial , ResultType , FunctorType > + // : public TaskMember< Kokkos::Serial , ResultType , void > + // , public FunctorType + // { ... }; + + derived_type & m = * static_cast< derived_type * >( t ); + + m.FunctorType::apply( member_type(0,1,0) , m.m_result ); + } + + template< class FunctorType , class ResultType > + static + void apply_team( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t ) + { + typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ; + typedef Kokkos::Impl::SerialTeamMember member_type ; + + // TaskMember< Kokkos::Serial , ResultType , FunctorType > + // : public TaskMember< Kokkos::Serial , ResultType , void > + // , public FunctorType + // { ... }; + + derived_type & m = * static_cast< derived_type * >( t ); + + m.FunctorType::apply( member_type(0,1,0) ); + } +}; + +//---------------------------------------------------------------------------- +/** \brief Base class for tasks with a result value in the Serial execution space. + * + * The FunctorType must be void because this class is accessed by the + * Future class for the task and result value. + * + * Must be derived from TaskMember<S,void,void> 'root class' so the Future class + * can correctly static_cast from the 'root class' to this class. 
+ */ +template < class ResultType > +class TaskMember< Kokkos::Serial , ResultType , void > + : public TaskMember< Kokkos::Serial , void , void > +{ +public: + + ResultType m_result ; + + typedef const ResultType & get_result_type ; + + KOKKOS_INLINE_FUNCTION + get_result_type get() const { return m_result ; } + +protected: + + typedef TaskMember< Kokkos::Serial , void , void > task_root_type ; + typedef task_root_type::function_dealloc_type function_dealloc_type ; + typedef task_root_type::function_apply_type function_apply_type ; + + inline + TaskMember( const function_dealloc_type arg_dealloc + , const function_apply_type arg_apply + , const unsigned arg_sizeof_derived + , const unsigned arg_dependence_capacity + ) + : task_root_type( & task_root_type::template verify_type< ResultType > + , arg_dealloc + , arg_apply + , arg_sizeof_derived + , arg_dependence_capacity ) + , m_result() + {} +}; + +template< class ResultType , class FunctorType > +class TaskMember< Kokkos::Serial , ResultType , FunctorType > + : public TaskMember< Kokkos::Serial , ResultType , void > + , public FunctorType +{ +public: + + typedef FunctorType functor_type ; + + typedef TaskMember< Kokkos::Serial , void , void > task_root_type ; + typedef TaskMember< Kokkos::Serial , ResultType , void > task_base_type ; + typedef task_root_type::function_dealloc_type function_dealloc_type ; + typedef task_root_type::function_apply_type function_apply_type ; + + inline + TaskMember( const function_dealloc_type arg_dealloc + , const function_apply_type arg_apply + , const unsigned arg_sizeof_derived + , const unsigned arg_dependence_capacity + , const functor_type & arg_functor + ) + : task_base_type( arg_dealloc , arg_apply , arg_sizeof_derived , arg_dependence_capacity ) + , functor_type( arg_functor ) + {} +}; + +} /* namespace Impl */ +} /* namespace Experimental */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- 
+//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { + +template<> +class TaskPolicy< Kokkos::Serial > +{ +public: + + typedef Kokkos::Serial execution_space ; + typedef Kokkos::Impl::SerialTeamMember member_type ; + +private: + + typedef Impl::TaskMember< execution_space , void , void > task_root_type ; + + template< class FunctorType > + static inline + const task_root_type * get_task_root( const FunctorType * f ) + { + typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ; + return static_cast< const task_root_type * >( static_cast< const task_type * >(f) ); + } + + template< class FunctorType > + static inline + task_root_type * get_task_root( FunctorType * f ) + { + typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ; + return static_cast< task_root_type * >( static_cast< task_type * >(f) ); + } + + unsigned m_default_dependence_capacity ; + +public: + + // Stubbed out for now. 
+ KOKKOS_INLINE_FUNCTION + int allocated_task_count() const { return 0 ; } + + TaskPolicy + ( const unsigned /* arg_task_max_count */ + , const unsigned /* arg_task_max_size */ + , const unsigned arg_task_default_dependence_capacity = 4 + , const unsigned /* arg_task_team_size */ = 0 + ) + : m_default_dependence_capacity( arg_task_default_dependence_capacity ) + {} + + KOKKOS_FUNCTION TaskPolicy() = default ; + KOKKOS_FUNCTION TaskPolicy( TaskPolicy && rhs ) = default ; + KOKKOS_FUNCTION TaskPolicy( const TaskPolicy & rhs ) = default ; + KOKKOS_FUNCTION TaskPolicy & operator = ( TaskPolicy && rhs ) = default ; + KOKKOS_FUNCTION TaskPolicy & operator = ( const TaskPolicy & rhs ) = default ; + + //---------------------------------------- + + template< class ValueType > + KOKKOS_INLINE_FUNCTION + const Future< ValueType , execution_space > & + spawn( const Future< ValueType , execution_space > & f + , const bool priority = false ) const + { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + f.m_task->schedule(); +#endif + return f ; + } + + //---------------------------------------- + // Create single-thread task + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION + Future< typename FunctorType::value_type , execution_space > + task_create( const FunctorType & functor + , const unsigned dependence_capacity = ~0u ) const + { + typedef typename FunctorType::value_type value_type ; + typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ; + return Future< value_type , execution_space >( +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + task_root_type::create< task_type >( + functor , ( ~0u == dependence_capacity ? 
m_default_dependence_capacity : dependence_capacity ) ) +#endif + ); + } + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION + Future< typename FunctorType::value_type , execution_space > + proc_create( const FunctorType & functor + , const unsigned dependence_capacity = ~0u ) const + { return task_create( functor , dependence_capacity ); } + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION + Future< typename FunctorType::value_type , execution_space > + task_create_team( const FunctorType & functor + , const unsigned dependence_capacity = ~0u ) const + { + typedef typename FunctorType::value_type value_type ; + typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ; + return Future< value_type , execution_space >( +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + task_root_type::create_team< task_type >( + functor , ( ~0u == dependence_capacity ? m_default_dependence_capacity : dependence_capacity ) ) +#endif + ); + } + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION + Future< typename FunctorType::value_type , execution_space > + proc_create_team( const FunctorType & functor + , const unsigned dependence_capacity = ~0u ) const + { return task_create_team( functor , dependence_capacity ); } + + //---------------------------------------- + // Add dependence + template< class A1 , class A2 , class A3 , class A4 > + KOKKOS_INLINE_FUNCTION + void add_dependence( const Future<A1,A2> & after + , const Future<A3,A4> & before + , typename Kokkos::Impl::enable_if + < Kokkos::Impl::is_same< typename Future<A1,A2>::execution_space , execution_space >::value + && + Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value + >::type * = 0 + ) const + { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + after.m_task->add_dependence( before.m_task ); +#endif + } + + //---------------------------------------- + // Functions for an executing task functor to query dependences, + // 
set new dependences, and respawn itself. + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION + Future< void , execution_space > + get_dependence( const FunctorType * task_functor , int i ) const + { + return Future<void,execution_space>( +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + get_task_root(task_functor)->get_dependence(i) +#endif + ); + } + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION + int get_dependence( const FunctorType * task_functor ) const +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { return get_task_root(task_functor)->get_dependence(); } +#else + { return 0 ; } +#endif + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION + void clear_dependence( FunctorType * task_functor ) const +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { get_task_root(task_functor)->clear_dependence(); } +#else + {} +#endif + + template< class FunctorType , class A3 , class A4 > + KOKKOS_INLINE_FUNCTION + void add_dependence( FunctorType * task_functor + , const Future<A3,A4> & before + , typename Kokkos::Impl::enable_if + < Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value + >::type * = 0 + ) const +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { get_task_root(task_functor)->add_dependence( before.m_task ); } +#else + {} +#endif + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION + void respawn( FunctorType * task_functor + , const bool priority = false ) const + { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + get_task_root(task_functor)->schedule(); +#endif + } + + template< class FunctorType > + KOKKOS_INLINE_FUNCTION + void respawn_needing_memory( FunctorType * task_functor ) const + { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + get_task_root(task_functor)->schedule(); +#endif + } + + //---------------------------------------- + + static member_type & member_single(); +}; + +inline +void wait( TaskPolicy< 
Kokkos::Serial > & ) +{ Impl::TaskMember< Kokkos::Serial , void , void >::execute_ready_tasks(); } + +} /* namespace Experimental */ +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* defined( KOKKOS_HAVE_SERIAL ) */ +#endif /* #define KOKKOS_EXPERIMENTAL_SERIAL_TASK_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Shape.cpp b/lib/kokkos/core/src/impl/Kokkos_Shape.cpp new file mode 100644 index 0000000000000000000000000000000000000000..da12db1f381e790e46604f8a15280d2a07f5152a --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Shape.cpp @@ -0,0 +1,178 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#include <sstream> +#include <impl/Kokkos_Error.hpp> +#include <impl/Kokkos_Shape.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +void assert_counts_are_equal_throw( + const size_t x_count , + const size_t y_count ) +{ + std::ostringstream msg ; + + msg << "Kokkos::Impl::assert_counts_are_equal_throw( " + << x_count << " != " << y_count << " )" ; + + throw_runtime_exception( msg.str() ); +} + +void assert_shapes_are_equal_throw( + const unsigned x_scalar_size , + const unsigned x_rank , + const size_t x_N0 , const unsigned x_N1 , + const unsigned x_N2 , const unsigned x_N3 , + const unsigned x_N4 , const unsigned x_N5 , + const unsigned x_N6 , const unsigned x_N7 , + + const unsigned y_scalar_size , + const unsigned y_rank , + const size_t y_N0 , const unsigned y_N1 , + const unsigned y_N2 , const unsigned y_N3 , + const unsigned y_N4 , const unsigned y_N5 , + const unsigned y_N6 , const unsigned y_N7 ) +{ + std::ostringstream msg ; + + msg << "Kokkos::Impl::assert_shape_are_equal_throw( {" + << " scalar_size(" << x_scalar_size + << ") rank(" << x_rank + << ") dimension(" ; + if ( 0 < x_rank ) { msg << " " 
<< x_N0 ; } + if ( 1 < x_rank ) { msg << " " << x_N1 ; } + if ( 2 < x_rank ) { msg << " " << x_N2 ; } + if ( 3 < x_rank ) { msg << " " << x_N3 ; } + if ( 4 < x_rank ) { msg << " " << x_N4 ; } + if ( 5 < x_rank ) { msg << " " << x_N5 ; } + if ( 6 < x_rank ) { msg << " " << x_N6 ; } + if ( 7 < x_rank ) { msg << " " << x_N7 ; } + msg << " ) } != { " + << " scalar_size(" << y_scalar_size + << ") rank(" << y_rank + << ") dimension(" ; + if ( 0 < y_rank ) { msg << " " << y_N0 ; } + if ( 1 < y_rank ) { msg << " " << y_N1 ; } + if ( 2 < y_rank ) { msg << " " << y_N2 ; } + if ( 3 < y_rank ) { msg << " " << y_N3 ; } + if ( 4 < y_rank ) { msg << " " << y_N4 ; } + if ( 5 < y_rank ) { msg << " " << y_N5 ; } + if ( 6 < y_rank ) { msg << " " << y_N6 ; } + if ( 7 < y_rank ) { msg << " " << y_N7 ; } + msg << " ) } )" ; + + throw_runtime_exception( msg.str() ); +} + +void AssertShapeBoundsAbort< Kokkos::HostSpace >::apply( + const size_t rank , + const size_t n0 , const size_t n1 , + const size_t n2 , const size_t n3 , + const size_t n4 , const size_t n5 , + const size_t n6 , const size_t n7 , + + const size_t arg_rank , + const size_t i0 , const size_t i1 , + const size_t i2 , const size_t i3 , + const size_t i4 , const size_t i5 , + const size_t i6 , const size_t i7 ) +{ + std::ostringstream msg ; + msg << "Kokkos::Impl::AssertShapeBoundsAbort( shape = {" ; + if ( 0 < rank ) { msg << " " << n0 ; } + if ( 1 < rank ) { msg << " " << n1 ; } + if ( 2 < rank ) { msg << " " << n2 ; } + if ( 3 < rank ) { msg << " " << n3 ; } + if ( 4 < rank ) { msg << " " << n4 ; } + if ( 5 < rank ) { msg << " " << n5 ; } + if ( 6 < rank ) { msg << " " << n6 ; } + if ( 7 < rank ) { msg << " " << n7 ; } + msg << " } index = {" ; + if ( 0 < arg_rank ) { msg << " " << i0 ; } + if ( 1 < arg_rank ) { msg << " " << i1 ; } + if ( 2 < arg_rank ) { msg << " " << i2 ; } + if ( 3 < arg_rank ) { msg << " " << i3 ; } + if ( 4 < arg_rank ) { msg << " " << i4 ; } + if ( 5 < arg_rank ) { msg << " " << i5 ; } + if ( 6 < 
arg_rank ) { msg << " " << i6 ; } + if ( 7 < arg_rank ) { msg << " " << i7 ; } + msg << " } )" ; + + throw_runtime_exception( msg.str() ); +} + +void assert_shape_effective_rank1_at_leastN_throw( + const size_t x_rank , const size_t x_N0 , + const size_t x_N1 , const size_t x_N2 , + const size_t x_N3 , const size_t x_N4 , + const size_t x_N5 , const size_t x_N6 , + const size_t x_N7 , + const size_t N0 ) +{ + std::ostringstream msg ; + + msg << "Kokkos::Impl::assert_shape_effective_rank1_at_leastN_throw( shape = {" ; + if ( 0 < x_rank ) { msg << " " << x_N0 ; } + if ( 1 < x_rank ) { msg << " " << x_N1 ; } + if ( 2 < x_rank ) { msg << " " << x_N2 ; } + if ( 3 < x_rank ) { msg << " " << x_N3 ; } + if ( 4 < x_rank ) { msg << " " << x_N4 ; } + if ( 5 < x_rank ) { msg << " " << x_N5 ; } + if ( 6 < x_rank ) { msg << " " << x_N6 ; } + if ( 7 < x_rank ) { msg << " " << x_N7 ; } + msg << " } N = " << N0 << " )" ; + + throw_runtime_exception( msg.str() ); +} + + + +} +} + diff --git a/lib/kokkos/core/src/impl/Kokkos_Shape.hpp b/lib/kokkos/core/src/impl/Kokkos_Shape.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9749e0a1ff73107b97435862f737d96439fcb9d3 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Shape.hpp @@ -0,0 +1,917 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_SHAPE_HPP +#define KOKKOS_SHAPE_HPP + +#include <typeinfo> +#include <utility> +#include <Kokkos_Core_fwd.hpp> +#include <impl/Kokkos_Traits.hpp> +#include <impl/Kokkos_StaticAssert.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- +/** \brief The shape of a Kokkos with dynamic and static dimensions. 
+ * Dynamic dimensions are member values and static dimensions are + * 'static const' values. + * + * The upper bound on the array rank is eight. + */ +template< unsigned ScalarSize , + unsigned Rank , + unsigned s0 = 1 , + unsigned s1 = 1 , + unsigned s2 = 1 , + unsigned s3 = 1 , + unsigned s4 = 1 , + unsigned s5 = 1 , + unsigned s6 = 1 , + unsigned s7 = 1 > +struct Shape ; + +//---------------------------------------------------------------------------- +/** \brief Shape equality if the value type, layout, and dimensions + * are equal. + */ +template< unsigned xSize , unsigned xRank , + unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 , + unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 , + + unsigned ySize , unsigned yRank , + unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 , + unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 > +KOKKOS_INLINE_FUNCTION +bool operator == ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x , + const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y ) +{ + enum { same_size = xSize == ySize }; + enum { same_rank = xRank == yRank }; + + return same_size && same_rank && + size_t( x.N0 ) == size_t( y.N0 ) && + unsigned( x.N1 ) == unsigned( y.N1 ) && + unsigned( x.N2 ) == unsigned( y.N2 ) && + unsigned( x.N3 ) == unsigned( y.N3 ) && + unsigned( x.N4 ) == unsigned( y.N4 ) && + unsigned( x.N5 ) == unsigned( y.N5 ) && + unsigned( x.N6 ) == unsigned( y.N6 ) && + unsigned( x.N7 ) == unsigned( y.N7 ) ; +} + +template< unsigned xSize , unsigned xRank , + unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 , + unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 , + + unsigned ySize ,unsigned yRank , + unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 , + unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 > +KOKKOS_INLINE_FUNCTION +bool operator != ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x , + const 
Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y ) +{ return ! operator == ( x , y ); } + +//---------------------------------------------------------------------------- + +void assert_counts_are_equal_throw( + const size_t x_count , + const size_t y_count ); + +inline +void assert_counts_are_equal( + const size_t x_count , + const size_t y_count ) +{ + if ( x_count != y_count ) { + assert_counts_are_equal_throw( x_count , y_count ); + } +} + +void assert_shapes_are_equal_throw( + const unsigned x_scalar_size , + const unsigned x_rank , + const size_t x_N0 , const unsigned x_N1 , + const unsigned x_N2 , const unsigned x_N3 , + const unsigned x_N4 , const unsigned x_N5 , + const unsigned x_N6 , const unsigned x_N7 , + + const unsigned y_scalar_size , + const unsigned y_rank , + const size_t y_N0 , const unsigned y_N1 , + const unsigned y_N2 , const unsigned y_N3 , + const unsigned y_N4 , const unsigned y_N5 , + const unsigned y_N6 , const unsigned y_N7 ); + +template< unsigned xSize , unsigned xRank , + unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 , + unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 , + + unsigned ySize , unsigned yRank , + unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 , + unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 > +inline +void assert_shapes_are_equal( + const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x , + const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y ) +{ + typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ; + typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ; + + if ( x != y ) { + assert_shapes_are_equal_throw( + x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7, + y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 ); + } +} + +template< unsigned xSize , unsigned xRank , + unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 , + unsigned xN4 , unsigned xN5 , 
unsigned xN6 , unsigned xN7 , + + unsigned ySize , unsigned yRank , + unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 , + unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 > +void assert_shapes_equal_dimension( + const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x , + const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y ) +{ + typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ; + typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ; + + // Omit comparison of scalar_size. + if ( unsigned( x.rank ) != unsigned( y.rank ) || + size_t( x.N0 ) != size_t( y.N0 ) || + unsigned( x.N1 ) != unsigned( y.N1 ) || + unsigned( x.N2 ) != unsigned( y.N2 ) || + unsigned( x.N3 ) != unsigned( y.N3 ) || + unsigned( x.N4 ) != unsigned( y.N4 ) || + unsigned( x.N5 ) != unsigned( y.N5 ) || + unsigned( x.N6 ) != unsigned( y.N6 ) || + unsigned( x.N7 ) != unsigned( y.N7 ) ) { + assert_shapes_are_equal_throw( + x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7, + y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 ); + } +} + +//---------------------------------------------------------------------------- + +template< class ShapeType > struct assert_shape_is_rank_zero ; +template< class ShapeType > struct assert_shape_is_rank_one ; + +template< unsigned Size > +struct assert_shape_is_rank_zero< Shape<Size,0> > + : public true_type {}; + +template< unsigned Size , unsigned s0 > +struct assert_shape_is_rank_one< Shape<Size,1,s0> > + : public true_type {}; + +//---------------------------------------------------------------------------- + +/** \brief Array bounds assertion templated on the execution space + * to allow device-specific abort code. 
+ */ +template< class Space > +struct AssertShapeBoundsAbort ; + +template<> +struct AssertShapeBoundsAbort< Kokkos::HostSpace > +{ + static void apply( const size_t rank , + const size_t n0 , const size_t n1 , + const size_t n2 , const size_t n3 , + const size_t n4 , const size_t n5 , + const size_t n6 , const size_t n7 , + const size_t arg_rank , + const size_t i0 , const size_t i1 , + const size_t i2 , const size_t i3 , + const size_t i4 , const size_t i5 , + const size_t i6 , const size_t i7 ); +}; + +template< class ExecutionSpace > +struct AssertShapeBoundsAbort +{ + KOKKOS_INLINE_FUNCTION + static void apply( const size_t rank , + const size_t n0 , const size_t n1 , + const size_t n2 , const size_t n3 , + const size_t n4 , const size_t n5 , + const size_t n6 , const size_t n7 , + const size_t arg_rank , + const size_t i0 , const size_t i1 , + const size_t i2 , const size_t i3 , + const size_t i4 , const size_t i5 , + const size_t i6 , const size_t i7 ) + { + AssertShapeBoundsAbort< Kokkos::HostSpace > + ::apply( rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 , + arg_rank, i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 ); + } +}; + +template< class ShapeType > +KOKKOS_INLINE_FUNCTION +void assert_shape_bounds( const ShapeType & shape , + const size_t arg_rank , + const size_t i0 , + const size_t i1 = 0 , + const size_t i2 = 0 , + const size_t i3 = 0 , + const size_t i4 = 0 , + const size_t i5 = 0 , + const size_t i6 = 0 , + const size_t i7 = 0 ) +{ + // Must supply at least as many indices as ranks. + // Every index must be within bounds. + const bool ok = ShapeType::rank <= arg_rank && + i0 < size_t(shape.N0) && + i1 < size_t(shape.N1) && + i2 < size_t(shape.N2) && + i3 < size_t(shape.N3) && + i4 < size_t(shape.N4) && + i5 < size_t(shape.N5) && + i6 < size_t(shape.N6) && + i7 < size_t(shape.N7) ; + + if ( ! 
ok ) { + AssertShapeBoundsAbort< Kokkos::Impl::ActiveExecutionMemorySpace > + ::apply( ShapeType::rank , + shape.N0 , shape.N1 , shape.N2 , shape.N3 , + shape.N4 , shape.N5 , shape.N6 , shape.N7 , + arg_rank , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 ); + } +} + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) +#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) assert_shape_bounds(S,1,I0); +#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) assert_shape_bounds(S,2,I0,I1); +#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) assert_shape_bounds(S,3,I0,I1,I2); +#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) assert_shape_bounds(S,4,I0,I1,I2,I3); +#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) assert_shape_bounds(S,5,I0,I1,I2,I3,I4); +#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) assert_shape_bounds(S,6,I0,I1,I2,I3,I4,I5); +#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) assert_shape_bounds(S,7,I0,I1,I2,I3,I4,I5,I6); +#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) assert_shape_bounds(S,8,I0,I1,I2,I3,I4,I5,I6,I7); +#else +#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) /* */ +#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) /* */ +#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) /* */ +#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) /* */ +#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) /* */ +#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) /* */ +#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) /* */ +#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) /* */ +#endif + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +// Specialization and optimization for the Rank 0 shape. 
+ +template < unsigned ScalarSize > +struct Shape< ScalarSize , 0, 1,1,1,1, 1,1,1,1 > +{ + enum { scalar_size = ScalarSize }; + enum { rank_dynamic = 0 }; + enum { rank = 0 }; + + enum { N0 = 1 }; + enum { N1 = 1 }; + enum { N2 = 1 }; + enum { N3 = 1 }; + enum { N4 = 1 }; + enum { N5 = 1 }; + enum { N6 = 1 }; + enum { N7 = 1 }; + + KOKKOS_INLINE_FUNCTION + static + void assign( Shape & , + unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 , + unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) + {} +}; + +//---------------------------------------------------------------------------- + +template< unsigned R > struct assign_shape_dimension ; + +#define KOKKOS_ASSIGN_SHAPE_DIMENSION( R ) \ +template<> \ +struct assign_shape_dimension< R > \ +{ \ + template< class ShapeType > \ + KOKKOS_INLINE_FUNCTION \ + assign_shape_dimension( ShapeType & shape \ + , typename Impl::enable_if<( R < ShapeType::rank_dynamic ), size_t >::type n \ + ) { shape.N ## R = n ; } \ +}; + +KOKKOS_ASSIGN_SHAPE_DIMENSION(0) +KOKKOS_ASSIGN_SHAPE_DIMENSION(1) +KOKKOS_ASSIGN_SHAPE_DIMENSION(2) +KOKKOS_ASSIGN_SHAPE_DIMENSION(3) +KOKKOS_ASSIGN_SHAPE_DIMENSION(4) +KOKKOS_ASSIGN_SHAPE_DIMENSION(5) +KOKKOS_ASSIGN_SHAPE_DIMENSION(6) +KOKKOS_ASSIGN_SHAPE_DIMENSION(7) + +#undef KOKKOS_ASSIGN_SHAPE_DIMENSION + +//---------------------------------------------------------------------------- +// All-static dimension array + +template < unsigned ScalarSize , + unsigned Rank , + unsigned s0 , + unsigned s1 , + unsigned s2 , + unsigned s3 , + unsigned s4 , + unsigned s5 , + unsigned s6 , + unsigned s7 > +struct Shape { + + enum { scalar_size = ScalarSize }; + enum { rank_dynamic = 0 }; + enum { rank = Rank }; + + enum { N0 = s0 }; + enum { N1 = s1 }; + enum { N2 = s2 }; + enum { N3 = s3 }; + enum { N4 = s4 }; + enum { N5 = s5 }; + enum { N6 = s6 }; + enum { N7 = s7 }; + + KOKKOS_INLINE_FUNCTION + static + void assign( Shape & , + unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 , + 
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) + {} +}; + +// 1 == dynamic_rank <= rank <= 8 +template < unsigned ScalarSize , + unsigned Rank , + unsigned s1 , + unsigned s2 , + unsigned s3 , + unsigned s4 , + unsigned s5 , + unsigned s6 , + unsigned s7 > +struct Shape< ScalarSize , Rank , 0,s1,s2,s3, s4,s5,s6,s7 > +{ + enum { scalar_size = ScalarSize }; + enum { rank_dynamic = 1 }; + enum { rank = Rank }; + + size_t N0 ; // For 1 == dynamic_rank allow N0 > 2^32 + + enum { N1 = s1 }; + enum { N2 = s2 }; + enum { N3 = s3 }; + enum { N4 = s4 }; + enum { N5 = s5 }; + enum { N6 = s6 }; + enum { N7 = s7 }; + + KOKKOS_INLINE_FUNCTION + static + void assign( Shape & s , + size_t n0 , unsigned = 0 , unsigned = 0 , unsigned = 0 , + unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) + { s.N0 = n0 ; } +}; + +// 2 == dynamic_rank <= rank <= 8 +template < unsigned ScalarSize , unsigned Rank , + unsigned s2 , + unsigned s3 , + unsigned s4 , + unsigned s5 , + unsigned s6 , + unsigned s7 > +struct Shape< ScalarSize , Rank , 0,0,s2,s3, s4,s5,s6,s7 > +{ + enum { scalar_size = ScalarSize }; + enum { rank_dynamic = 2 }; + enum { rank = Rank }; + + unsigned N0 ; + unsigned N1 ; + + enum { N2 = s2 }; + enum { N3 = s3 }; + enum { N4 = s4 }; + enum { N5 = s5 }; + enum { N6 = s6 }; + enum { N7 = s7 }; + + KOKKOS_INLINE_FUNCTION + static + void assign( Shape & s , + unsigned n0 , unsigned n1 , unsigned = 0 , unsigned = 0 , + unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) + { s.N0 = n0 ; s.N1 = n1 ; } +}; + +// 3 == dynamic_rank <= rank <= 8 +template < unsigned Rank , unsigned ScalarSize , + unsigned s3 , + unsigned s4 , + unsigned s5 , + unsigned s6 , + unsigned s7 > +struct Shape< ScalarSize , Rank , 0,0,0,s3, s4,s5,s6,s7> +{ + enum { scalar_size = ScalarSize }; + enum { rank_dynamic = 3 }; + enum { rank = Rank }; + + unsigned N0 ; + unsigned N1 ; + unsigned N2 ; + + enum { N3 = s3 }; + enum { N4 = s4 }; + enum { N5 = s5 }; + enum { N6 = s6 }; + enum { 
N7 = s7 }; + + KOKKOS_INLINE_FUNCTION + static + void assign( Shape & s , + unsigned n0 , unsigned n1 , unsigned n2 , unsigned = 0 , + unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) + { s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; } +}; + +// 4 == dynamic_rank <= rank <= 8 +template < unsigned ScalarSize , unsigned Rank , + unsigned s4 , + unsigned s5 , + unsigned s6 , + unsigned s7 > +struct Shape< ScalarSize , Rank, 0,0,0,0, s4,s5,s6,s7 > +{ + enum { scalar_size = ScalarSize }; + enum { rank_dynamic = 4 }; + enum { rank = Rank }; + + unsigned N0 ; + unsigned N1 ; + unsigned N2 ; + unsigned N3 ; + + enum { N4 = s4 }; + enum { N5 = s5 }; + enum { N6 = s6 }; + enum { N7 = s7 }; + + KOKKOS_INLINE_FUNCTION + static + void assign( Shape & s , + unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 , + unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) + { s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; } +}; + +// 5 == dynamic_rank <= rank <= 8 +template < unsigned ScalarSize , unsigned Rank , + unsigned s5 , + unsigned s6 , + unsigned s7 > +struct Shape< ScalarSize , Rank , 0,0,0,0, 0,s5,s6,s7 > +{ + enum { scalar_size = ScalarSize }; + enum { rank_dynamic = 5 }; + enum { rank = Rank }; + + unsigned N0 ; + unsigned N1 ; + unsigned N2 ; + unsigned N3 ; + unsigned N4 ; + + enum { N5 = s5 }; + enum { N6 = s6 }; + enum { N7 = s7 }; + + KOKKOS_INLINE_FUNCTION + static + void assign( Shape & s , + unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 , + unsigned n4 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) + { s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; s.N4 = n4 ; } +}; + +// 6 == dynamic_rank <= rank <= 8 +template < unsigned ScalarSize , unsigned Rank , + unsigned s6 , + unsigned s7 > +struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,s6,s7 > +{ + enum { scalar_size = ScalarSize }; + enum { rank_dynamic = 6 }; + enum { rank = Rank }; + + unsigned N0 ; + unsigned N1 ; + unsigned N2 ; + unsigned N3 ; + unsigned N4 ; + unsigned N5 ; + + enum { N6 = s6 }; 
+ enum { N7 = s7 }; + + KOKKOS_INLINE_FUNCTION + static + void assign( Shape & s , + unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 , + unsigned n4 , unsigned n5 = 0 , unsigned = 0 , unsigned = 0 ) + { + s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; + s.N4 = n4 ; s.N5 = n5 ; + } +}; + +// 7 == dynamic_rank <= rank <= 8 +template < unsigned ScalarSize , unsigned Rank , + unsigned s7 > +struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,0,s7 > +{ + enum { scalar_size = ScalarSize }; + enum { rank_dynamic = 7 }; + enum { rank = Rank }; + + unsigned N0 ; + unsigned N1 ; + unsigned N2 ; + unsigned N3 ; + unsigned N4 ; + unsigned N5 ; + unsigned N6 ; + + enum { N7 = s7 }; + + KOKKOS_INLINE_FUNCTION + static + void assign( Shape & s , + unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 , + unsigned n4 , unsigned n5 , unsigned n6 , unsigned = 0 ) + { + s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; + s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ; + } +}; + +// 8 == dynamic_rank <= rank <= 8 +template < unsigned ScalarSize > +struct Shape< ScalarSize , 8 , 0,0,0,0, 0,0,0,0 > +{ + enum { scalar_size = ScalarSize }; + enum { rank_dynamic = 8 }; + enum { rank = 8 }; + + unsigned N0 ; + unsigned N1 ; + unsigned N2 ; + unsigned N3 ; + unsigned N4 ; + unsigned N5 ; + unsigned N6 ; + unsigned N7 ; + + KOKKOS_INLINE_FUNCTION + static + void assign( Shape & s , + unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 , + unsigned n4 , unsigned n5 , unsigned n6 , unsigned n7 ) + { + s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; + s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ; s.N7 = n7 ; + } +}; + +//---------------------------------------------------------------------------- + +template< class ShapeType , unsigned N , + unsigned R = ShapeType::rank_dynamic > +struct ShapeInsert ; + +template< class ShapeType , unsigned N > +struct ShapeInsert< ShapeType , N , 0 > +{ + typedef Shape< ShapeType::scalar_size , + ShapeType::rank + 1 , + N , + ShapeType::N0 , + ShapeType::N1 , + ShapeType::N2 , + 
ShapeType::N3 , + ShapeType::N4 , + ShapeType::N5 , + ShapeType::N6 > type ; +}; + +template< class ShapeType , unsigned N > +struct ShapeInsert< ShapeType , N , 1 > +{ + typedef Shape< ShapeType::scalar_size , + ShapeType::rank + 1 , + 0 , + N , + ShapeType::N1 , + ShapeType::N2 , + ShapeType::N3 , + ShapeType::N4 , + ShapeType::N5 , + ShapeType::N6 > type ; +}; + +template< class ShapeType , unsigned N > +struct ShapeInsert< ShapeType , N , 2 > +{ + typedef Shape< ShapeType::scalar_size , + ShapeType::rank + 1 , + 0 , + 0 , + N , + ShapeType::N2 , + ShapeType::N3 , + ShapeType::N4 , + ShapeType::N5 , + ShapeType::N6 > type ; +}; + +template< class ShapeType , unsigned N > +struct ShapeInsert< ShapeType , N , 3 > +{ + typedef Shape< ShapeType::scalar_size , + ShapeType::rank + 1 , + 0 , + 0 , + 0 , + N , + ShapeType::N3 , + ShapeType::N4 , + ShapeType::N5 , + ShapeType::N6 > type ; +}; + +template< class ShapeType , unsigned N > +struct ShapeInsert< ShapeType , N , 4 > +{ + typedef Shape< ShapeType::scalar_size , + ShapeType::rank + 1 , + 0 , + 0 , + 0 , + 0 , + N , + ShapeType::N4 , + ShapeType::N5 , + ShapeType::N6 > type ; +}; + +template< class ShapeType , unsigned N > +struct ShapeInsert< ShapeType , N , 5 > +{ + typedef Shape< ShapeType::scalar_size , + ShapeType::rank + 1 , + 0 , + 0 , + 0 , + 0 , + 0 , + N , + ShapeType::N5 , + ShapeType::N6 > type ; +}; + +template< class ShapeType , unsigned N > +struct ShapeInsert< ShapeType , N , 6 > +{ + typedef Shape< ShapeType::scalar_size , + ShapeType::rank + 1 , + 0 , + 0 , + 0 , + 0 , + 0 , + 0 , + N , + ShapeType::N6 > type ; +}; + +template< class ShapeType , unsigned N > +struct ShapeInsert< ShapeType , N , 7 > +{ + typedef Shape< ShapeType::scalar_size , + ShapeType::rank + 1 , + 0 , + 0 , + 0 , + 0 , + 0 , + 0 , + 0 , + N > type ; +}; + +//---------------------------------------------------------------------------- + +template< class DstShape , class SrcShape , + unsigned DstRankDynamic = 
DstShape::rank_dynamic , + bool DstRankDynamicOK = unsigned(DstShape::rank_dynamic) >= unsigned(SrcShape::rank_dynamic) > +struct ShapeCompatible { enum { value = false }; }; + +template< class DstShape , class SrcShape > +struct ShapeCompatible< DstShape , SrcShape , 8 , true > +{ + enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) }; +}; + +template< class DstShape , class SrcShape > +struct ShapeCompatible< DstShape , SrcShape , 7 , true > +{ + enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && + unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; +}; + +template< class DstShape , class SrcShape > +struct ShapeCompatible< DstShape , SrcShape , 6 , true > +{ + enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && + unsigned(DstShape::N6) == unsigned(SrcShape::N6) && + unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; +}; + +template< class DstShape , class SrcShape > +struct ShapeCompatible< DstShape , SrcShape , 5 , true > +{ + enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && + unsigned(DstShape::N5) == unsigned(SrcShape::N5) && + unsigned(DstShape::N6) == unsigned(SrcShape::N6) && + unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; +}; + +template< class DstShape , class SrcShape > +struct ShapeCompatible< DstShape , SrcShape , 4 , true > +{ + enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && + unsigned(DstShape::N4) == unsigned(SrcShape::N4) && + unsigned(DstShape::N5) == unsigned(SrcShape::N5) && + unsigned(DstShape::N6) == unsigned(SrcShape::N6) && + unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; +}; + +template< class DstShape , class SrcShape > +struct ShapeCompatible< DstShape , SrcShape , 3 , true > +{ + enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && + unsigned(DstShape::N3) == unsigned(SrcShape::N3) && + unsigned(DstShape::N4) == 
unsigned(SrcShape::N4) && + unsigned(DstShape::N5) == unsigned(SrcShape::N5) && + unsigned(DstShape::N6) == unsigned(SrcShape::N6) && + unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; +}; + +template< class DstShape , class SrcShape > +struct ShapeCompatible< DstShape , SrcShape , 2 , true > +{ + enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && + unsigned(DstShape::N2) == unsigned(SrcShape::N2) && + unsigned(DstShape::N3) == unsigned(SrcShape::N3) && + unsigned(DstShape::N4) == unsigned(SrcShape::N4) && + unsigned(DstShape::N5) == unsigned(SrcShape::N5) && + unsigned(DstShape::N6) == unsigned(SrcShape::N6) && + unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; +}; + +template< class DstShape , class SrcShape > +struct ShapeCompatible< DstShape , SrcShape , 1 , true > +{ + enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && + unsigned(DstShape::N1) == unsigned(SrcShape::N1) && + unsigned(DstShape::N2) == unsigned(SrcShape::N2) && + unsigned(DstShape::N3) == unsigned(SrcShape::N3) && + unsigned(DstShape::N4) == unsigned(SrcShape::N4) && + unsigned(DstShape::N5) == unsigned(SrcShape::N5) && + unsigned(DstShape::N6) == unsigned(SrcShape::N6) && + unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; +}; + +template< class DstShape , class SrcShape > +struct ShapeCompatible< DstShape , SrcShape , 0 , true > +{ + enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && + unsigned(DstShape::N0) == unsigned(SrcShape::N0) && + unsigned(DstShape::N1) == unsigned(SrcShape::N1) && + unsigned(DstShape::N2) == unsigned(SrcShape::N2) && + unsigned(DstShape::N3) == unsigned(SrcShape::N3) && + unsigned(DstShape::N4) == unsigned(SrcShape::N4) && + unsigned(DstShape::N5) == unsigned(SrcShape::N5) && + unsigned(DstShape::N6) == unsigned(SrcShape::N6) && + unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + 
+//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< unsigned ScalarSize , unsigned Rank , + unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 , + unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 , + typename iType > +KOKKOS_INLINE_FUNCTION +size_t dimension( + const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape , + const iType & r ) +{ + return 0 == r ? shape.N0 : ( + 1 == r ? shape.N1 : ( + 2 == r ? shape.N2 : ( + 3 == r ? shape.N3 : ( + 4 == r ? shape.N4 : ( + 5 == r ? shape.N5 : ( + 6 == r ? shape.N6 : ( + 7 == r ? shape.N7 : 1 ))))))); +} + +template< unsigned ScalarSize , unsigned Rank , + unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 , + unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 > +KOKKOS_INLINE_FUNCTION +size_t cardinality_count( + const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape ) +{ + return size_t(shape.N0) * shape.N1 * shape.N2 * shape.N3 * + shape.N4 * shape.N5 * shape.N6 * shape.N7 ; +} + +//---------------------------------------------------------------------------- + +} /* namespace Impl */ +} /* namespace Kokkos */ + +#endif /* #ifndef KOKKOS_CORESHAPE_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Singleton.hpp b/lib/kokkos/core/src/impl/Kokkos_Singleton.hpp new file mode 100644 index 0000000000000000000000000000000000000000..86bc94ab0be9e8cfd00ea5a95cebc906bd3aa312 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Singleton.hpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_SINGLETON_HPP +#define KOKKOS_SINGLETON_HPP + +#include <Kokkos_Macros.hpp> +#include <cstddef> + +namespace Kokkos { namespace Impl { + + +}} // namespace Kokkos::Impl + +#endif // KOKKOS_SINGLETON_HPP diff --git a/lib/kokkos/core/src/impl/Kokkos_StaticAssert.hpp b/lib/kokkos/core/src/impl/Kokkos_StaticAssert.hpp new file mode 100644 index 0000000000000000000000000000000000000000..25e2ec9dc1849db862d9cb0d01bfd817c584b3b8 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_StaticAssert.hpp @@ -0,0 +1,79 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STATICASSERT_HPP +#define KOKKOS_STATICASSERT_HPP + +namespace Kokkos { +namespace Impl { + +template < bool , class T = void > +struct StaticAssert ; + +template< class T > +struct StaticAssert< true , T > { + typedef T type ; + static const bool value = true ; +}; + +template < class A , class B > +struct StaticAssertSame ; + +template < class A > +struct StaticAssertSame<A,A> { typedef A type ; }; + +template < class A , class B > +struct StaticAssertAssignable ; + +template < class A > +struct StaticAssertAssignable<A,A> { typedef A type ; }; + +template < class A > +struct StaticAssertAssignable< const A , A > { typedef const A type ; }; + +} // namespace Impl +} // namespace Kokkos + +#endif /* KOKKOS_STATICASSERT_HPP */ + + diff --git a/lib/kokkos/core/src/impl/Kokkos_Synchronic.hpp b/lib/kokkos/core/src/impl/Kokkos_Synchronic.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b2aea14df44ea55b8c86a70c9907792b51525918 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Synchronic.hpp @@ -0,0 +1,693 @@ +/* + +Copyright (c) 2014, NVIDIA Corporation +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. 
Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef KOKKOS_SYNCHRONIC_HPP +#define KOKKOS_SYNCHRONIC_HPP + +#include <impl/Kokkos_Synchronic_Config.hpp> + +#include <atomic> +#include <chrono> +#include <thread> +#include <functional> +#include <algorithm> + +namespace Kokkos { +namespace Impl { + +enum notify_hint { + notify_all, + notify_one, + notify_none +}; +enum expect_hint { + expect_urgent, + expect_delay +}; + +namespace Details { + +template <class S, class T> +bool __synchronic_spin_wait_for_update(S const& arg, T const& nval, int attempts) noexcept { + int i = 0; + for(;i < __SYNCHRONIC_SPIN_RELAX(attempts); ++i) + if(__builtin_expect(arg.load(std::memory_order_relaxed) != nval,1)) + return true; + else + __synchronic_relax(); + for(;i < attempts; ++i) + if(__builtin_expect(arg.load(std::memory_order_relaxed) != nval,1)) + return true; + else + __synchronic_yield(); + return false; +} + +struct __exponential_backoff { + __exponential_backoff(int arg_maximum=512) : maximum(arg_maximum), microseconds(8), x(123456789), y(362436069), z(521288629) { + } + static inline void sleep_for(std::chrono::microseconds const& time) { + auto t = time.count(); + if(__builtin_expect(t > 75,0)) { + portable_sleep(time); + } + else if(__builtin_expect(t > 25,0)) + __synchronic_yield(); + else + __synchronic_relax(); + } + void sleep_for_step() { + sleep_for(step()); + } + std::chrono::microseconds step() { + float const f = ranfu(); + int const t = int(microseconds * f); + if(__builtin_expect(f >= 0.95f,0)) + microseconds = 8; + else + microseconds = (std::min)(microseconds>>1,maximum); + return std::chrono::microseconds(t); + } +private : + int maximum, microseconds, x, y, z; + int xorshf96() { + int t; + x ^= x << 16; x ^= x >> 5; x ^= x << 1; + t = x; x = y; y = z; z = t ^ x ^ y; + return z; + } + float ranfu() { + return (float)(xorshf96()&(~0UL>>1)) / (float)(~0UL>>1); + } +}; + +template <class T, class Enable = void> +struct __synchronic_base { + +protected: + std::atomic<T> atom; + + void 
notify(notify_hint = notify_all) noexcept { + } + void notify(notify_hint = notify_all) volatile noexcept { + } + +public : + __synchronic_base() noexcept = default; + constexpr __synchronic_base(T v) noexcept : atom(v) { } + __synchronic_base(const __synchronic_base&) = delete; + ~__synchronic_base() { } + __synchronic_base& operator=(const __synchronic_base&) = delete; + __synchronic_base& operator=(const __synchronic_base&) volatile = delete; + + void expect_update(T val, expect_hint = expect_urgent) const noexcept { + if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A)) + return; + __exponential_backoff b; + while(atom.load(std::memory_order_relaxed) == val) { + __do_backoff(b); + if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B)) + return; + } + } + void expect_update(T val, expect_hint = expect_urgent) const volatile noexcept { + if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A)) + return; + __exponential_backoff b; + while(atom.load(std::memory_order_relaxed) == val) { + __do_backoff(b); + if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B)) + return; + } + } + + template <class Clock, class Duration> + void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const { + if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A)) + return; + __exponential_backoff b; + std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now(); + while(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val) { + __do_backoff(b); + if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B)) + return; + remains = then - std::chrono::high_resolution_clock::now(); + } + } + template <class Clock, class Duration> + void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const volatile 
{ + if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A)) + return; + __exponential_backoff b; + std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now(); + while(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val) { + __do_backoff(b); + if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B)) + return; + remains = then - std::chrono::high_resolution_clock::now(); + } + } +}; + +#ifdef __SYNCHRONIC_COMPATIBLE +template <class T> +struct __synchronic_base<T, typename std::enable_if<__SYNCHRONIC_COMPATIBLE(T)>::type> { + +public: + std::atomic<T> atom; + + void notify(notify_hint hint = notify_all) noexcept { + if(__builtin_expect(hint == notify_none,1)) + return; + auto const x = count.fetch_add(0,std::memory_order_acq_rel); + if(__builtin_expect(x,0)) { + if(__builtin_expect(hint == notify_all,1)) + __synchronic_wake_all(&atom); + else + __synchronic_wake_one(&atom); + } + } + void notify(notify_hint hint = notify_all) volatile noexcept { + if(__builtin_expect(hint == notify_none,1)) + return; + auto const x = count.fetch_add(0,std::memory_order_acq_rel); + if(__builtin_expect(x,0)) { + if(__builtin_expect(hint == notify_all,1)) + __synchronic_wake_all_volatile(&atom); + else + __synchronic_wake_one_volatile(&atom); + } + } + +public : + __synchronic_base() noexcept : count(0) { } + constexpr __synchronic_base(T v) noexcept : atom(v), count(0) { } + __synchronic_base(const __synchronic_base&) = delete; + ~__synchronic_base() { } + __synchronic_base& operator=(const __synchronic_base&) = delete; + __synchronic_base& operator=(const __synchronic_base&) volatile = delete; + + void expect_update(T val, expect_hint = expect_urgent) const noexcept { + if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1)) + return; + while(__builtin_expect(atom.load(std::memory_order_relaxed) == val,1)) { + 
count.fetch_add(1,std::memory_order_release); + __synchronic_wait(&atom,val); + count.fetch_add(-1,std::memory_order_acquire); + } + } + void expect_update(T val, expect_hint = expect_urgent) const volatile noexcept { + if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1)) + return; + while(__builtin_expect(atom.load(std::memory_order_relaxed) == val,1)) { + count.fetch_add(1,std::memory_order_release); + __synchronic_wait_volatile(&atom,val); + count.fetch_add(-1,std::memory_order_acquire); + } + } + + template <class Clock, class Duration> + void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const { + if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1)) + return; + std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now(); + while(__builtin_expect(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val,1)) { + count.fetch_add(1,std::memory_order_release); + __synchronic_wait_timed(&atom,val,remains); + count.fetch_add(-1,std::memory_order_acquire); + remains = then - std::chrono::high_resolution_clock::now(); + } + } + template <class Clock, class Duration> + void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const volatile { + if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1)) + return; + std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now(); + while(__builtin_expect(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val,1)) { + count.fetch_add(1,std::memory_order_release); + __synchronic_wait_timed_volatile(&atom,val,remains); + count.fetch_add(-1,std::memory_order_acquire); + remains = then - std::chrono::high_resolution_clock::now(); + } + } +private: + mutable std::atomic<int> count; 
+}; +#endif + +template <class T, class Enable = void> +struct __synchronic : public __synchronic_base<T> { + + __synchronic() noexcept = default; + constexpr __synchronic(T v) noexcept : __synchronic_base<T>(v) { } + __synchronic(const __synchronic&) = delete; + __synchronic& operator=(const __synchronic&) = delete; + __synchronic& operator=(const __synchronic&) volatile = delete; +}; + +template <class T> +struct __synchronic<T,typename std::enable_if<std::is_integral<T>::value>::type> : public __synchronic_base<T> { + + T fetch_add(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { + auto const t = this->atom.fetch_add(v,m); + this->notify(n); + return t; + } + T fetch_add(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { + auto const t = this->atom.fetch_add(v,m); + this->notify(n); + return t; + } + T fetch_sub(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { + auto const t = this->atom.fetch_sub(v,m); + this->notify(n); + return t; + } + T fetch_sub(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { + auto const t = this->atom.fetch_sub(v,m); + this->notify(n); + return t; + } + T fetch_and(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { + auto const t = this->atom.fetch_and(v,m); + this->notify(n); + return t; + } + T fetch_and(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { + auto const t = this->atom.fetch_and(v,m); + this->notify(n); + return t; + } + T fetch_or(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { + auto const t = this->atom.fetch_or(v,m); + this->notify(n); + return t; + } + T fetch_or(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { + auto const t = 
this->atom.fetch_or(v,m); + this->notify(n); + return t; + } + T fetch_xor(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { + auto const t = this->atom.fetch_xor(v,m); + this->notify(n); + return t; + } + T fetch_xor(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { + auto const t = this->atom.fetch_xor(v,m); + this->notify(n); + return t; + } + + __synchronic() noexcept = default; + constexpr __synchronic(T v) noexcept : __synchronic_base<T>(v) { } + __synchronic(const __synchronic&) = delete; + __synchronic& operator=(const __synchronic&) = delete; + __synchronic& operator=(const __synchronic&) volatile = delete; + + T operator=(T v) volatile noexcept { + auto const t = this->atom = v; + this->notify(); + return t; + } + T operator=(T v) noexcept { + auto const t = this->atom = v; + this->notify(); + return t; + } + T operator++(int) volatile noexcept { + auto const t = ++this->atom; + this->notify(); + return t; + } + T operator++(int) noexcept { + auto const t = ++this->atom; + this->notify(); + return t; + } + T operator--(int) volatile noexcept { + auto const t = --this->atom; + this->notify(); + return t; + } + T operator--(int) noexcept { + auto const t = --this->atom; + this->notify(); + return t; + } + T operator++() volatile noexcept { + auto const t = this->atom++; + this->notify(); + return t; + } + T operator++() noexcept { + auto const t = this->atom++; + this->notify(); + return t; + } + T operator--() volatile noexcept { + auto const t = this->atom--; + this->notify(); + return t; + } + T operator--() noexcept { + auto const t = this->atom--; + this->notify(); + return t; + } + T operator+=(T v) volatile noexcept { + auto const t = this->atom += v; + this->notify(); + return t; + } + T operator+=(T v) noexcept { + auto const t = this->atom += v; + this->notify(); + return t; + } + T operator-=(T v) volatile noexcept { + auto const t = this->atom -= v; + 
this->notify(); + return t; + } + T operator-=(T v) noexcept { + auto const t = this->atom -= v; + this->notify(); + return t; + } + T operator&=(T v) volatile noexcept { + auto const t = this->atom &= v; + this->notify(); + return t; + } + T operator&=(T v) noexcept { + auto const t = this->atom &= v; + this->notify(); + return t; + } + T operator|=(T v) volatile noexcept { + auto const t = this->atom |= v; + this->notify(); + return t; + } + T operator|=(T v) noexcept { + auto const t = this->atom |= v; + this->notify(); + return t; + } + T operator^=(T v) volatile noexcept { + auto const t = this->atom ^= v; + this->notify(); + return t; + } + T operator^=(T v) noexcept { + auto const t = this->atom ^= v; + this->notify(); + return t; + } +}; + +template <class T> +struct __synchronic<T*> : public __synchronic_base<T*> { + + T* fetch_add(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { + auto const t = this->atom.fetch_add(v,m); + this->notify(n); + return t; + } + T* fetch_add(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { + auto const t = this->atom.fetch_add(v,m); + this->notify(n); + return t; + } + T* fetch_sub(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { + auto const t = this->atom.fetch_sub(v,m); + this->notify(n); + return t; + } + T* fetch_sub(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { + auto const t = this->atom.fetch_sub(v,m); + this->notify(n); + return t; + } + + __synchronic() noexcept = default; + constexpr __synchronic(T* v) noexcept : __synchronic_base<T*>(v) { } + __synchronic(const __synchronic&) = delete; + __synchronic& operator=(const __synchronic&) = delete; + __synchronic& operator=(const __synchronic&) volatile = delete; + + T* operator=(T* v) volatile noexcept { + auto const t = this->atom = v; + 
this->notify(); + return t; + } + T* operator=(T* v) noexcept { + auto const t = this->atom = v; + this->notify(); + return t; + } + T* operator++(int) volatile noexcept { + auto const t = ++this->atom; + this->notify(); + return t; + } + T* operator++(int) noexcept { + auto const t = ++this->atom; + this->notify(); + return t; + } + T* operator--(int) volatile noexcept { + auto const t = --this->atom; + this->notify(); + return t; + } + T* operator--(int) noexcept { + auto const t = --this->atom; + this->notify(); + return t; + } + T* operator++() volatile noexcept { + auto const t = this->atom++; + this->notify(); + return t; + } + T* operator++() noexcept { + auto const t = this->atom++; + this->notify(); + return t; + } + T* operator--() volatile noexcept { + auto const t = this->atom--; + this->notify(); + return t; + } + T* operator--() noexcept { + auto const t = this->atom--; + this->notify(); + return t; + } + T* operator+=(ptrdiff_t v) volatile noexcept { + auto const t = this->atom += v; + this->notify(); + return t; + } + T* operator+=(ptrdiff_t v) noexcept { + auto const t = this->atom += v; + this->notify(); + return t; + } + T* operator-=(ptrdiff_t v) volatile noexcept { + auto const t = this->atom -= v; + this->notify(); + return t; + } + T* operator-=(ptrdiff_t v) noexcept { + auto const t = this->atom -= v; + this->notify(); + return t; + } +}; + +} //namespace Details + +template <class T> +struct synchronic : public Details::__synchronic<T> { + + bool is_lock_free() const volatile noexcept { return this->atom.is_lock_free(); } + bool is_lock_free() const noexcept { return this->atom.is_lock_free(); } + void store(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { + this->atom.store(v,m); + this->notify(n); + } + void store(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { + this->atom.store(v,m); + this->notify(n); + } + T load(std::memory_order m = 
std::memory_order_seq_cst) const volatile noexcept { return this->atom.load(m); } + T load(std::memory_order m = std::memory_order_seq_cst) const noexcept { return this->atom.load(m); } + + operator T() const volatile noexcept { return (T)this->atom; } + operator T() const noexcept { return (T)this->atom; } + + T exchange(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { + auto const t = this->atom.exchange(v,m); + this->notify(n); + return t; + } + T exchange(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { + auto const t = this->atom.exchange(v,m); + this->notify(n); + return t; + } + bool compare_exchange_weak(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) volatile noexcept { + auto const t = this->atom.compare_exchange_weak(r,v,m1,m2); + this->notify(n); + return t; + } + bool compare_exchange_weak(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) noexcept { + auto const t = this->atom.compare_exchange_weak(r,v,m1, m2); + this->notify(n); + return t; + } + bool compare_exchange_strong(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) volatile noexcept { + auto const t = this->atom.compare_exchange_strong(r,v,m1,m2); + this->notify(n); + return t; + } + bool compare_exchange_strong(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) noexcept { + auto const t = this->atom.compare_exchange_strong(r,v,m1,m2); + this->notify(n); + return t; + } + bool compare_exchange_weak(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { + auto const t = this->atom.compare_exchange_weak(r,v,m); + this->notify(n); + return t; + } + bool compare_exchange_weak(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { + auto const t = 
this->atom.compare_exchange_weak(r,v,m); + this->notify(n); + return t; + } + bool compare_exchange_strong(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { + auto const t = this->atom.compare_exchange_strong(r,v,m); + this->notify(n); + return t; + } + bool compare_exchange_strong(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { + auto const t = this->atom.compare_exchange_strong(r,v,m); + this->notify(n); + return t; + } + + synchronic() noexcept = default; + constexpr synchronic(T val) noexcept : Details::__synchronic<T>(val) { } + synchronic(const synchronic&) = delete; + ~synchronic() { } + synchronic& operator=(const synchronic&) = delete; + synchronic& operator=(const synchronic&) volatile = delete; + T operator=(T val) noexcept { + return Details::__synchronic<T>::operator=(val); + } + T operator=(T val) volatile noexcept { + return Details::__synchronic<T>::operator=(val); + } + + T load_when_not_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const noexcept { + Details::__synchronic<T>::expect_update(val,h); + return load(order); + } + T load_when_not_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const volatile noexcept { + Details::__synchronic<T>::expect_update(val,h); + return load(order); + } + T load_when_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const noexcept { + for(T nval = load(std::memory_order_relaxed); nval != val; nval = load(std::memory_order_relaxed)) + Details::__synchronic<T>::expect_update(nval,h); + return load(order); + } + T load_when_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const volatile noexcept { + for(T nval = load(std::memory_order_relaxed); nval != val; nval = load(std::memory_order_relaxed)) + 
expect_update(nval,h); + return load(order); + } + template <class Rep, class Period> + void expect_update_for(T val, std::chrono::duration<Rep,Period> const& delta, expect_hint h = expect_urgent) const { + Details::__synchronic<T>::expect_update_until(val, std::chrono::high_resolution_clock::now() + delta,h); + } + template < class Rep, class Period> + void expect_update_for(T val, std::chrono::duration<Rep,Period> const& delta, expect_hint h = expect_urgent) const volatile { + Details::__synchronic<T>::expect_update_until(val, std::chrono::high_resolution_clock::now() + delta,h); + } +}; + +#include <inttypes.h> + +typedef synchronic<char> synchronic_char; +typedef synchronic<char> synchronic_schar; +typedef synchronic<unsigned char> synchronic_uchar; +typedef synchronic<short> synchronic_short; +typedef synchronic<unsigned short> synchronic_ushort; +typedef synchronic<int> synchronic_int; +typedef synchronic<unsigned int> synchronic_uint; +typedef synchronic<long> synchronic_long; +typedef synchronic<unsigned long> synchronic_ulong; +typedef synchronic<long long> synchronic_llong; +typedef synchronic<unsigned long long> synchronic_ullong; +//typedef synchronic<char16_t> synchronic_char16_t; +//typedef synchronic<char32_t> synchronic_char32_t; +typedef synchronic<wchar_t> synchronic_wchar_t; + +typedef synchronic<int_least8_t> synchronic_int_least8_t; +typedef synchronic<uint_least8_t> synchronic_uint_least8_t; +typedef synchronic<int_least16_t> synchronic_int_least16_t; +typedef synchronic<uint_least16_t> synchronic_uint_least16_t; +typedef synchronic<int_least32_t> synchronic_int_least32_t; +typedef synchronic<uint_least32_t> synchronic_uint_least32_t; +//typedef synchronic<int_least_64_t> synchronic_int_least_64_t; +typedef synchronic<uint_least64_t> synchronic_uint_least64_t; +typedef synchronic<int_fast8_t> synchronic_int_fast8_t; +typedef synchronic<uint_fast8_t> synchronic_uint_fast8_t; +typedef synchronic<int_fast16_t> synchronic_int_fast16_t; +typedef 
synchronic<uint_fast16_t> synchronic_uint_fast16_t; +typedef synchronic<int_fast32_t> synchronic_int_fast32_t; +typedef synchronic<uint_fast32_t> synchronic_uint_fast32_t; +typedef synchronic<int_fast64_t> synchronic_int_fast64_t; +typedef synchronic<uint_fast64_t> synchronic_uint_fast64_t; +typedef synchronic<intptr_t> synchronic_intptr_t; +typedef synchronic<uintptr_t> synchronic_uintptr_t; +typedef synchronic<size_t> synchronic_size_t; +typedef synchronic<ptrdiff_t> synchronic_ptrdiff_t; +typedef synchronic<intmax_t> synchronic_intmax_t; +typedef synchronic<uintmax_t> synchronic_uintmax_t; + +} +} + +#endif //__SYNCHRONIC_H diff --git a/lib/kokkos/core/src/impl/Kokkos_Synchronic_Config.hpp b/lib/kokkos/core/src/impl/Kokkos_Synchronic_Config.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0a6dd6e715edad752f56756ccdc6fba3d43e30fb --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Synchronic_Config.hpp @@ -0,0 +1,169 @@ +/* + +Copyright (c) 2014, NVIDIA Corporation +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef KOKKOS_SYNCHRONIC_CONFIG_H +#define KOKKOS_SYNCHRONIC_CONFIG_H + +#include <thread> +#include <chrono> + +namespace Kokkos { +namespace Impl { + +//the default yield function used inside the implementation is the Standard one +#define __synchronic_yield std::this_thread::yield +#define __synchronic_relax __synchronic_yield + +#if defined(_MSC_VER) + //this is a handy GCC optimization that I use inside the implementation + #define __builtin_expect(condition,common) condition + #if _MSC_VER <= 1800 + //using certain keywords that VC++ temporarily doesn't support + #define _ALLOW_KEYWORD_MACROS + #define noexcept + #define constexpr + #endif + //yes, I define multiple assignment operators + #pragma warning(disable:4522) + //I don't understand how Windows is so bad at timing functions, but is OK + //with straight-up yield loops + #define __do_backoff(b) __synchronic_yield() +#else +#define __do_backoff(b) b.sleep_for_step() +#endif + +//certain platforms have efficient support for spin-waiting built into the operating system +#if defined(__linux__) || (defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0602) +#if defined(_WIN32_WINNT) +#include <winsock2.h> +#include <Windows.h> + //the combination of WaitOnAddress and WakeByAddressAll is supported on Windows 8.1+ + #define __synchronic_wait(x,v) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),-1) + #define __synchronic_wait_timed(x,v,t) 
WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),std::chrono::duration_cast<std::chrono::milliseconds>(t).count()) + #define __synchronic_wake_one(x) WakeByAddressSingle((PVOID)x) + #define __synchronic_wake_all(x) WakeByAddressAll((PVOID)x) + #define __synchronic_wait_volatile(x,v) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),-1) + #define __synchronic_wait_timed_volatile(x,v,t) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),std::chrono::duration_cast<std::chrono::milliseconds>(t).count()) + #define __synchronic_wake_one_volatile(x) WakeByAddressSingle((PVOID)x) + #define __synchronic_wake_all_volatile(x) WakeByAddressAll((PVOID)x) + #define __SYNCHRONIC_COMPATIBLE(x) (std::is_pod<x>::value && (sizeof(x) <= 8)) + + inline void native_sleep(unsigned long microseconds) + { + // What to do if microseconds is < 1000? + Sleep(microseconds / 1000); + } + + inline void native_yield() + { + SwitchToThread(); + } +#elif defined(__linux__) + #include <chrono> + #include <time.h> + #include <unistd.h> + #include <pthread.h> + #include <linux/futex.h> + #include <sys/syscall.h> + #include <climits> + #include <cassert> + template < class Rep, class Period> + inline timespec to_timespec(std::chrono::duration<Rep,Period> const& delta) { + struct timespec ts; + ts.tv_sec = static_cast<long>(std::chrono::duration_cast<std::chrono::seconds>(delta).count()); + assert(!ts.tv_sec); + ts.tv_nsec = static_cast<long>(std::chrono::duration_cast<std::chrono::nanoseconds>(delta).count()); + return ts; + } + inline long futex(void const* addr1, int op, int val1) { + return syscall(SYS_futex, addr1, op, val1, 0, 0, 0); + } + inline long futex(void const* addr1, int op, int val1, struct timespec timeout) { + return syscall(SYS_futex, addr1, op, val1, &timeout, 0, 0); + } + inline void native_sleep(unsigned long microseconds) + { + usleep(microseconds); + } + inline void native_yield() + { + pthread_yield(); + } + + //the combination of SYS_futex(WAIT) and SYS_futex(WAKE) is supported on all recent Linux 
distributions + #define __synchronic_wait(x,v) futex(x, FUTEX_WAIT_PRIVATE, v) + #define __synchronic_wait_timed(x,v,t) futex(x, FUTEX_WAIT_PRIVATE, v, to_timespec(t)) + #define __synchronic_wake_one(x) futex(x, FUTEX_WAKE_PRIVATE, 1) + #define __synchronic_wake_all(x) futex(x, FUTEX_WAKE_PRIVATE, INT_MAX) + #define __synchronic_wait_volatile(x,v) futex(x, FUTEX_WAIT, v) + #define __synchronic_wait_volatile_timed(x,v,t) futex(x, FUTEX_WAIT, v, to_timespec(t)) + #define __synchronic_wake_one_volatile(x) futex(x, FUTEX_WAKE, 1) + #define __synchronic_wake_all_volatile(x) futex(x, FUTEX_WAKE, INT_MAX) + #define __SYNCHRONIC_COMPATIBLE(x) (std::is_integral<x>::value && (sizeof(x) <= 4)) + + //the yield function on Linux is better replaced by sched_yield, which is tuned for spin-waiting + #undef __synchronic_yield + #define __synchronic_yield sched_yield + + //for extremely short wait times, just let another hyper-thread run + #undef __synchronic_relax + #define __synchronic_relax() asm volatile("rep; nop" ::: "memory") + +#endif +#endif + +#ifdef _GLIBCXX_USE_NANOSLEEP +inline void portable_sleep(std::chrono::microseconds const& time) +{ std::this_thread::sleep_for(time); } +#else +inline void portable_sleep(std::chrono::microseconds const& time) +{ native_sleep(time.count()); } +#endif + +#ifdef _GLIBCXX_USE_SCHED_YIELD +inline void portable_yield() +{ std::this_thread::yield(); } +#else +inline void portable_yield() +{ native_yield(); } +#endif + +//this is the number of times we initially spin, on the first wait attempt +#define __SYNCHRONIC_SPIN_COUNT_A 16 + +//this is how decide to yield instead of just spinning, 'c' is the current trip count +//#define __SYNCHRONIC_SPIN_YIELD(c) true +#define __SYNCHRONIC_SPIN_RELAX(c) (c>>3) + +//this is the number of times we normally spin, on every subsequent wait attempt +#define __SYNCHRONIC_SPIN_COUNT_B 8 + +} +} + +#endif //__SYNCHRONIC_CONFIG_H diff --git a/lib/kokkos/core/src/impl/Kokkos_Synchronic_n3998.hpp 
b/lib/kokkos/core/src/impl/Kokkos_Synchronic_n3998.hpp new file mode 100644 index 0000000000000000000000000000000000000000..facc8d6d8e67a4828aa94bd75fb7590f454b41f6 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Synchronic_n3998.hpp @@ -0,0 +1,162 @@ +/* + +Copyright (c) 2014, NVIDIA Corporation +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef KOKKOS_SYNCHRONIC_N3998_HPP +#define KOKKOS_SYNCHRONIC_N3998_HPP + +#include <impl/Kokkos_Synchronic.hpp> +#include <functional> + +/* +In the section below, a synchronization point represents a point at which a +thread may block until a given synchronization condition has been reached or +at which it may notify other threads that a synchronization condition has +been achieved. 
+*/ +namespace Kokkos { namespace Impl { + + /* + A latch maintains an internal counter that is initialized when the latch + is created. The synchronization condition is reached when the counter is + decremented to 0. Threads may block at a synchronization point waiting + for the condition to be reached. When the condition is reached, any such + blocked threads will be released. + */ + struct latch { + latch(int val) : count(val), released(false) { } + latch(const latch&) = delete; + latch& operator=(const latch&) = delete; + ~latch( ) { } + void arrive( ) { + __arrive( ); + } + void arrive_and_wait( ) { + if(!__arrive( )) + wait( ); + } + void wait( ) { + while(!released.load_when_not_equal(false,std::memory_order_acquire)) + ; + } + bool try_wait( ) { + return released.load(std::memory_order_acquire); + } + private: + bool __arrive( ) { + if(count.fetch_add(-1,std::memory_order_release)!=1) + return false; + released.store(true,std::memory_order_release); + return true; + } + std::atomic<int> count; + synchronic<bool> released; + }; + + /* + A barrier is created with an initial value representing the number of threads + that can arrive at the synchronization point. When that many threads have + arrived, the synchronization condition is reached and the threads are + released. The barrier will then reset, and may be reused for a new cycle, in + which the same set of threads may arrive again at the synchronization point. + The same set of threads shall arrive at the barrier in each cycle, otherwise + the behaviour is undefined. 
+ */ + struct barrier { + barrier(int val) : expected(val), arrived(0), nexpected(val), epoch(0) { } + barrier(const barrier&) = delete; + barrier& operator=(const barrier&) = delete; + ~barrier() { } + void arrive_and_wait() { + int const myepoch = epoch.load(std::memory_order_relaxed); + if(!__arrive(myepoch)) + while(epoch.load_when_not_equal(myepoch,std::memory_order_acquire) == myepoch) + ; + } + void arrive_and_drop() { + nexpected.fetch_add(-1,std::memory_order_relaxed); + __arrive(epoch.load(std::memory_order_relaxed)); + } + private: + bool __arrive(int const myepoch) { + int const myresult = arrived.fetch_add(1,std::memory_order_acq_rel) + 1; + if(__builtin_expect(myresult == expected,0)) { + expected = nexpected.load(std::memory_order_relaxed); + arrived.store(0,std::memory_order_relaxed); + epoch.store(myepoch+1,std::memory_order_release); + return true; + } + return false; + } + int expected; + std::atomic<int> arrived, nexpected; + synchronic<int> epoch; + }; + + /* + A notifying barrier behaves as a barrier, but is constructed with a callable + completion function that is invoked after all threads have arrived at the + synchronization point, and before the synchronization condition is reached. + The completion may modify the set of threads that arrives at the barrier in + each cycle. 
+ */ + struct notifying_barrier { + template <typename T> + notifying_barrier(int val, T && f) : expected(val), arrived(0), nexpected(val), epoch(0), completion(std::forward<T>(f)) { } + notifying_barrier(const notifying_barrier&) = delete; + notifying_barrier& operator=(const notifying_barrier&) = delete; + ~notifying_barrier( ) { } + void arrive_and_wait() { + int const myepoch = epoch.load(std::memory_order_relaxed); + if(!__arrive(myepoch)) + while(epoch.load_when_not_equal(myepoch,std::memory_order_acquire) == myepoch) + ; + } + void arrive_and_drop() { + nexpected.fetch_add(-1,std::memory_order_relaxed); + __arrive(epoch.load(std::memory_order_relaxed)); + } + private: + bool __arrive(int const myepoch) { + int const myresult = arrived.fetch_add(1,std::memory_order_acq_rel) + 1; + if(__builtin_expect(myresult == expected,0)) { + int const newexpected = completion(); + expected = newexpected ? newexpected : nexpected.load(std::memory_order_relaxed); + arrived.store(0,std::memory_order_relaxed); + epoch.store(myepoch+1,std::memory_order_release); + return true; + } + return false; + } + int expected; + std::atomic<int> arrived, nexpected; + synchronic<int> epoch; + std::function<int()> completion; + }; +}} + +#endif //__N3998_H diff --git a/lib/kokkos/core/src/impl/Kokkos_Tags.hpp b/lib/kokkos/core/src/impl/Kokkos_Tags.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0bc2864ff1d9079f47ec4369f25388794aa52f71 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Tags.hpp @@ -0,0 +1,198 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TAGS_HPP +#define KOKKOS_TAGS_HPP + +#include <impl/Kokkos_Traits.hpp> +#include <Kokkos_Core_fwd.hpp> +#include <type_traits> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +/** KOKKOS_HAVE_TYPE( Type ) + * + * defines a meta-function that check if a type expose an internal typedef or + * type alias which matches Type + * + * e.g. + * KOKKOS_HAVE_TYPE( array_layout ); + * struct Foo { using array_layout = void; }; + * have_array_layout<Foo>::value == 1; + */ +#define KOKKOS_HAVE_TYPE( Type ) \ +template <typename T> \ +struct have_##Type { \ + template <typename U> static std::false_type have_type(...); \ + template <typename U> static std::true_type have_type( typename U::Type* ); \ + using type = decltype(have_type<T>(nullptr)); \ + static constexpr bool value = type::value; \ +} + +/** KOKKOS_IS_CONCEPT( Concept ) + * + * defines a meta-function that check if a type match the given Kokkos concept + * type alias which matches Type + * + * e.g. 
+ * KOKKOS_IS_CONCEPT( array_layout ); + * struct Foo { using array_layout = Foo; }; + * is_array_layout<Foo>::value == 1; + */ +#define KOKKOS_IS_CONCEPT( Concept ) \ +template <typename T> \ +struct is_##Concept { \ + template <typename U> static std::false_type have_concept(...); \ + template <typename U> static auto have_concept( typename U::Concept* ) \ + ->typename std::is_same<T, typename U::Concept>::type;\ + using type = decltype(have_concept<T>(nullptr)); \ + static constexpr bool value = type::value; \ +} + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { namespace Impl { + +template <typename T> +using is_void = std::is_same<void,T>; + +// is_memory_space<T>::value +KOKKOS_IS_CONCEPT( memory_space ); + +// is_memory_traits<T>::value +KOKKOS_IS_CONCEPT( memory_traits ); + +// is_execution_space<T>::value +KOKKOS_IS_CONCEPT( execution_space ); + +// is_execution_policy<T>::value +KOKKOS_IS_CONCEPT( execution_policy ); + +// is_array_layout<T>::value +KOKKOS_IS_CONCEPT( array_layout ); + +// is_iteration_pattern<T>::value +KOKKOS_IS_CONCEPT( iteration_pattern ); + +// is_schedule_type<T>::value +KOKKOS_IS_CONCEPT( schedule_type ); + +// is_index_type<T>::value +KOKKOS_IS_CONCEPT( index_type ); + +}} // namespace Kokkos::Impl + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +template< class ExecutionSpace , class MemorySpace > +struct Device { + static_assert( Impl::is_execution_space<ExecutionSpace>::value + , "Execution space is not valid" ); + static_assert( Impl::is_memory_space<MemorySpace>::value + , "Memory space is not valid" ); + typedef ExecutionSpace execution_space; + typedef MemorySpace memory_space; + typedef Device<execution_space,memory_space> device_type; +}; +} + 
+//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class C , class Enable = void > +struct is_space : public Impl::false_type {}; + +template< class C > +struct is_space< C + , typename Impl::enable_if<( + Impl::is_same< C , typename C::execution_space >::value || + Impl::is_same< C , typename C::memory_space >::value || + Impl::is_same< C , Device< + typename C::execution_space, + typename C::memory_space> >::value + )>::type + > + : public Impl::true_type +{ + typedef typename C::execution_space execution_space ; + typedef typename C::memory_space memory_space ; + + // The host_memory_space defines a space with host-resident memory. + // If the execution space's memory space is host accessible then use that execution space. + // else use the HostSpace. + typedef + typename Impl::if_c< Impl::is_same< memory_space , HostSpace >::value +#ifdef KOKKOS_HAVE_CUDA + || Impl::is_same< memory_space , CudaUVMSpace>::value + || Impl::is_same< memory_space , CudaHostPinnedSpace>::value +#endif + , memory_space , HostSpace >::type + host_memory_space ; + + // The host_execution_space defines a space which has access to HostSpace. + // If the execution space can access HostSpace then use that execution space. + // else use the DefaultHostExecutionSpace. 
+#ifdef KOKKOS_HAVE_CUDA + typedef + typename Impl::if_c< Impl::is_same< execution_space , Cuda >::value + , DefaultHostExecutionSpace , execution_space >::type + host_execution_space ; +#else + typedef execution_space host_execution_space; +#endif + + typedef Device<host_execution_space,host_memory_space> host_mirror_space; +}; +} +} + +#endif diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp new file mode 100644 index 0000000000000000000000000000000000000000..663bb1985d3636e84e236660b1c58fda5579cccc --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp @@ -0,0 +1,499 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_TASKQUEUE_HPP +#define KOKKOS_IMPL_TASKQUEUE_HPP + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +#include <string> +#include <typeinfo> +#include <stdexcept> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +template< typename > class TaskPolicy ; + +template< typename Arg1 = void , typename Arg2 = void > class Future ; + +} /* namespace Kokkos */ + +namespace Kokkos { +namespace Impl { + +template< typename , typename , typename > class TaskBase ; +template< typename > class TaskExec ; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< typename Space > +class TaskQueueSpecialization ; + +/** \brief Manage task allocation, deallocation, and scheduling. + * + * Task execution is deferred to the TaskQueueSpecialization. + * All other aspects of task management have shared implementation. 
+ */ +template< typename ExecSpace > +class TaskQueue { +private: + + friend class TaskQueueSpecialization< ExecSpace > ; + friend class Kokkos::TaskPolicy< ExecSpace > ; + + using execution_space = ExecSpace ; + using specialization = TaskQueueSpecialization< execution_space > ; + using memory_space = typename specialization::memory_space ; + using device_type = Kokkos::Device< execution_space , memory_space > ; + using memory_pool = Kokkos::Experimental::MemoryPool< device_type > ; + using task_root_type = Kokkos::Impl::TaskBase<execution_space,void,void> ; + + struct Destroy { + TaskQueue * m_queue ; + void destroy_shared_allocation(); + }; + + //---------------------------------------- + + enum : int { NumQueue = 3 }; + + // Queue is organized as [ priority ][ type ] + + memory_pool m_memory ; + task_root_type * volatile m_ready[ NumQueue ][ 2 ]; + long m_accum_alloc ; // Accumulated number of allocations + int m_count_alloc ; // Current number of allocations + int m_max_alloc ; // Maximum number of allocations + int m_ready_count ; // Number of ready or executing + + //---------------------------------------- + + ~TaskQueue(); + TaskQueue() = delete ; + TaskQueue( TaskQueue && ) = delete ; + TaskQueue( TaskQueue const & ) = delete ; + TaskQueue & operator = ( TaskQueue && ) = delete ; + TaskQueue & operator = ( TaskQueue const & ) = delete ; + + TaskQueue + ( const memory_space & arg_space + , unsigned const arg_memory_pool_capacity + , unsigned const arg_memory_pool_superblock_capacity_log2 + ); + + // Schedule a task + // Precondition: + // task is not executing + // task->m_next is the dependence or zero + // Postcondition: + // task->m_next is linked list membership + KOKKOS_FUNCTION + void schedule( task_root_type * const ); + + // Complete a task + // Precondition: + // task is not executing + // task->m_next == LockTag => task is complete + // task->m_next != LockTag => task is respawn + // Postcondition: + // task->m_wait == LockTag => task is complete 
+ // task->m_wait != LockTag => task is waiting + KOKKOS_FUNCTION + void complete( task_root_type * ); + + KOKKOS_FUNCTION + static bool push_task( task_root_type * volatile * const + , task_root_type * const ); + + KOKKOS_FUNCTION + static task_root_type * pop_task( task_root_type * volatile * const ); + + KOKKOS_FUNCTION static + void decrement( task_root_type * task ); + +public: + + // If and only if the execution space is a single thread + // then execute ready tasks. + KOKKOS_INLINE_FUNCTION + void iff_single_thread_recursive_execute() + { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + specialization::iff_single_thread_recursive_execute( this ); +#endif + } + + void execute() { specialization::execute( this ); } + + // Assign task pointer with reference counting of assigned tasks + template< typename LV , typename RV > + KOKKOS_FUNCTION static + void assign( TaskBase< execution_space,LV,void> ** const lhs + , TaskBase< execution_space,RV,void> * const rhs ) + { + using task_lhs = TaskBase< execution_space,LV,void> ; +#if 0 + { + printf( "assign( 0x%lx { 0x%lx %d %d } , 0x%lx { 0x%lx %d %d } )\n" + , uintptr_t( lhs ? *lhs : 0 ) + , uintptr_t( lhs && *lhs ? (*lhs)->m_next : 0 ) + , int( lhs && *lhs ? (*lhs)->m_task_type : 0 ) + , int( lhs && *lhs ? (*lhs)->m_ref_count : 0 ) + , uintptr_t(rhs) + , uintptr_t( rhs ? rhs->m_next : 0 ) + , int( rhs ? rhs->m_task_type : 0 ) + , int( rhs ? 
rhs->m_ref_count : 0 ) + ); + fflush( stdout ); + } +#endif + + if ( rhs ) { Kokkos::atomic_fetch_add( &(rhs->m_ref_count) , 1 ); } // Acquire rhs before releasing *lhs so that assigning a task to itself cannot free it + if ( *lhs ) decrement( *lhs ); // Release the previously assigned task, which may deallocate it + + // Force write of *lhs + + *static_cast< task_lhs * volatile * >(lhs) = rhs ; + + Kokkos::memory_fence(); + } + + KOKKOS_FUNCTION + size_t allocate_block_size( size_t n ); ///< Actual block size allocated + + KOKKOS_FUNCTION + void * allocate( size_t n ); ///< Allocate from the memory pool + + KOKKOS_FUNCTION + void deallocate( void * p , size_t n ); ///< Deallocate to the memory pool +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template<> +class TaskBase< void , void , void > { +public: + enum : int16_t { TaskTeam = 0 , TaskSingle = 1 , Aggregate = 2 }; + enum : uintptr_t { LockTag = ~uintptr_t(0) , EndTag = ~uintptr_t(1) }; +}; + +/** \brief Base class for task management, access, and execution. + * + * Inheritance structure to allow static_cast from the task root type + * and a task's FunctorType. + * + * // Enable a Future to access result data + * TaskBase< Space , ResultType , void > + * : TaskBase< Space , void , void > + * { ... }; + * + * // Enable a functor to access the base class + * TaskBase< Space , ResultType , FunctorType > + * : TaskBase< Space , ResultType , void > + * , FunctorType + * { ... 
}; + * + * + * States of a task: + * + * Constructing State, NOT IN a linked list + * m_wait == 0 + * m_next == 0 + * + * Scheduling transition : Constructing -> Waiting + * before: + * m_wait == 0 + * m_next == this task's initial dependence, 0 if none + * after: + * m_wait == EndTag + * m_next == EndTag + * + * Waiting State, IN a linked list + * m_apply != 0 + * m_queue != 0 + * m_ref_count > 0 + * m_wait == head of linked list of tasks waiting on this task + * m_next == next of linked list of tasks + * + * transition : Waiting -> Executing + * before: + * m_next == EndTag + * after: + * m_next == LockTag + * + * Executing State, NOT IN a linked list + * m_apply != 0 + * m_queue != 0 + * m_ref_count > 0 + * m_wait == head of linked list of tasks waiting on this task + * m_next == LockTag + * + * Respawn transition : Executing -> Executing-Respawn + * before: + * m_next == LockTag + * after: + * m_next == this task's updated dependence, 0 if none + * + * Executing-Respawn State, NOT IN a linked list + * m_apply != 0 + * m_queue != 0 + * m_ref_count > 0 + * m_wait == head of linked list of tasks waiting on this task + * m_next == this task's updated dependence, 0 if none + * + * transition : Executing -> Complete + * before: + * m_wait == head of linked list + * after: + * m_wait == LockTag + * + * Complete State, NOT IN a linked list + * m_wait == LockTag: cannot add dependence + * m_next == LockTag: not a member of a wait queue + * + */ +template< typename ExecSpace > +class TaskBase< ExecSpace , void , void > +{ +public: + + enum : int16_t { TaskTeam = TaskBase<void,void,void>::TaskTeam + , TaskSingle = TaskBase<void,void,void>::TaskSingle + , Aggregate = TaskBase<void,void,void>::Aggregate }; + + enum : uintptr_t { LockTag = TaskBase<void,void,void>::LockTag + , EndTag = TaskBase<void,void,void>::EndTag }; + + using execution_space = ExecSpace ; + using queue_type = TaskQueue< execution_space > ; + + template< typename > friend class Kokkos::TaskPolicy ; + + 
typedef void (* function_type) ( TaskBase * , void * ); + + // sizeof(TaskBase) == 48 + + function_type m_apply ; ///< Apply function pointer + queue_type * m_queue ; ///< Queue in which this task resides + TaskBase * m_wait ; ///< Linked list of tasks waiting on this + TaskBase * m_next ; ///< Waiting linked-list next + int32_t m_ref_count ; ///< Reference count + int32_t m_alloc_size ;///< Allocation size + int32_t m_dep_count ; ///< Aggregate's number of dependences + int16_t m_task_type ; ///< Type of task + int16_t m_priority ; ///< Priority of runnable task + + TaskBase( TaskBase && ) = delete ; + TaskBase( const TaskBase & ) = delete ; + TaskBase & operator = ( TaskBase && ) = delete ; + TaskBase & operator = ( const TaskBase & ) = delete ; + + KOKKOS_INLINE_FUNCTION ~TaskBase() = default ; + + KOKKOS_INLINE_FUNCTION + constexpr TaskBase() noexcept + : m_apply(0) + , m_queue(0) + , m_wait(0) + , m_next(0) + , m_ref_count(0) + , m_alloc_size(0) + , m_dep_count(0) + , m_task_type( TaskSingle ) + , m_priority( 1 /* TaskRegularPriority */ ) + {} + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + TaskBase ** aggregate_dependences() + { return reinterpret_cast<TaskBase**>( this + 1 ); } + + using get_return_type = void ; + + KOKKOS_INLINE_FUNCTION + get_return_type get() const {} +}; + +template < typename ExecSpace , typename ResultType > +class TaskBase< ExecSpace , ResultType , void > + : public TaskBase< ExecSpace , void , void > +{ +private: + + static_assert( sizeof(TaskBase<ExecSpace,void,void>) == 48 , "" ); + + TaskBase( TaskBase && ) = delete ; + TaskBase( const TaskBase & ) = delete ; + TaskBase & operator = ( TaskBase && ) = delete ; + TaskBase & operator = ( const TaskBase & ) = delete ; + +public: + + ResultType m_result ; + + KOKKOS_INLINE_FUNCTION ~TaskBase() = default ; + + KOKKOS_INLINE_FUNCTION + TaskBase() + : TaskBase< ExecSpace , void , void >() + , m_result() + {} + + using get_return_type = ResultType const & ; + + 
KOKKOS_INLINE_FUNCTION + get_return_type get() const { return m_result ; } +}; + + +template< typename ExecSpace , typename ResultType , typename FunctorType > +class TaskBase + : public TaskBase< ExecSpace , ResultType , void > + , public FunctorType +{ +private: + + TaskBase() = delete ; + TaskBase( TaskBase && ) = delete ; + TaskBase( const TaskBase & ) = delete ; + TaskBase & operator = ( TaskBase && ) = delete ; + TaskBase & operator = ( const TaskBase & ) = delete ; + +public: + + using root_type = TaskBase< ExecSpace , void , void > ; + using base_type = TaskBase< ExecSpace , ResultType , void > ; + using member_type = TaskExec< ExecSpace > ; + using functor_type = FunctorType ; + using result_type = ResultType ; + + template< typename Type > + KOKKOS_INLINE_FUNCTION static + void apply_functor + ( Type * const task + , typename std::enable_if + < std::is_same< typename Type::result_type , void >::value + , member_type * const + >::type member + ) + { + using fType = typename Type::functor_type ; + static_cast<fType*>(task)->operator()( *member ); + } + + template< typename Type > + KOKKOS_INLINE_FUNCTION static + void apply_functor + ( Type * const task + , typename std::enable_if + < ! std::is_same< typename Type::result_type , void >::value + , member_type * const + >::type member + ) + { + using fType = typename Type::functor_type ; + static_cast<fType*>(task)->operator()( *member , task->m_result ); + } + + KOKKOS_FUNCTION static + void apply( root_type * root , void * exec ) + { + TaskBase * const lock = reinterpret_cast< TaskBase * >( root_type::LockTag ); + TaskBase * const task = static_cast< TaskBase * >( root ); + member_type * const member = reinterpret_cast< member_type * >( exec ); + + TaskBase::template apply_functor( task , member ); + + // Task may be serial or team. + // If team then must synchronize before querying task->m_next. + // If team then only one thread calls destructor. 
+ + member->team_barrier(); + + if ( 0 == member->team_rank() && lock == task->m_next ) { + // Did not respawn, destroy the functor to free memory + static_cast<functor_type*>(task)->~functor_type(); + // Cannot destroy the task until its dependences + // have been processed. + } + } + + KOKKOS_INLINE_FUNCTION + TaskBase( FunctorType const & arg_functor ) + : base_type() + , FunctorType( arg_functor ) + {} + + KOKKOS_INLINE_FUNCTION + ~TaskBase() {} +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..70a880d4a2e341a9f8e78df97c57531ca53492f6 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp @@ -0,0 +1,569 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +void TaskQueue< ExecSpace >::Destroy::destroy_shared_allocation() +{ + m_queue->~TaskQueue(); +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +TaskQueue< ExecSpace >::TaskQueue + ( const TaskQueue< ExecSpace >::memory_space & arg_space + , unsigned const arg_memory_pool_capacity + , unsigned const arg_memory_pool_superblock_capacity_log2 + ) + : m_memory( arg_space + , arg_memory_pool_capacity + , arg_memory_pool_superblock_capacity_log2 ) + , m_ready() + , m_accum_alloc(0) + , m_max_alloc(0) + , m_ready_count(0) +{ + for ( int i = 0 ; i < NumQueue ; ++i ) { + m_ready[i][0] = (task_root_type *) task_root_type::EndTag ; + m_ready[i][1] = (task_root_type *) task_root_type::EndTag ; + } +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +TaskQueue< ExecSpace >::~TaskQueue() +{ + // Verify that queues are empty and ready count is zero + + for ( int i = 0 ; i < NumQueue ; ++i ) { + for ( int j = 0 ; j < 2 ; ++j ) { + if ( m_ready[i][j] != (task_root_type *) task_root_type::EndTag ) { + Kokkos::abort("TaskQueue::~TaskQueue ERROR: has ready tasks"); + } + } + } + + if ( 0 != m_ready_count ) { + Kokkos::abort("TaskQueue::~TaskQueue ERROR: has ready or executing tasks"); + } +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +void TaskQueue< ExecSpace >::decrement + ( TaskQueue< ExecSpace >::task_root_type * task ) +{ + const int count = Kokkos::atomic_fetch_add(&(task->m_ref_count),-1); + +#if 0 + if ( 1 == count ) { + printf( 
"decrement-destroy( 0x%lx { 0x%lx %d %d } )\n" + , uintptr_t( task ) + , uintptr_t( task->m_next ) + , int( task->m_task_type ) + , int( task->m_ref_count ) + ); + } +#endif + + if ( ( 1 == count ) && + ( task->m_next == (task_root_type *) task_root_type::LockTag ) ) { + // Reference count is zero and task is complete, deallocate. + task->m_queue->deallocate( task , task->m_alloc_size ); + } + else if ( count <= 1 ) { + Kokkos::abort("TaskPolicy task has negative reference count or is incomplete" ); + } +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +size_t TaskQueue< ExecSpace >::allocate_block_size( size_t n ) +{ + return m_memory.allocate_block_size( n ); +} + +template< typename ExecSpace > +KOKKOS_FUNCTION +void * TaskQueue< ExecSpace >::allocate( size_t n ) +{ + void * const p = m_memory.allocate(n); + + if ( p ) { + Kokkos::atomic_increment( & m_accum_alloc ); + Kokkos::atomic_increment( & m_count_alloc ); + + if ( m_max_alloc < m_count_alloc ) m_max_alloc = m_count_alloc ; + } + + return p ; +} + +template< typename ExecSpace > +KOKKOS_FUNCTION +void TaskQueue< ExecSpace >::deallocate( void * p , size_t n ) +{ + m_memory.deallocate( p , n ); + Kokkos::atomic_decrement( & m_count_alloc ); +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +bool TaskQueue< ExecSpace >::push_task + ( TaskQueue< ExecSpace >::task_root_type * volatile * const queue + , TaskQueue< ExecSpace >::task_root_type * const task + ) +{ + // Push task into a concurrently pushed and popped queue. + // The queue is a linked list where 'task->m_next' form the links. + // Fail the push attempt if the queue is locked; + // otherwise retry until the push succeeds. 
+ +#if 0 + printf( "push_task( 0x%lx { 0x%lx } 0x%lx { 0x%lx 0x%lx %d %d %d } )\n" + , uintptr_t(queue) + , uintptr_t(*queue) + , uintptr_t(task) + , uintptr_t(task->m_wait) + , uintptr_t(task->m_next) + , task->m_task_type + , task->m_priority + , task->m_ref_count ); +#endif + + task_root_type * const zero = (task_root_type *) 0 ; + task_root_type * const lock = (task_root_type *) task_root_type::LockTag ; + + task_root_type * volatile * const next = & task->m_next ; + + if ( zero != *next ) { + Kokkos::abort("TaskQueue::push_task ERROR: already a member of another queue" ); + } + + task_root_type * y = *queue ; + + while ( lock != y ) { + + *next = y ; + + // Do not proceed until '*next' has been stored. + Kokkos::memory_fence(); + + task_root_type * const x = y ; + + y = Kokkos::atomic_compare_exchange(queue,y,task); + + if ( x == y ) return true ; + } + + // Failed, replace 'task->m_next' value since 'task' remains + // not a member of a queue. + + *next = zero ; + + // Do not proceed until '*next' has been stored. + Kokkos::memory_fence(); + + return false ; +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +typename TaskQueue< ExecSpace >::task_root_type * +TaskQueue< ExecSpace >::pop_task + ( TaskQueue< ExecSpace >::task_root_type * volatile * const queue ) +{ + // Pop task from a concurrently pushed and popped queue. + // The queue is a linked list where 'task->m_next' form the links. + + task_root_type * const zero = (task_root_type *) 0 ; + task_root_type * const lock = (task_root_type *) task_root_type::LockTag ; + task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + + // *queue is + // end => an empty queue + // lock => a locked queue + // valid + + // Retry until the lock is acquired or the queue is empty. 
+ + task_root_type * task = *queue ; + + while ( end != task ) { + + // The only possible values for the queue are + // (1) lock, (2) end, or (3) a valid task. + // Thus zero will never appear in the queue. + // + // If queue is locked then just read by guaranteeing + // the CAS will fail. + + if ( lock == task ) task = 0 ; + + task_root_type * const x = task ; + + task = Kokkos::atomic_compare_exchange(queue,task,lock); + + if ( x == task ) break ; // CAS succeeded and queue is locked + } + + if ( end != task ) { + + // This thread has locked the queue and removed 'task' from the queue. + // Extract the next entry of the queue from 'task->m_next' + // and mark 'task' as popped from a queue by setting + // 'task->m_next = lock'. + + task_root_type * const next = + Kokkos::atomic_exchange( & task->m_next , lock ); + + // Place the next entry in the head of the queue, + // which also unlocks the queue. + + task_root_type * const unlock = + Kokkos::atomic_exchange( queue , next ); + + if ( next == zero || next == lock || lock != unlock ) { + Kokkos::abort("TaskQueue::pop_task ERROR"); + } + } + +#if 0 + if ( end != task ) { + printf( "pop_task( 0x%lx 0x%lx { 0x%lx 0x%lx %d %d %d } )\n" + , uintptr_t(queue) + , uintptr_t(task) + , uintptr_t(task->m_wait) + , uintptr_t(task->m_next) + , int(task->m_task_type) + , int(task->m_priority) + , int(task->m_ref_count) ); + } +#endif + + return task ; +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +void TaskQueue< ExecSpace >::schedule + ( TaskQueue< ExecSpace >::task_root_type * const task ) +{ + // Schedule a runnable or when_all task upon construction / spawn + // and upon completion of other tasks that 'task' is waiting on. 
+ + // Precondition on runnable task state: + // task is either constructing or executing + // + // Constructing state: + // task->m_wait == 0 + // task->m_next == dependence + // Executing-respawn state: + // task->m_wait == head of linked list + // task->m_next == dependence + // + // Task state transition: + // Constructing -> Waiting + // Executing-respawn -> Waiting + // + // Postcondition on task state: + // task->m_wait == head of linked list + // task->m_next == member of linked list + +#if 0 + printf( "schedule( 0x%lx { 0x%lx 0x%lx %d %d %d }\n" + , uintptr_t(task) + , uintptr_t(task->m_wait) + , uintptr_t(task->m_next) + , task->m_task_type + , task->m_priority + , task->m_ref_count ); +#endif + + task_root_type * const zero = (task_root_type *) 0 ; + task_root_type * const lock = (task_root_type *) task_root_type::LockTag ; + task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + + //---------------------------------------- + { + // If Constructing then task->m_wait == 0 + // Change to waiting by task->m_wait = EndTag + + task_root_type * const init = + Kokkos::atomic_compare_exchange( & task->m_wait , zero , end ); + + // Precondition + + if ( lock == init ) { + Kokkos::abort("TaskQueue::schedule ERROR: task is complete"); + } + + // if ( init == 0 ) Constructing -> Waiting + // else Executing-Respawn -> Waiting + } + //---------------------------------------- + + if ( task_root_type::Aggregate != task->m_task_type ) { + + // Scheduling a runnable task which may have a dependence 'dep'. + // Extract dependence, if any, from task->m_next. + // If 'dep' is not null then attempt to push 'task' + // into the wait queue of 'dep'. + // If the push succeeds then 'task' may be + // processed or executed by another thread at any time. + // If the push fails then 'dep' is complete and 'task' + // is ready to execute. + + task_root_type * dep = Kokkos::atomic_exchange( & task->m_next , zero ); + + const bool is_ready = + ( 0 == dep ) || ( ! 
push_task( & dep->m_wait , task ) ); + + // Reference count for dep was incremented when assigned + // to task->m_next so that if it completed prior to the + // above push_task dep would not be destroyed. + // dep reference count can now be decremented, + // which may deallocate the task. + TaskQueue::assign( & dep , (task_root_type *)0 ); + + if ( is_ready ) { + + // No dependence or 'dep' is complete so push task into ready queue. + // Increment the ready count before pushing into ready queue + // to track number of ready + executing tasks. + // The ready count will be decremented when the task is complete. + + Kokkos::atomic_increment( & m_ready_count ); + + task_root_type * volatile * const queue = + & m_ready[ task->m_priority ][ task->m_task_type ]; + + // A push_task fails if the ready queue is locked. + // A ready queue is only locked during a push or pop; + // i.e., it is never permanently locked. + // Retry push to ready queue until it succeeds. + // When the push succeeds then 'task' may be + // processed or executed by another thread at any time. + + while ( ! push_task( queue , task ) ); + } + } + //---------------------------------------- + else { + // Scheduling a 'when_all' task with multiple dependences. + // This scheduling may be called when the 'when_all' is + // (1) created or + // (2) being removed from a completed task's wait list. + + task_root_type ** const aggr = task->aggregate_dependences(); + + // Assume the 'when_all' is complete until a dependence is + // found that is not complete. + + bool is_complete = true ; + + for ( int i = task->m_dep_count ; 0 < i && is_complete ; ) { + + --i ; + + // Loop dependences looking for an incomplete task. + // Add this task to the incomplete task's wait queue. + + // Remove a task 'x' from the dependence list. + // The reference count of 'x' was incremented when + // it was assigned into the dependence list. 
+ + task_root_type * x = Kokkos::atomic_exchange( aggr + i , zero ); + + if ( x ) { + + // If x->m_wait is not locked then push succeeds + // and the aggregate is not complete. + // If the push succeeds then this when_all 'task' may be + // processed by another thread at any time. + // For example, 'x' may be completed by another + // thread and then re-schedule this when_all 'task'. + + is_complete = ! push_task( & x->m_wait , task ); + + // Decrement reference count which had been incremented + // when 'x' was added to the dependence list. + + TaskQueue::assign( & x , zero ); + } + } + + if ( is_complete ) { + // The when_all 'task' was not added to a wait queue because + // all dependences were complete so this aggregate is complete. + // Complete the when_all 'task' to schedule other tasks + // that are waiting for the when_all 'task' to complete. + + task->m_next = lock ; + + complete( task ); + + // '*task' may have been deleted upon completion + } + } + //---------------------------------------- + // Postcondition: + // A runnable 'task' was pushed into a wait or ready queue. + // An aggregate 'task' was either pushed to a wait queue + // or completed. + // Concurrent execution may have already popped 'task' + // from a queue and processed it as appropriate. +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +void TaskQueue< ExecSpace >::complete + ( TaskQueue< ExecSpace >::task_root_type * task ) +{ + // Complete a runnable task that has finished executing + // or a when_all task when all of its dependences are complete. 
+ + task_root_type * const zero = (task_root_type *) 0 ; + task_root_type * const lock = (task_root_type *) task_root_type::LockTag ; + task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + +#if 0 + printf( "complete( 0x%lx { 0x%lx 0x%lx %d %d %d }\n" + , uintptr_t(task) + , uintptr_t(task->m_wait) + , uintptr_t(task->m_next) + , task->m_task_type + , task->m_priority + , task->m_ref_count ); + fflush( stdout ); +#endif + + const bool runnable = task_root_type::Aggregate != task->m_task_type ; + + //---------------------------------------- + + if ( runnable && lock != task->m_next ) { + // Is a runnable task that has finished executing and requested respawn. + // Schedule the task for subsequent execution. + + schedule( task ); + } + //---------------------------------------- + else { + // Is either an aggregate or a runnable task that executed + // and did not respawn. Transition this task to complete. + + // If 'task' is an aggregate then any of the runnable tasks that + // it depends upon may be attempting to complete this 'task'. + // Must only transition a task once to complete status. + // This is controlled by atomically locking the wait queue. + + // Stop other tasks from adding themselves to this task's wait queue + // by locking the head of this task's wait queue. + + task_root_type * x = Kokkos::atomic_exchange( & task->m_wait , lock ); + + if ( x != (task_root_type *) lock ) { + + // This thread has transitioned this 'task' to complete. + // 'task' is no longer in a queue and is not executing + // so decrement the reference count from 'task's creation. + // If no other references to this 'task' then it will be deleted. + + TaskQueue::assign( & task , zero ); + + // This thread has exclusive access to the wait list so + // the concurrency-safe pop_task function is not needed. + // Schedule the tasks that have been waiting on the input 'task', + // which may have been deleted. 
+ + while ( x != end ) { + + // Set x->m_next = zero <= no dependence + + task_root_type * const next = + (task_root_type *) Kokkos::atomic_exchange( & x->m_next , zero ); + + schedule( x ); + + x = next ; + } + } + } + + if ( runnable ) { + // A runnable task was popped from a ready queue and executed. + // If respawned into a ready queue then the ready count was incremented + // so decrement whether respawned or not. + Kokkos::atomic_decrement( & m_ready_count ); + } +} + +//---------------------------------------------------------------------------- + +} /* namespace Impl */ +} /* namespace Kokkos */ + + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Timer.hpp b/lib/kokkos/core/src/impl/Kokkos_Timer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1f14e42874bda3c43f5f18bced120d73366abd40 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Timer.hpp @@ -0,0 +1,118 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
#ifndef KOKKOS_IMPLWALLTIME_HPP
#define KOKKOS_IMPLWALLTIME_HPP

#include <stddef.h>

// Clock source selection:
//   _MSC_VER          : KOKKOS_USE_LIBRT is cleared and gettimeofday.c is included
//   KOKKOS_USE_LIBRT  : clock_gettime() via <ctime>
//   otherwise         : gettimeofday() via <sys/time.h>
#ifdef _MSC_VER
#undef KOKKOS_USE_LIBRT
#include <gettimeofday.c>
#else
#ifdef KOKKOS_USE_LIBRT
#include <ctime>
#else
#include <sys/time.h>
#endif
#endif

namespace Kokkos {
namespace Impl {

/** \brief  Stopwatch reporting the wall-clock time elapsed since
 *          construction or since the most recent call to reset().
 *
 *  The copy constructor and copy assignment operator are declared
 *  private and never defined, so a Timer cannot be copied.
 */
class Timer {
private:

  // Time stamp representation matching the selected clock source.
#ifdef KOKKOS_USE_LIBRT
  typedef struct timespec stamp_type ;
#else
  typedef struct timeval stamp_type ;
#endif

  // Time stamp taken by the constructor or by the last reset().
  stamp_type m_old ;

  // Store the current time of the selected clock into 'stamp'.
  static inline
  void read_clock( stamp_type & stamp )
  {
#ifdef KOKKOS_USE_LIBRT
    clock_gettime( CLOCK_REALTIME , & stamp );
#else
    ::gettimeofday( & stamp , ((struct timezone *) NULL ) );
#endif
  }

  Timer( const Timer & );
  Timer & operator = ( const Timer & );

public:

  /** \brief  Begin timing immediately. */
  inline
  Timer() { reset(); }

  inline
  ~Timer() {}

  /** \brief  Restart the measurement from the current time. */
  inline
  void reset() { read_clock( m_old ); }

  /** \brief  Seconds elapsed since construction or the last reset(). */
  inline
  double seconds() const
  {
    stamp_type current ;
    read_clock( current );

    // Whole seconds and the sub-second remainder are differenced
    // separately and then summed as doubles.
    const double whole = (double) ( current.tv_sec - m_old.tv_sec );

#ifdef KOKKOS_USE_LIBRT
    const double fraction = (double) ( current.tv_nsec - m_old.tv_nsec ) * 1.0e-9 ;
#else
    const double fraction = (double) ( current.tv_usec - m_old.tv_usec ) * 1.0e-6 ;
#endif

    return whole + fraction ;
  }
};

} // namespace Impl

  using Kokkos::Impl::Timer ;

} // namespace Kokkos

#endif /* #ifndef KOKKOS_IMPLWALLTIME_HPP */
namespace Kokkos {
namespace Impl {

//----------------------------------------------------------------------------
// Utilities for C++11 variadic argument packs

/** \brief  get_type<I,Pack...>::type is the I-th (zero-based) member of
 *          Pack, or void when the pack has no member at position I.
 */
template< unsigned I , typename ... Pack >
struct get_type
{
  using type = void ;
};

// Position zero: the head of the pack.
template< typename T , typename ... Pack >
struct get_type< 0 , T , Pack ... >
{
  using type = T ;
};

// Any other position: drop the head and look one position earlier.
template< unsigned I , typename T , typename ... Pack >
struct get_type< I , T , Pack ... >
{
  using type = typename get_type< I - 1 , Pack ... >::type ;
};


/** \brief  has_type<T,Pack...>::value is true when T is a member of Pack.
 *          A pack containing T more than once is rejected at compile time.
 */
template< typename T , typename ... Pack >
struct has_type
{
  enum { value = false };
};

template< typename T , typename S , typename ... Pack >
struct has_type< T , S , Pack ... >
{
private:

  // Result for the remainder of the pack.
  using tail = has_type< T , Pack ... > ;

  // Whether the head of the pack is the requested type.
  enum { head_matches = std::is_same< T , S >::value };

  static_assert( ! ( head_matches && tail::value )
               , "Error: more than one member of the argument pack matches the type" );

public:

  enum { value = head_matches || tail::value };
};


/** \brief  Search Pack for the member S with Condition<S>::value true.
 *
 *  'value' reports whether such a member exists and 'type' is that member,
 *  or DefaultType when there is none.  A pack with more than one
 *  satisfying member is rejected at compile time.
 */
template< typename DefaultType
        , template< typename > class Condition
        , typename ... Pack >
struct has_condition
{
  enum { value = false };

  using type = DefaultType ;
};

template< typename DefaultType
        , template< typename > class Condition
        , typename S
        , typename ... Pack >
struct has_condition< DefaultType , Condition , S , Pack ... >
{
private:

  // Result for the remainder of the pack.
  using tail = has_condition< DefaultType , Condition , Pack ... > ;

  // Whether the head of the pack satisfies the condition.
  enum { head_matches = Condition< S >::value };

  static_assert( ! ( head_matches && tail::value )
               , "Error: more than one member of the argument pack satisfies condition" );

public:

  enum { value = head_matches || tail::value };

  using type =
    typename std::conditional< head_matches , S , typename tail::type >::type ;
};


/** \brief  are_integral<Args...>::value is true when every member of Args
 *          is an integral or enumeration type; true for an empty pack.
 */
template< class ... Args >
struct are_integral
{
  enum { value = true };
};

template< typename T , class ... Args >
struct are_integral< T , Args ... >
{
  // An enumeration is accepted alongside std::is_integral types because
  // a simple enum value converts automatically to an integral value.
  enum { value =
    are_integral< Args ... >::value
    &&
    ( std::is_enum< T >::value || std::is_integral< T >::value ) };
};

} // namespace Impl
} // namespace Kokkos
+ */ +//---------------------------------------------------------------------------- +// C++11 Helpers: + +template < class T , T v > +struct integral_constant +{ + // Declaration of 'static const' causes an unresolved linker symbol in debug + // static const T value = v ; + enum { value = T(v) }; + typedef T value_type; + typedef integral_constant<T,v> type; + KOKKOS_INLINE_FUNCTION operator T() { return v ; } +}; + +typedef integral_constant<bool,false> false_type ; +typedef integral_constant<bool,true> true_type ; + +//---------------------------------------------------------------------------- +// C++11 Type relationships: + +template< class X , class Y > struct is_same : public false_type {}; +template< class X > struct is_same<X,X> : public true_type {}; + +//---------------------------------------------------------------------------- +// C++11 Type properties: + +template <typename T> struct is_const : public false_type {}; +template <typename T> struct is_const<const T> : public true_type {}; +template <typename T> struct is_const<const T & > : public true_type {}; + +template <typename T> struct is_array : public false_type {}; +template <typename T> struct is_array< T[] > : public true_type {}; +template <typename T, unsigned N > struct is_array< T[N] > : public true_type {}; + +//---------------------------------------------------------------------------- +// C++11 Type transformations: + +template <typename T> struct remove_const { typedef T type; }; +template <typename T> struct remove_const<const T> { typedef T type; }; +template <typename T> struct remove_const<const T & > { typedef T & type; }; + +template <typename T> struct add_const { typedef const T type; }; +template <typename T> struct add_const<T & > { typedef const T & type; }; +template <typename T> struct add_const<const T> { typedef const T type; }; +template <typename T> struct add_const<const T & > { typedef const T & type; }; + +template <typename T> struct remove_reference { typedef T 
type ; }; +template <typename T> struct remove_reference< T & > { typedef T type ; }; +template <typename T> struct remove_reference< const T & > { typedef const T type ; }; + +template <typename T> struct remove_extent { typedef T type ; }; +template <typename T> struct remove_extent<T[]> { typedef T type ; }; +template <typename T, unsigned N > struct remove_extent<T[N]> { typedef T type ; }; + +//---------------------------------------------------------------------------- +// C++11 Other type generators: + +template< bool , class T , class F > +struct condition { typedef F type ; }; + +template< class T , class F > +struct condition<true,T,F> { typedef T type ; }; + +template< bool , class = void > +struct enable_if ; + +template< class T > +struct enable_if< true , T > { typedef T type ; }; + +//---------------------------------------------------------------------------- + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +// Other traits + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- + +template< class , class T = void > +struct enable_if_type { typedef T type ; }; + +//---------------------------------------------------------------------------- + +template< bool B > +struct bool_ : public integral_constant<bool,B> {}; + +template< unsigned I > +struct unsigned_ : public integral_constant<unsigned,I> {}; + +template< int I > +struct int_ : public integral_constant<int,I> {}; + +typedef bool_<true> true_; +typedef bool_<false> false_; +//---------------------------------------------------------------------------- +// if_ + +template < bool Cond , typename TrueType , typename FalseType> +struct if_c +{ + enum { value = Cond }; + + typedef FalseType type; + + + typedef typename remove_const< + typename remove_reference<type>::type 
>::type value_type ; + + typedef typename add_const<value_type>::type const_value_type ; + + static KOKKOS_INLINE_FUNCTION + const_value_type & select( const_value_type & v ) { return v ; } + + static KOKKOS_INLINE_FUNCTION + value_type & select( value_type & v ) { return v ; } + + template< class T > + static KOKKOS_INLINE_FUNCTION + value_type & select( const T & ) { value_type * ptr(0); return *ptr ; } + + + template< class T > + static KOKKOS_INLINE_FUNCTION + const_value_type & select( const T & , const_value_type & v ) { return v ; } + + template< class T > + static KOKKOS_INLINE_FUNCTION + value_type & select( const T & , value_type & v ) { return v ; } +}; + +template <typename TrueType, typename FalseType> +struct if_c< true , TrueType , FalseType > +{ + enum { value = true }; + + typedef TrueType type; + + + typedef typename remove_const< + typename remove_reference<type>::type >::type value_type ; + + typedef typename add_const<value_type>::type const_value_type ; + + static KOKKOS_INLINE_FUNCTION + const_value_type & select( const_value_type & v ) { return v ; } + + static KOKKOS_INLINE_FUNCTION + value_type & select( value_type & v ) { return v ; } + + template< class T > + static KOKKOS_INLINE_FUNCTION + value_type & select( const T & ) { value_type * ptr(0); return *ptr ; } + + + template< class F > + static KOKKOS_INLINE_FUNCTION + const_value_type & select( const_value_type & v , const F & ) { return v ; } + + template< class F > + static KOKKOS_INLINE_FUNCTION + value_type & select( value_type & v , const F & ) { return v ; } +}; + +template< typename TrueType > +struct if_c< false , TrueType , void > +{ + enum { value = false }; + + typedef void type ; + typedef void value_type ; +}; + +template< typename FalseType > +struct if_c< true , void , FalseType > +{ + enum { value = true }; + + typedef void type ; + typedef void value_type ; +}; + +template <typename Cond, typename TrueType, typename FalseType> +struct if_ : public if_c<Cond::value, 
TrueType, FalseType> {}; + +//---------------------------------------------------------------------------- + +// Allows aliased types: +template< typename T > +struct is_integral : public integral_constant< bool , + ( + std::is_same< T , char >::value || + std::is_same< T , unsigned char >::value || + std::is_same< T , short int >::value || + std::is_same< T , unsigned short int >::value || + std::is_same< T , int >::value || + std::is_same< T , unsigned int >::value || + std::is_same< T , long int >::value || + std::is_same< T , unsigned long int >::value || + std::is_same< T , long long int >::value || + std::is_same< T , unsigned long long int >::value || + + std::is_same< T , int8_t >::value || + std::is_same< T , int16_t >::value || + std::is_same< T , int32_t >::value || + std::is_same< T , int64_t >::value || + std::is_same< T , uint8_t >::value || + std::is_same< T , uint16_t >::value || + std::is_same< T , uint32_t >::value || + std::is_same< T , uint64_t >::value + )> +{}; +//---------------------------------------------------------------------------- + +template<typename T> +struct is_label : public false_type {}; + +template<> +struct is_label<const char*> : public true_type {}; + +template<> +struct is_label<char*> : public true_type {}; + + +template<int N> +struct is_label<const char[N]> : public true_type {}; + +template<int N> +struct is_label<char[N]> : public true_type {}; + + +template<> +struct is_label<const std::string> : public true_type {}; + +template<> +struct is_label<std::string> : public true_type {}; + +// These 'constexpr'functions can be used as +// both regular functions and meta-function. + +/**\brief There exists integral 'k' such that N = 2^k */ +KOKKOS_INLINE_FUNCTION +constexpr bool is_integral_power_of_two( const size_t N ) +{ return ( 0 < N ) && ( 0 == ( N & ( N - 1 ) ) ); } + +/**\brief Return integral 'k' such that N = 2^k, assuming valid. 
*/ +KOKKOS_INLINE_FUNCTION +constexpr unsigned integral_power_of_two_assume_valid( const size_t N ) +{ return N == 1 ? 0 : 1 + integral_power_of_two_assume_valid( N >> 1 ); } + +/**\brief Return integral 'k' such that N = 2^k, if exists. + * If does not exist return ~0u. + */ +KOKKOS_INLINE_FUNCTION +constexpr unsigned integral_power_of_two( const size_t N ) +{ return is_integral_power_of_two(N) ? integral_power_of_two_assume_valid(N) : ~0u ; } + +//---------------------------------------------------------------------------- + +template < size_t N > +struct is_power_of_two +{ + enum type { value = (N > 0) && !(N & (N-1)) }; +}; + +template < size_t N , bool OK = is_power_of_two<N>::value > +struct power_of_two ; + +template < size_t N > +struct power_of_two<N,true> +{ + enum type { value = 1+ power_of_two<(N>>1),true>::value }; +}; + +template <> +struct power_of_two<2,true> +{ + enum type { value = 1 }; +}; + +template <> +struct power_of_two<1,true> +{ + enum type { value = 0 }; +}; + +/** \brief If power of two then return power, + * otherwise return ~0u. + */ +static KOKKOS_FORCEINLINE_FUNCTION +unsigned power_of_two_if_valid( const unsigned N ) +{ + unsigned p = ~0u ; + if ( N && ! ( N & ( N - 1 ) ) ) { +#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA ) + p = __ffs(N) - 1 ; +#elif defined( __GNUC__ ) || defined( __GNUG__ ) + p = __builtin_ffs(N) - 1 ; +#elif defined( __INTEL_COMPILER ) + p = _bit_scan_forward(N); +#else + p = 0 ; + for ( unsigned j = 1 ; ! 
( N & j ) ; j <<= 1 ) { ++p ; } +#endif + } + return p ; +} + +//---------------------------------------------------------------------------- + +template< typename T , T v , bool NonZero = ( v != T(0) ) > +struct integral_nonzero_constant +{ + // Declaration of 'static const' causes an unresolved linker symbol in debug + // static const T value = v ; + enum { value = T(v) }; + typedef T value_type ; + typedef integral_nonzero_constant<T,v> type ; + KOKKOS_INLINE_FUNCTION integral_nonzero_constant( const T & ) {} +}; + +template< typename T , T zero > +struct integral_nonzero_constant<T,zero,false> +{ + const T value ; + typedef T value_type ; + typedef integral_nonzero_constant<T,0> type ; + KOKKOS_INLINE_FUNCTION integral_nonzero_constant( const T & v ) : value(v) {} +}; + +//---------------------------------------------------------------------------- + +template < class C > struct is_integral_constant : public false_ +{ + typedef void integral_type ; + enum { integral_value = 0 }; +}; + +template < typename T , T v > +struct is_integral_constant< integral_constant<T,v> > : public true_ +{ + typedef T integral_type ; + enum { integral_value = v }; +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #ifndef KOKKOSTRAITS_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewDefault.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewDefault.hpp new file mode 100644 index 0000000000000000000000000000000000000000..94c8e13c1d445953fabc852aaece3fa8d07fa5eb --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_ViewDefault.hpp @@ -0,0 +1,886 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_VIEWDEFAULT_HPP +#define KOKKOS_VIEWDEFAULT_HPP + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template<> +struct ViewAssignment< ViewDefault , ViewDefault , void > +{ + typedef ViewDefault Specialize ; + + //------------------------------------ + /** \brief Compatible value and shape and LayoutLeft/Right to LayoutStride*/ + + template< class DT , class DL , class DD , class DM , + class ST , class SL , class SD , class SM > + KOKKOS_INLINE_FUNCTION + ViewAssignment( View<DT,DL,DD,DM,Specialize> & dst , + const View<ST,SL,SD,SM,Specialize> & src , + const typename enable_if<( + ViewAssignable< ViewTraits<DT,DL,DD,DM> , + ViewTraits<ST,SL,SD,SM> >::value + || + ( ViewAssignable< ViewTraits<DT,DL,DD,DM> , + ViewTraits<ST,SL,SD,SM> >::assignable_value + && + ShapeCompatible< typename ViewTraits<DT,DL,DD,DM>::shape_type , + typename ViewTraits<ST,SL,SD,SM>::shape_type >::value + && + is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutStride>::value + && (is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout,LayoutLeft>::value || + is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout,LayoutRight>::value)) + )>::type * = 0 ) + { + dst.m_offset_map.assign( src.m_offset_map ); + + dst.m_management = src.m_management ; + + dst.m_ptr_on_device = ViewDataManagement< ViewTraits<DT,DL,DD,DM> >::create_handle( src.m_ptr_on_device, src.m_tracker ); + + if( dst.is_managed ) + dst.m_tracker = src.m_tracker ; + else { + dst.m_tracker = AllocationTracker(); + dst.m_management.set_unmanaged(); + } + } + + + /** \brief Assign 1D Strided View to LayoutLeft or LayoutRight if stride[0]==1 */ + + template< class DT , class DL , class DD , class DM , + class ST , class SD 
, class SM > + KOKKOS_INLINE_FUNCTION + ViewAssignment( View<DT,DL,DD,DM,Specialize> & dst , + const View<ST,LayoutStride,SD,SM,Specialize> & src , + const typename enable_if<( + ( + ViewAssignable< ViewTraits<DT,DL,DD,DM> , + ViewTraits<ST,LayoutStride,SD,SM> >::value + || + ( ViewAssignable< ViewTraits<DT,DL,DD,DM> , + ViewTraits<ST,LayoutStride,SD,SM> >::assignable_value + && + ShapeCompatible< typename ViewTraits<DT,DL,DD,DM>::shape_type , + typename ViewTraits<ST,LayoutStride,SD,SM>::shape_type >::value + ) + ) + && + (View<DT,DL,DD,DM,Specialize>::rank==1) + && (is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutLeft>::value || + is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutRight>::value) + )>::type * = 0 ) + { + size_t strides[8]; + src.stride(strides); + if(strides[0]!=1) { + Kokkos::abort("Trying to assign strided 1D View to LayoutRight or LayoutLeft which is not stride-1"); + } + dst.m_offset_map.assign( src.dimension_0(), 0, 0, 0, 0, 0, 0, 0, 0 ); + + dst.m_management = src.m_management ; + + dst.m_ptr_on_device = ViewDataManagement< ViewTraits<DT,DL,DD,DM> >::create_handle( src.m_ptr_on_device, src.m_tracker ); + + if( dst.is_managed ) + dst.m_tracker = src.m_tracker ; + else { + dst.m_tracker = AllocationTracker(); + dst.m_management.set_unmanaged(); + } + } + + //------------------------------------ + /** \brief Deep copy data from compatible value type, layout, rank, and specialization. + * Check the dimensions and allocation lengths at runtime. 
+ */ + template< class DT , class DL , class DD , class DM , + class ST , class SL , class SD , class SM > + inline static + void deep_copy( const View<DT,DL,DD,DM,Specialize> & dst , + const View<ST,SL,SD,SM,Specialize> & src , + const typename Impl::enable_if<( + Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::value_type , + typename ViewTraits<ST,SL,SD,SM>::non_const_value_type >::value + && + Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout , + typename ViewTraits<ST,SL,SD,SM>::array_layout >::value + && + ( unsigned(ViewTraits<DT,DL,DD,DM>::rank) == unsigned(ViewTraits<ST,SL,SD,SM>::rank) ) + )>::type * = 0 ) + { + typedef typename ViewTraits<DT,DL,DD,DM>::memory_space dst_memory_space ; + typedef typename ViewTraits<ST,SL,SD,SM>::memory_space src_memory_space ; + + if ( dst.ptr_on_device() != src.ptr_on_device() ) { + + Impl::assert_shapes_are_equal( dst.m_offset_map , src.m_offset_map ); + + const size_t nbytes = dst.m_offset_map.scalar_size * dst.m_offset_map.capacity(); + + DeepCopy< dst_memory_space , src_memory_space >( dst.ptr_on_device() , src.ptr_on_device() , nbytes ); + } + } +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class ExecSpace , class DT , class DL, class DD, class DM, class DS > +struct ViewDefaultConstruct< ExecSpace , Kokkos::View<DT,DL,DD,DM,DS> , true > +{ + Kokkos::View<DT,DL,DD,DM,DS> * const m_ptr ; + + KOKKOS_FORCEINLINE_FUNCTION + void operator()( const typename ExecSpace::size_type& i ) const + { new(m_ptr+i) Kokkos::View<DT,DL,DD,DM,DS>(); } + + ViewDefaultConstruct( Kokkos::View<DT,DL,DD,DM,DS> * pointer , size_t capacity ) + : m_ptr( pointer ) + { + Kokkos::RangePolicy< ExecSpace > range( 0 , capacity ); + parallel_for( range , *this ); + ExecSpace::fence(); + } +}; + +template< class 
SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type + , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type + , class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type + > +struct ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > + , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type + , SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type > +{ +private: + + typedef View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > SrcViewType ; + + enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 }; + enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 }; + enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 }; + enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 }; + enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 }; + enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 }; + enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 }; + enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 }; + + // The source view rank must be equal to the input argument rank + // Once a void argument is encountered all subsequent arguments must be void. + enum { InputRank = + Impl::StaticAssert<( SrcViewType::rank == + ( V0 ? 0 : ( + V1 ? 1 : ( + V2 ? 2 : ( + V3 ? 3 : ( + V4 ? 4 : ( + V5 ? 5 : ( + V6 ? 6 : ( + V7 ? 7 : 8 ))))))) )) + && + ( SrcViewType::rank == + ( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) ) + >::value ? SrcViewType::rank : 0 }; + + enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 }; + enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 }; + enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 }; + enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 }; + enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 
1 : 0 }; + enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 }; + enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 }; + enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 }; + + enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3) + + unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) }; + + // Reverse + enum { R0_rev = 0 == InputRank ? 0u : ( + 1 == InputRank ? unsigned(R0) : ( + 2 == InputRank ? unsigned(R1) : ( + 3 == InputRank ? unsigned(R2) : ( + 4 == InputRank ? unsigned(R3) : ( + 5 == InputRank ? unsigned(R4) : ( + 6 == InputRank ? unsigned(R5) : ( + 7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) }; + + typedef typename SrcViewType::array_layout SrcViewLayout ; + + // Choose array layout, attempting to preserve original layout if at all possible. + typedef typename Impl::if_c< + ( // Same Layout IF + // OutputRank 0 + ( OutputRank == 0 ) + || + // OutputRank 1 or 2, InputLayout Left, Interval 0 + // because single stride one or second index has a stride. + ( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value ) + || + // OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1] + // because single stride one or second index has a stride. + ( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value ) + ), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ; + + // Choose data type as a purely dynamic rank array to accomodate a runtime range. 
+ typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type , + typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *, + typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **, + typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***, + typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****, + typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****, + typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******, + typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******, + typename SrcViewType::value_type ******** + >::type >::type >::type >::type >::type >::type >::type >::type OutputData ; + + // Choose space. + // If the source view's template arg1 or arg2 is a space then use it, + // otherwise use the source view's execution space. + + typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type , + typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::device_type + >::type >::type OutputSpace ; + +public: + + // If keeping the layout then match non-data type arguments + // else keep execution space and memory traits. 
+ typedef typename + Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value + , Kokkos::View< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > + , Kokkos::View< OutputData , OutputViewLayout , OutputSpace + , typename SrcViewType::memory_traits + , Impl::ViewDefault > + >::type type ; +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +// Construct subview of a Rank 8 view +/* Subview constructor for eight range/index arguments. The members start default-initialized (data pointer NULL). The StaticAssert in the body ties this View type to ViewSubview< source view , SubArg0_type .. SubArg7_type >::type. The offset map is assigned from src.m_offset_map using the extent each ViewOffsetRange reports for its argument; only when the resulting capacity() is non-zero are m_management and m_tracker copied from src and m_ptr_on_device set to src.m_ptr_on_device plus the source offset of the eight begin indices. */ template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type > +template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type + , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type + , class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type + > +KOKKOS_INLINE_FUNCTION +View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >:: +View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src + , const SubArg0_type & arg0 + , const SubArg1_type & arg1 + , const SubArg2_type & arg2 + , const SubArg3_type & arg3 + , const SubArg4_type & arg4 + , const SubArg5_type & arg5 + , const SubArg6_type & arg6 + , const SubArg7_type & arg7 + ) + : m_ptr_on_device( (typename traits::value_type*) NULL) + , m_offset_map() + , m_management() + , m_tracker() +{ + // This constructor can only be used to construct a subview + // from the source view. This type must match the subview type + // deduced from the source view and subview arguments.
+ + typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > + , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type + , SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type > + ViewSubviewDeduction ; + + enum { is_a_valid_subview_constructor = + Impl::StaticAssert< + Impl::is_same< View , typename ViewSubviewDeduction::type >::value + >::value + }; + + if ( is_a_valid_subview_constructor ) { + + typedef Impl::ViewOffsetRange< SubArg0_type > R0 ; + typedef Impl::ViewOffsetRange< SubArg1_type > R1 ; + typedef Impl::ViewOffsetRange< SubArg2_type > R2 ; + typedef Impl::ViewOffsetRange< SubArg3_type > R3 ; + typedef Impl::ViewOffsetRange< SubArg4_type > R4 ; + typedef Impl::ViewOffsetRange< SubArg5_type > R5 ; + typedef Impl::ViewOffsetRange< SubArg6_type > R6 ; + typedef Impl::ViewOffsetRange< SubArg7_type > R7 ; + + // 'assign_subview' returns whether the subview offset_map + // introduces noncontiguity in the view. + const bool introduce_noncontiguity = + m_offset_map.assign_subview( src.m_offset_map + , R0::dimension( src.m_offset_map.N0 , arg0 ) + , R1::dimension( src.m_offset_map.N1 , arg1 ) + , R2::dimension( src.m_offset_map.N2 , arg2 ) + , R3::dimension( src.m_offset_map.N3 , arg3 ) + , R4::dimension( src.m_offset_map.N4 , arg4 ) + , R5::dimension( src.m_offset_map.N5 , arg5 ) + , R6::dimension( src.m_offset_map.N6 , arg6 ) + , R7::dimension( src.m_offset_map.N7 , arg7 ) + ); + + /* an empty subview keeps the NULL pointer and the default-constructed management and tracker */ if ( m_offset_map.capacity() ) { + + m_management = src.m_management ; + + if ( introduce_noncontiguity ) m_management.set_noncontiguous(); + + m_ptr_on_device = src.m_ptr_on_device + + src.m_offset_map( R0::begin( arg0 ) + , R1::begin( arg1 ) + , R2::begin( arg2 ) + , R3::begin( arg3 ) + , R4::begin( arg4 ) + , R5::begin( arg5 ) + , R6::begin( arg6 ) + , R7::begin( arg7 ) ); + m_tracker = src.m_tracker ; + } + } +} + +// Construct subview of a Rank 7 view +template< class DstDataType , class DstArg1Type , class
DstArg2Type , class DstArg3Type > +template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type + , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type + , class SubArg4_type , class SubArg5_type , class SubArg6_type + > +KOKKOS_INLINE_FUNCTION +View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >:: +View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src + , const SubArg0_type & arg0 + , const SubArg1_type & arg1 + , const SubArg2_type & arg2 + , const SubArg3_type & arg3 + , const SubArg4_type & arg4 + , const SubArg5_type & arg5 + , const SubArg6_type & arg6 + ) + : m_ptr_on_device( (typename traits::value_type*) NULL) + , m_offset_map() + , m_management() + , m_tracker() +{ + // This constructor can only be used to construct a subview + // from the source view. This type must match the subview type + // deduced from the source view and subview arguments. + + typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > + , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type + , SubArg4_type , SubArg5_type , SubArg6_type , void > + ViewSubviewDeduction ; + + enum { is_a_valid_subview_constructor = + Impl::StaticAssert< + Impl::is_same< View , typename ViewSubviewDeduction::type >::value + >::value + }; + + if ( is_a_valid_subview_constructor ) { + + typedef Impl::ViewOffsetRange< SubArg0_type > R0 ; + typedef Impl::ViewOffsetRange< SubArg1_type > R1 ; + typedef Impl::ViewOffsetRange< SubArg2_type > R2 ; + typedef Impl::ViewOffsetRange< SubArg3_type > R3 ; + typedef Impl::ViewOffsetRange< SubArg4_type > R4 ; + typedef Impl::ViewOffsetRange< SubArg5_type > R5 ; + typedef Impl::ViewOffsetRange< SubArg6_type > R6 ; + + // 'assign_subview' returns whether the subview offset_map + // introduces noncontiguity in the view. 
+ const bool introduce_noncontiguity = + m_offset_map.assign_subview( src.m_offset_map + , R0::dimension( src.m_offset_map.N0 , arg0 ) + , R1::dimension( src.m_offset_map.N1 , arg1 ) + , R2::dimension( src.m_offset_map.N2 , arg2 ) + , R3::dimension( src.m_offset_map.N3 , arg3 ) + , R4::dimension( src.m_offset_map.N4 , arg4 ) + , R5::dimension( src.m_offset_map.N5 , arg5 ) + , R6::dimension( src.m_offset_map.N6 , arg6 ) + , 0 + ); + + if ( m_offset_map.capacity() ) { + + m_management = src.m_management ; + + if ( introduce_noncontiguity ) m_management.set_noncontiguous(); + + m_ptr_on_device = src.m_ptr_on_device + + src.m_offset_map( R0::begin( arg0 ) + , R1::begin( arg1 ) + , R2::begin( arg2 ) + , R3::begin( arg3 ) + , R4::begin( arg4 ) + , R5::begin( arg5 ) + , R6::begin( arg6 ) + ); + m_tracker = src.m_tracker ; + } + } +} + +// Construct subview of a Rank 6 view +/* Subview constructor for six range/index arguments. The members start default-initialized (data pointer NULL). The StaticAssert in the body ties this View type to ViewSubview< source view , SubArg0_type .. SubArg5_type , void , void >::type. assign_subview receives 0 for the two unused extents; m_management, m_ptr_on_device and m_tracker are taken from src only when the resulting capacity() is non-zero. */ template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type > +template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type + , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type + , class SubArg4_type , class SubArg5_type + > +KOKKOS_INLINE_FUNCTION +View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >:: +View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src + , const SubArg0_type & arg0 + , const SubArg1_type & arg1 + , const SubArg2_type & arg2 + , const SubArg3_type & arg3 + , const SubArg4_type & arg4 + , const SubArg5_type & arg5 + ) + : m_ptr_on_device( (typename traits::value_type*) NULL) + , m_offset_map() + , m_management() + , m_tracker() +{ + // This constructor can only be used to construct a subview + // from the source view. This type must match the subview type + // deduced from the source view and subview arguments.
+ + typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > + , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type + , SubArg4_type , SubArg5_type , void , void > + ViewSubviewDeduction ; + + enum { is_a_valid_subview_constructor = + Impl::StaticAssert< + Impl::is_same< View , typename ViewSubviewDeduction::type >::value + >::value + }; + + if ( is_a_valid_subview_constructor ) { + + typedef Impl::ViewOffsetRange< SubArg0_type > R0 ; + typedef Impl::ViewOffsetRange< SubArg1_type > R1 ; + typedef Impl::ViewOffsetRange< SubArg2_type > R2 ; + typedef Impl::ViewOffsetRange< SubArg3_type > R3 ; + typedef Impl::ViewOffsetRange< SubArg4_type > R4 ; + typedef Impl::ViewOffsetRange< SubArg5_type > R5 ; + + // 'assign_subview' returns whether the subview offset_map + // introduces noncontiguity in the view. + const bool introduce_noncontiguity = + m_offset_map.assign_subview( src.m_offset_map + , R0::dimension( src.m_offset_map.N0 , arg0 ) + , R1::dimension( src.m_offset_map.N1 , arg1 ) + , R2::dimension( src.m_offset_map.N2 , arg2 ) + , R3::dimension( src.m_offset_map.N3 , arg3 ) + , R4::dimension( src.m_offset_map.N4 , arg4 ) + , R5::dimension( src.m_offset_map.N5 , arg5 ) + , 0 + , 0 + ); + + /* an empty subview keeps the NULL pointer and the default-constructed management and tracker */ if ( m_offset_map.capacity() ) { + + m_management = src.m_management ; + + if ( introduce_noncontiguity ) m_management.set_noncontiguous(); + + m_ptr_on_device = src.m_ptr_on_device + + src.m_offset_map( R0::begin( arg0 ) + , R1::begin( arg1 ) + , R2::begin( arg2 ) + , R3::begin( arg3 ) + , R4::begin( arg4 ) + , R5::begin( arg5 ) + ); + m_tracker = src.m_tracker ; + } + } +} + +// Construct subview of a Rank 5 view +template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type > +template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type + , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type + , class SubArg4_type + >
+KOKKOS_INLINE_FUNCTION +View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >:: +View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src + , const SubArg0_type & arg0 + , const SubArg1_type & arg1 + , const SubArg2_type & arg2 + , const SubArg3_type & arg3 + , const SubArg4_type & arg4 + ) + : m_ptr_on_device( (typename traits::value_type*) NULL) + , m_offset_map() + , m_management() + , m_tracker() +{ + // This constructor can only be used to construct a subview + // from the source view. This type must match the subview type + // deduced from the source view and subview arguments. + + typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > + , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type + , SubArg4_type , void , void , void > + ViewSubviewDeduction ; + + enum { is_a_valid_subview_constructor = + Impl::StaticAssert< + Impl::is_same< View , typename ViewSubviewDeduction::type >::value + >::value + }; + + if ( is_a_valid_subview_constructor ) { + + typedef Impl::ViewOffsetRange< SubArg0_type > R0 ; + typedef Impl::ViewOffsetRange< SubArg1_type > R1 ; + typedef Impl::ViewOffsetRange< SubArg2_type > R2 ; + typedef Impl::ViewOffsetRange< SubArg3_type > R3 ; + typedef Impl::ViewOffsetRange< SubArg4_type > R4 ; + + // 'assign_subview' returns whether the subview offset_map + // introduces noncontiguity in the view. 
+ const bool introduce_noncontiguity = + m_offset_map.assign_subview( src.m_offset_map + , R0::dimension( src.m_offset_map.N0 , arg0 ) + , R1::dimension( src.m_offset_map.N1 , arg1 ) + , R2::dimension( src.m_offset_map.N2 , arg2 ) + , R3::dimension( src.m_offset_map.N3 , arg3 ) + , R4::dimension( src.m_offset_map.N4 , arg4 ) + , 0 + , 0 + , 0 + ); + + if ( m_offset_map.capacity() ) { + + m_management = src.m_management ; + + if ( introduce_noncontiguity ) m_management.set_noncontiguous(); + + m_ptr_on_device = src.m_ptr_on_device + + src.m_offset_map( R0::begin( arg0 ) + , R1::begin( arg1 ) + , R2::begin( arg2 ) + , R3::begin( arg3 ) + , R4::begin( arg4 ) + ); + m_tracker = src.m_tracker ; + } + } +} + +// Construct subview of a Rank 4 view +/* Subview constructor for four range/index arguments. The members start default-initialized (data pointer NULL). The StaticAssert in the body ties this View type to ViewSubview< source view , SubArg0_type .. SubArg3_type , void , void , void , void >::type. assign_subview receives 0 for the four unused extents; m_management, m_ptr_on_device and m_tracker are taken from src only when the resulting capacity() is non-zero. */ template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type > +template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type + , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type + > +KOKKOS_INLINE_FUNCTION +View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >:: +View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src + , const SubArg0_type & arg0 + , const SubArg1_type & arg1 + , const SubArg2_type & arg2 + , const SubArg3_type & arg3 + ) + : m_ptr_on_device( (typename traits::value_type*) NULL) + , m_offset_map() + , m_management() + , m_tracker() +{ + // This constructor can only be used to construct a subview + // from the source view. This type must match the subview type + // deduced from the source view and subview arguments.
+ + typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > + , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type + , void , void , void , void > + ViewSubviewDeduction ; + + enum { is_a_valid_subview_constructor = + Impl::StaticAssert< + Impl::is_same< View , typename ViewSubviewDeduction::type >::value + >::value + }; + + if ( is_a_valid_subview_constructor ) { + + typedef Impl::ViewOffsetRange< SubArg0_type > R0 ; + typedef Impl::ViewOffsetRange< SubArg1_type > R1 ; + typedef Impl::ViewOffsetRange< SubArg2_type > R2 ; + typedef Impl::ViewOffsetRange< SubArg3_type > R3 ; + + // 'assign_subview' returns whether the subview offset_map + // introduces noncontiguity in the view. + const bool introduce_noncontiguity = + m_offset_map.assign_subview( src.m_offset_map + , R0::dimension( src.m_offset_map.N0 , arg0 ) + , R1::dimension( src.m_offset_map.N1 , arg1 ) + , R2::dimension( src.m_offset_map.N2 , arg2 ) + , R3::dimension( src.m_offset_map.N3 , arg3 ) + , 0 + , 0 + , 0 + , 0 + ); + + /* an empty subview keeps the NULL pointer and the default-constructed management and tracker */ if ( m_offset_map.capacity() ) { + + m_management = src.m_management ; + + if ( introduce_noncontiguity ) m_management.set_noncontiguous(); + + m_ptr_on_device = src.m_ptr_on_device + + src.m_offset_map( R0::begin( arg0 ) + , R1::begin( arg1 ) + , R2::begin( arg2 ) + , R3::begin( arg3 ) + ); + m_tracker = src.m_tracker ; + } + } +} + +// Construct subview of a Rank 3 view +template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type > +template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type + , class SubArg0_type , class SubArg1_type , class SubArg2_type + > +KOKKOS_INLINE_FUNCTION +View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >:: +View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src + , const SubArg0_type & arg0 + , const SubArg1_type & arg1 + , const SubArg2_type & arg2 + ) + :
m_ptr_on_device( (typename traits::value_type*) NULL) + , m_offset_map() + , m_management() + , m_tracker() +{ + // This constructor can only be used to construct a subview + // from the source view. This type must match the subview type + // deduced from the source view and subview arguments. + + typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > + , SubArg0_type , SubArg1_type , SubArg2_type , void , void , void , void , void > + ViewSubviewDeduction ; + + enum { is_a_valid_subview_constructor = + Impl::StaticAssert< + Impl::is_same< View , typename ViewSubviewDeduction::type >::value + >::value + }; + + if ( is_a_valid_subview_constructor ) { + + typedef Impl::ViewOffsetRange< SubArg0_type > R0 ; + typedef Impl::ViewOffsetRange< SubArg1_type > R1 ; + typedef Impl::ViewOffsetRange< SubArg2_type > R2 ; + + // 'assign_subview' returns whether the subview offset_map + // introduces noncontiguity in the view. + const bool introduce_noncontiguity = + m_offset_map.assign_subview( src.m_offset_map + , R0::dimension( src.m_offset_map.N0 , arg0 ) + , R1::dimension( src.m_offset_map.N1 , arg1 ) + , R2::dimension( src.m_offset_map.N2 , arg2 ) + , 0 , 0 , 0 , 0 , 0); + + if ( m_offset_map.capacity() ) { + + m_management = src.m_management ; + + if ( introduce_noncontiguity ) m_management.set_noncontiguous(); + + m_ptr_on_device = src.m_ptr_on_device + + src.m_offset_map( R0::begin( arg0 ) + , R1::begin( arg1 ) + , R2::begin( arg2 ) + ); + m_tracker = src.m_tracker ; + } + } +} + +// Construct subview of a Rank 2 view +template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type > +template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type + , class SubArg0_type , class SubArg1_type + > +KOKKOS_INLINE_FUNCTION +View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >:: +View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , 
SrcArg3Type , Impl::ViewDefault > & src + , const SubArg0_type & arg0 + , const SubArg1_type & arg1 + ) + : m_ptr_on_device( (typename traits::value_type*) NULL) + , m_offset_map() + , m_management() + , m_tracker() +{ + // This constructor can only be used to construct a subview + // from the source view. This type must match the subview type + // deduced from the source view and subview arguments. + + typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > + , SubArg0_type , SubArg1_type , void , void , void , void , void , void > + ViewSubviewDeduction ; + + enum { is_a_valid_subview_constructor = + Impl::StaticAssert< + Impl::is_same< View , typename ViewSubviewDeduction::type >::value + >::value + }; + + if ( is_a_valid_subview_constructor ) { + + typedef Impl::ViewOffsetRange< SubArg0_type > R0 ; + typedef Impl::ViewOffsetRange< SubArg1_type > R1 ; + + // 'assign_subview' returns whether the subview offset_map + // introduces noncontiguity in the view. 
+ const bool introduce_noncontiguity = + m_offset_map.assign_subview( src.m_offset_map + , R0::dimension( src.m_offset_map.N0 , arg0 ) + , R1::dimension( src.m_offset_map.N1 , arg1 ) + , 0 , 0 , 0 , 0 , 0 , 0 ); + + if ( m_offset_map.capacity() ) { + + m_management = src.m_management ; + + if ( introduce_noncontiguity ) m_management.set_noncontiguous(); + + m_ptr_on_device = src.m_ptr_on_device + + src.m_offset_map( R0::begin( arg0 ) + , R1::begin( arg1 ) + ); + m_tracker = src.m_tracker ; + } + } +} + +// Construct subview of a Rank 1 view +/* Subview constructor for a single range/index argument. The members start default-initialized (data pointer NULL). The StaticAssert in the body ties this View type to ViewSubview< source view , SubArg0_type , void (seven times) >::type. assign_subview receives 0 for the seven unused extents; m_management, m_ptr_on_device and m_tracker are taken from src only when the resulting capacity() is non-zero. */ template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type > +template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type + , class SubArg0_type + > +KOKKOS_INLINE_FUNCTION +View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >:: +View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src + , const SubArg0_type & arg0 + ) + : m_ptr_on_device( (typename traits::value_type*) NULL) + , m_offset_map() + , m_management() + , m_tracker() +{ + // This constructor can only be used to construct a subview + // from the source view. This type must match the subview type + // deduced from the source view and subview arguments. + + typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > + , SubArg0_type , void , void , void , void , void , void , void > + ViewSubviewDeduction ; + + enum { is_a_valid_subview_constructor = + Impl::StaticAssert< + Impl::is_same< View , typename ViewSubviewDeduction::type >::value + >::value + }; + + if ( is_a_valid_subview_constructor ) { + + typedef Impl::ViewOffsetRange< SubArg0_type > R0 ; + + // 'assign_subview' returns whether the subview offset_map + // introduces noncontiguity in the view.
+ const bool introduce_noncontiguity = + m_offset_map.assign_subview( src.m_offset_map + , R0::dimension( src.m_offset_map.N0 , arg0 ) + , 0 , 0 , 0 , 0 , 0 , 0 , 0 ); + + if ( m_offset_map.capacity() ) { + + m_management = src.m_management ; + + if ( introduce_noncontiguity ) m_management.set_noncontiguous(); + + m_ptr_on_device = src.m_ptr_on_device + + src.m_offset_map( R0::begin( arg0 ) + ); + m_tracker = src.m_tracker ; + } + } +} + +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #ifndef KOKKOS_VIEWDEFAULT_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewOffset.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewOffset.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5748e722c0076e9f47a7c538bd4d2b6f7458e9b8 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_ViewOffset.hpp @@ -0,0 +1,1341 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3.
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_VIEWOFFSET_HPP +#define KOKKOS_VIEWOFFSET_HPP + +#include <Kokkos_Pair.hpp> +#include <Kokkos_Layout.hpp> +#include <impl/Kokkos_Traits.hpp> +#include <impl/Kokkos_Shape.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { namespace Impl { + +template < class ShapeType , class LayoutType , typename Enable = void > +struct ViewOffset ; + +//---------------------------------------------------------------------------- +// LayoutLeft AND ( 1 >= rank OR 0 == rank_dynamic ) : no padding / striding +/* ViewOffset specialization chosen by the enable_if below: LayoutLeft with rank <= 1 or with no dynamic dimension. It inherits the extents N0 .. N7 from ShapeType, stores no stride of its own (has_padding is false) and maps indices as i0 + N0 * ( i1 + N1 * ( i2 + ... ) ), so the leftmost index has stride one. */ template < class ShapeType > +struct ViewOffset< ShapeType , LayoutLeft + , typename enable_if<( 1 >= ShapeType::rank + || + 0 == ShapeType::rank_dynamic + )>::type > + : public ShapeType +{ + typedef size_t size_type ; + typedef ShapeType shape_type ; + typedef LayoutLeft array_layout ; + + enum { has_padding = false }; + + template< unsigned R > + KOKKOS_INLINE_FUNCTION + void assign( size_t n ) + { assign_shape_dimension<R>( *this , n ); } + + // Return whether the subview introduced noncontiguity + template< class S , class L > + KOKKOS_INLINE_FUNCTION + typename Impl::enable_if<( 0 == shape_type::rank && + Impl::is_same<L,LayoutLeft>::value + ), bool >::type + assign_subview( const ViewOffset<S,L,void> & + , const size_t n0 + , const size_t n1 + , const size_t n2 + , const size_t n3 + , const size_t n4 + , const size_t n5 + , const size_t n6 + , const size_t n7 + ) + { + return false ; // did not introduce noncontiguity + } + + // This subview must be 1 == rank and 1 == rank_dynamic. + // The source dimension #0 must be non-zero and all other dimensions are zero.
+ // Return whether the subview introduced noncontiguity + template< class S , class L > + KOKKOS_INLINE_FUNCTION + typename Impl::enable_if<( 1 == shape_type::rank && + 1 == shape_type::rank_dynamic && + 1 <= S::rank && + Impl::is_same<L,LayoutLeft>::value + ), bool >::type + assign_subview( const ViewOffset<S,L,void> & + , const size_t n0 + , const size_t n1 + , const size_t n2 + , const size_t n3 + , const size_t n4 + , const size_t n5 + , const size_t n6 + , const size_t n7 + ) + { + // n1 .. n7 must be zero + shape_type::N0 = n0 ; + return false ; // did not introduce noncontiguity + } + + + KOKKOS_INLINE_FUNCTION + void assign( size_t n0 , size_t n1 , size_t n2 , size_t n3 + , size_t n4 , size_t n5 , size_t n6 , size_t n7 + , size_t = 0 ) + { shape_type::assign( *this , n0, n1, n2, n3, n4, n5, n6, n7 ); } + + template< class ShapeRHS > + KOKKOS_INLINE_FUNCTION + void assign( const ViewOffset< ShapeRHS , LayoutLeft > & rhs + , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank) + && + int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic) + )>::type * = 0 ) + { shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); } + + template< class ShapeRHS > + KOKKOS_INLINE_FUNCTION + void assign( const ViewOffset< ShapeRHS , LayoutRight > & rhs + , typename enable_if<( 1 == int(ShapeRHS::rank) + && + 1 == int(shape_type::rank) + && + 1 == int(shape_type::rank_dynamic) + )>::type * = 0 ) + { shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); } + + /* Nothing to pad: this specialization stores no stride. */ KOKKOS_INLINE_FUNCTION + void set_padding() {} + + KOKKOS_INLINE_FUNCTION + size_type cardinality() const + { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; } + + /* Same product as cardinality() because no padding is stored. */ KOKKOS_INLINE_FUNCTION + size_type capacity() const + { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 *
shape_type::N5 * shape_type::N6 * shape_type::N7 ; } + + // Stride with [ rank ] value is the total length + template< typename iType > + KOKKOS_INLINE_FUNCTION + void stride( iType * const s ) const + { + s[0] = 1 ; + if ( 0 < shape_type::rank ) { s[1] = shape_type::N0 ; } + if ( 1 < shape_type::rank ) { s[2] = s[1] * shape_type::N1 ; } + if ( 2 < shape_type::rank ) { s[3] = s[2] * shape_type::N2 ; } + if ( 3 < shape_type::rank ) { s[4] = s[3] * shape_type::N3 ; } + if ( 4 < shape_type::rank ) { s[5] = s[4] * shape_type::N4 ; } + if ( 5 < shape_type::rank ) { s[6] = s[5] * shape_type::N5 ; } + if ( 6 < shape_type::rank ) { s[7] = s[6] * shape_type::N6 ; } + if ( 7 < shape_type::rank ) { s[8] = s[7] * shape_type::N7 ; } + } + + /* stride_k() is the product of the extents N0 .. N(k-1); stride_0() is 1. */ KOKKOS_INLINE_FUNCTION size_type stride_0() const { return 1 ; } + KOKKOS_INLINE_FUNCTION size_type stride_1() const { return shape_type::N0 ; } + KOKKOS_INLINE_FUNCTION size_type stride_2() const { return shape_type::N0 * shape_type::N1 ; } + KOKKOS_INLINE_FUNCTION size_type stride_3() const { return shape_type::N0 * shape_type::N1 * shape_type::N2 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_4() const + { return shape_type::N0 * shape_type::N1 * shape_type::N2 * shape_type::N3 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_5() const + { return shape_type::N0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_6() const + { return shape_type::N0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_7() const + { return shape_type::N0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 ; } + + // rank 1 + template< typename I0 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const & i0 ) const { return i0 ; } + + // rank 2 + template < typename I0 , typename I1 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const & i0
, I1 const & i1 ) const + { return i0 + shape_type::N0 * i1 ; } + + //rank 3 + template <typename I0, typename I1, typename I2> + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0 + , I1 const& i1 + , I2 const& i2 + ) const + { + return i0 + shape_type::N0 * ( + i1 + shape_type::N1 * i2 ); + } + + //rank 4 + template <typename I0, typename I1, typename I2, typename I3> + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3 ) const + { + return i0 + shape_type::N0 * ( + i1 + shape_type::N1 * ( + i2 + shape_type::N2 * i3 )); + } + + //rank 5 + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4 ) const + { + return i0 + shape_type::N0 * ( + i1 + shape_type::N1 * ( + i2 + shape_type::N2 * ( + i3 + shape_type::N3 * i4 ))); + } + + //rank 6 + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4, typename I5 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5 ) const + { + return i0 + shape_type::N0 * ( + i1 + shape_type::N1 * ( + i2 + shape_type::N2 * ( + i3 + shape_type::N3 * ( + i4 + shape_type::N4 * i5 )))); + } + + //rank 7 + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4, typename I5, typename I6 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6) const + { + return i0 + shape_type::N0 * ( + i1 + shape_type::N1 * ( + i2 + shape_type::N2 * ( + i3 + shape_type::N3 * ( + i4 + shape_type::N4 * ( + i5 + shape_type::N5 * i6 ))))); + } + + //rank 8 + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4, typename I5, typename I6, typename I7 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()(
I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6, I7 const& i7) const + { + return i0 + shape_type::N0 * ( + i1 + shape_type::N1 * ( + i2 + shape_type::N2 * ( + i3 + shape_type::N3 * ( + i4 + shape_type::N4 * ( + i5 + shape_type::N5 * ( + i6 + shape_type::N6 * i7 )))))); + } +}; + +//---------------------------------------------------------------------------- +// LayoutLeft AND ( 1 < rank AND 0 < rank_dynamic ) : has padding / striding +template < class ShapeType > +struct ViewOffset< ShapeType , LayoutLeft + , typename enable_if<( 1 < ShapeType::rank + && + 0 < ShapeType::rank_dynamic + )>::type > + : public ShapeType +{ + typedef size_t size_type ; + typedef ShapeType shape_type ; + typedef LayoutLeft array_layout ; + + enum { has_padding = true }; + + size_type S0 ; + + // This subview must be 2 == rank and 2 == rank_dynamic + // due to only having stride #0. + // The source dimension #0 must be non-zero for stride-one leading dimension. + // At most subsequent dimension can be non-zero. + // Return whether the subview introduced noncontiguity.
+ /* Enabled only for a rank-2, fully dynamic destination taken from a LayoutLeft source of rank >= 2. Stores n0 as N0, then takes N1 and the stride S0 from the first non-zero extent among n1 .. n7 (n2 .. n7 are consulted only when the source rank exceeds that index); both stay 0 when none is non-zero. Returns true when N0 differs from rhs.N0 or n1 is zero. */ template< class S , class L > + KOKKOS_INLINE_FUNCTION + typename Impl::enable_if<( 2 == shape_type::rank && + 2 == shape_type::rank_dynamic && + 2 <= S::rank && + Impl::is_same<L,LayoutLeft>::value + ), bool >::type + assign_subview( const ViewOffset<S,L,void> & rhs + , const size_t n0 + , const size_t n1 + , const size_t n2 + , const size_t n3 + , const size_t n4 + , const size_t n5 + , const size_t n6 + , const size_t n7 + ) + { + // N1 = second non-zero dimension + // S0 = stride for second non-zero dimension + shape_type::N0 = n0 ; + shape_type::N1 = 0 ; + S0 = 0 ; + + if ( n1 ) { shape_type::N1 = n1 ; S0 = rhs.stride_1(); } + else if ( 2 < S::rank && n2 ) { shape_type::N1 = n2 ; S0 = rhs.stride_2(); } + else if ( 3 < S::rank && n3 ) { shape_type::N1 = n3 ; S0 = rhs.stride_3(); } + else if ( 4 < S::rank && n4 ) { shape_type::N1 = n4 ; S0 = rhs.stride_4(); } + else if ( 5 < S::rank && n5 ) { shape_type::N1 = n5 ; S0 = rhs.stride_5(); } + else if ( 6 < S::rank && n6 ) { shape_type::N1 = n6 ; S0 = rhs.stride_6(); } + else if ( 7 < S::rank && n7 ) { shape_type::N1 = n7 ; S0 = rhs.stride_7(); } + + // Introduce noncontiguity if change the first dimension + // or took a range of a dimension after the second.
+ return ( size_t(shape_type::N0) != size_t(rhs.N0) ) || ( 0 == n1 ); + } + + + template< unsigned R > + KOKKOS_INLINE_FUNCTION + void assign( size_t n ) + { assign_shape_dimension<R>( *this , n ); } + + + /* Assigns all eight extents and resets the stride S0 to the unpadded value N0. */ KOKKOS_INLINE_FUNCTION + void assign( size_t n0 , size_t n1 , size_t n2 , size_t n3 + , size_t n4 , size_t n5 , size_t n6 , size_t n7 + , size_t = 0 ) + { shape_type::assign( *this , n0, n1, n2, n3, n4, n5, n6, n7 ); S0 = shape_type::N0 ; } + + template< class ShapeRHS > + KOKKOS_INLINE_FUNCTION + void assign( const ViewOffset< ShapeRHS , LayoutLeft > & rhs + , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank) + && + int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic) + && + int(ShapeRHS::rank_dynamic) == 0 + )>::type * = 0 ) + { + shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); + S0 = shape_type::N0 ; // No padding when dynamic_rank == 0 + } + + template< class ShapeRHS > + KOKKOS_INLINE_FUNCTION + void assign( const ViewOffset< ShapeRHS , LayoutLeft > & rhs + , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank) + && + int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic) + && + int(ShapeRHS::rank_dynamic) > 0 + )>::type * = 0 ) + { + shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); + S0 = rhs.S0 ; // possibly padding when dynamic rank > 0 + } + + /* Rounds S0 up to the next multiple of MEMORY_ALIGNMENT / scalar_size. Applies only when scalar_size divides MEMORY_ALIGNMENT evenly and S0 exceeds MEMORY_ALIGNMENT_THRESHOLD times that quotient. */ KOKKOS_INLINE_FUNCTION + void set_padding() + { + enum { div = MEMORY_ALIGNMENT / shape_type::scalar_size }; + enum { mod = MEMORY_ALIGNMENT % shape_type::scalar_size }; + enum { align = 0 == mod ? div : 0 }; + + if ( align && MEMORY_ALIGNMENT_THRESHOLD * align < S0 ) { + + const size_type count_mod = S0 % ( div ?
div : 1 ); + + if ( count_mod ) { S0 += align - count_mod ; } + } + } + + KOKKOS_INLINE_FUNCTION + size_type cardinality() const + { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; } + + /* Unlike cardinality(), uses the stride S0 (possibly padded by set_padding) in place of N0. */ KOKKOS_INLINE_FUNCTION + size_type capacity() const + { return size_type(S0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; } + + // Stride with [ rank ] as total length + template< typename iType > + KOKKOS_INLINE_FUNCTION + void stride( iType * const s ) const + { + s[0] = 1 ; + if ( 0 < shape_type::rank ) { s[1] = S0 ; } + if ( 1 < shape_type::rank ) { s[2] = s[1] * shape_type::N1 ; } + if ( 2 < shape_type::rank ) { s[3] = s[2] * shape_type::N2 ; } + if ( 3 < shape_type::rank ) { s[4] = s[3] * shape_type::N3 ; } + if ( 4 < shape_type::rank ) { s[5] = s[4] * shape_type::N4 ; } + if ( 5 < shape_type::rank ) { s[6] = s[5] * shape_type::N5 ; } + if ( 6 < shape_type::rank ) { s[7] = s[6] * shape_type::N6 ; } + if ( 7 < shape_type::rank ) { s[8] = s[7] * shape_type::N7 ; } + } + + KOKKOS_INLINE_FUNCTION size_type stride_0() const { return 1 ; } + KOKKOS_INLINE_FUNCTION size_type stride_1() const { return S0 ; } + KOKKOS_INLINE_FUNCTION size_type stride_2() const { return S0 * shape_type::N1 ; } + KOKKOS_INLINE_FUNCTION size_type stride_3() const { return S0 * shape_type::N1 * shape_type::N2 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_4() const + { return S0 * shape_type::N1 * shape_type::N2 * shape_type::N3 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_5() const + { return S0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_6() const + { return S0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_7() const + { return S0 * shape_type::N1 *
shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 ; } + + /* The index maps below use S0, not N0, as the stride of the second index. */ // rank 2 + template < typename I0 , typename I1 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const & i0 , I1 const & i1) const + { return i0 + S0 * i1 ; } + + //rank 3 + template <typename I0, typename I1, typename I2> + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 ) const + { + return i0 + S0 * ( + i1 + shape_type::N1 * i2 ); + } + + //rank 4 + template <typename I0, typename I1, typename I2, typename I3> + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3 ) const + { + return i0 + S0 * ( + i1 + shape_type::N1 * ( + i2 + shape_type::N2 * i3 )); + } + + //rank 5 + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4 ) const + { + return i0 + S0 * ( + i1 + shape_type::N1 * ( + i2 + shape_type::N2 * ( + i3 + shape_type::N3 * i4 ))); + } + + //rank 6 + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4, typename I5 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5 ) const + { + return i0 + S0 * ( + i1 + shape_type::N1 * ( + i2 + shape_type::N2 * ( + i3 + shape_type::N3 * ( + i4 + shape_type::N4 * i5 )))); + } + + //rank 7 + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4, typename I5, typename I6 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6 ) const + { + return i0 + S0 * ( + i1 + shape_type::N1 * ( + i2 + shape_type::N2 * ( + i3 + shape_type::N3 * ( + i4 + shape_type::N4 * ( + i5 + shape_type::N5 * i6 ))))); + } + + //rank 8 + template < typename I0, typename I1,
typename I2, typename I3 + ,typename I4, typename I5, typename I6, typename I7 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6, I7 const& i7 ) const + { + return i0 + S0 * ( + i1 + shape_type::N1 * ( + i2 + shape_type::N2 * ( + i3 + shape_type::N3 * ( + i4 + shape_type::N4 * ( + i5 + shape_type::N5 * ( + i6 + shape_type::N6 * i7 )))))); + } +}; + +//---------------------------------------------------------------------------- +// LayoutRight AND ( 1 >= rank OR 1 >= rank_dynamic ) : no padding / striding +template < class ShapeType > +struct ViewOffset< ShapeType , LayoutRight + , typename enable_if<( 1 >= ShapeType::rank + || + 1 >= ShapeType::rank_dynamic + )>::type > + : public ShapeType +{ + typedef size_t size_type; + typedef ShapeType shape_type; + typedef LayoutRight array_layout ; + + enum { has_padding = false }; + + // Rank-0 overload: this subview must be 0 == rank (see the enable_if below). + // No dimension is assigned; the n0 .. n7 arguments are not used. + // Always returns false, i.e. no noncontiguity was introduced. + template< class S , class L > + KOKKOS_INLINE_FUNCTION + typename Impl::enable_if<( 0 == shape_type::rank && + Impl::is_same<L,LayoutRight>::value + ), bool >::type + assign_subview( const ViewOffset<S,L,void> & + , const size_t n0 + , const size_t n1 + , const size_t n2 + , const size_t n3 + , const size_t n4 + , const size_t n5 + , const size_t n6 + , const size_t n7 + ) + { return false ; } + + // This subview must be 1 == rank and 1 == rank_dynamic + // The source view's last dimension must be non-zero + // Return whether the subview introduced noncontiguity + template< class S , class L > + KOKKOS_INLINE_FUNCTION + typename Impl::enable_if<( 1 == shape_type::rank && + 1 == shape_type::rank_dynamic && + 1 <= S::rank && + Impl::is_same<L,LayoutRight>::value + ), bool >::type + assign_subview( const ViewOffset<S,L,void> & + , const size_t n0 + , const size_t n1 + , const
size_t n2 + , const size_t n3 + , const size_t n4 + , const size_t n5 + , const size_t n6 + , const size_t n7 + ) + { + shape_type::N0 = S::rank == 1 ? n0 : ( + S::rank == 2 ? n1 : ( + S::rank == 3 ? n2 : ( + S::rank == 4 ? n3 : ( + S::rank == 5 ? n4 : ( + S::rank == 6 ? n5 : ( + S::rank == 7 ? n6 : n7 )))))); + // should have n0 .. n_(rank-2) equal zero + return false ; + } + + template< unsigned R > + KOKKOS_INLINE_FUNCTION + void assign( size_t n ) + { assign_shape_dimension<R>( *this , n ); } + + KOKKOS_INLINE_FUNCTION + void assign( size_t n0 , size_t n1 , size_t n2 , size_t n3 + , size_t n4 , size_t n5 , size_t n6 , size_t n7 + , size_t = 0 ) + { shape_type::assign( *this , n0, n1, n2, n3, n4, n5, n6, n7 ); } + + template< class ShapeRHS > + KOKKOS_INLINE_FUNCTION + void assign( const ViewOffset< ShapeRHS , LayoutRight > & rhs + , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank) + && + int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic) + )>::type * = 0 ) + { shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); } + + template< class ShapeRHS > + KOKKOS_INLINE_FUNCTION + void assign( const ViewOffset< ShapeRHS , LayoutLeft > & rhs + , typename enable_if<( 1 == int(ShapeRHS::rank) + && + 1 == int(shape_type::rank) + && + 1 == int(shape_type::rank_dynamic) + )>::type * = 0 ) + { shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); } + + KOKKOS_INLINE_FUNCTION + void set_padding() {} + + KOKKOS_INLINE_FUNCTION + size_type cardinality() const + { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; } + + KOKKOS_INLINE_FUNCTION + size_type capacity() const + { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; } + + // Product of the dimensions N1 .. N7 (the same factors stride_0() multiplies), + // widened to size_type before multiplying. + // Annotated like every other member of this struct so it is declared the same way. + KOKKOS_INLINE_FUNCTION + size_type stride_R() const + { + return 
size_type(shape_type::N1) * shape_type::N2 * shape_type::N3 * + shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; + } + + // Stride with [rank] as total length + template< typename iType > + KOKKOS_INLINE_FUNCTION + void stride( iType * const s ) const + { + size_type n = 1 ; + if ( 7 < shape_type::rank ) { s[7] = n ; n *= shape_type::N7 ; } + if ( 6 < shape_type::rank ) { s[6] = n ; n *= shape_type::N6 ; } + if ( 5 < shape_type::rank ) { s[5] = n ; n *= shape_type::N5 ; } + if ( 4 < shape_type::rank ) { s[4] = n ; n *= shape_type::N4 ; } + if ( 3 < shape_type::rank ) { s[3] = n ; n *= shape_type::N3 ; } + if ( 2 < shape_type::rank ) { s[2] = n ; n *= shape_type::N2 ; } + if ( 1 < shape_type::rank ) { s[1] = n ; n *= shape_type::N1 ; } + if ( 0 < shape_type::rank ) { s[0] = n ; } + s[shape_type::rank] = n * shape_type::N0 ; + } + + KOKKOS_INLINE_FUNCTION + size_type stride_7() const { return 1 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_6() const { return shape_type::N7 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_5() const { return shape_type::N7 * shape_type::N6 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_4() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_3() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_2() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 * shape_type::N3 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_1() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 * shape_type::N3 * shape_type::N2 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_0() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 * shape_type::N3 * shape_type::N2 * shape_type::N1 ; } + + // rank 1 + template <typename I0> + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0) const + { + 
return i0 ; + } + + // rank 2 + template <typename I0, typename I1> + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1 ) const + { + return i1 + shape_type::N1 * i0 ; + } + + template <typename I0, typename I1, typename I2> + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 ) const + { + return i2 + shape_type::N2 * ( + i1 + shape_type::N1 * ( i0 )); + } + + template <typename I0, typename I1, typename I2, typename I3> + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3 ) const + { + return i3 + shape_type::N3 * ( + i2 + shape_type::N2 * ( + i1 + shape_type::N1 * ( i0 ))); + } + + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4 ) const + { + return i4 + shape_type::N4 * ( + i3 + shape_type::N3 * ( + i2 + shape_type::N2 * ( + i1 + shape_type::N1 * ( i0 )))); + } + + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4, typename I5 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5 ) const + { + return i5 + shape_type::N5 * ( + i4 + shape_type::N4 * ( + i3 + shape_type::N3 * ( + i2 + shape_type::N2 * ( + i1 + shape_type::N1 * ( i0 ))))); + } + + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4, typename I5, typename I6 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6 ) const + { + return i6 + shape_type::N6 * ( + i5 + shape_type::N5 * ( + i4 + shape_type::N4 * ( + i3 + shape_type::N3 * ( + i2 + shape_type::N2 * ( + i1 + shape_type::N1 * ( i0 )))))); + } + + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4, typename 
I5, typename I6, typename I7 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6, I7 const& i7 ) const + { + return i7 + shape_type::N7 * ( + i6 + shape_type::N6 * ( + i5 + shape_type::N5 * ( + i4 + shape_type::N4 * ( + i3 + shape_type::N3 * ( + i2 + shape_type::N2 * ( + i1 + shape_type::N1 * ( i0 ))))))); + } +}; + +//---------------------------------------------------------------------------- +// LayoutRight AND ( 1 < rank AND 1 < rank_dynamic ) : has padding / striding +template < class ShapeType > +struct ViewOffset< ShapeType , LayoutRight + , typename enable_if<( 1 < ShapeType::rank + && + 1 < ShapeType::rank_dynamic + )>::type > + : public ShapeType +{ + typedef size_t size_type; + typedef ShapeType shape_type; + typedef LayoutRight array_layout ; + + enum { has_padding = true }; + + size_type SR ; + + // This subview must be 2 == rank and 2 == rank_dynamic + // due to only having stride #(rank-1). + // The source dimension #(rank-1) must be non-zero for stride-one leading dimension. + // At most one prior dimension can be non-zero. + // Return whether the subview introduced noncontiguity. + template< class S , class L > + KOKKOS_INLINE_FUNCTION + typename Impl::enable_if<( 2 == shape_type::rank && + 2 == shape_type::rank_dynamic && + 2 <= S::rank && + Impl::is_same<L,LayoutRight>::value + ), bool >::type + assign_subview( const ViewOffset<S,L,void> & rhs + , const size_t n0 + , const size_t n1 + , const size_t n2 + , const size_t n3 + , const size_t n4 + , const size_t n5 + , const size_t n6 + , const size_t n7 + ) + { + const size_type nR = S::rank == 2 ? n1 : ( + S::rank == 3 ? n2 : ( + S::rank == 4 ? n3 : ( + S::rank == 5 ? n4 : ( + S::rank == 6 ? n5 : ( + S::rank == 7 ? 
n6 : n7 ))))); + + // N0 = first non-zero dimension among n0 .. n_(rank-2) of the source + // N1 = nR, the extent requested for the source's last dimension + // SR = source stride of the dimension selected for N0 (stays 0 if none is non-zero) + shape_type::N0 = 0 ; + shape_type::N1 = nR ; + SR = 0 ; + + if ( n0 ) { shape_type::N0 = n0 ; SR = rhs.stride_0(); } + else if ( 2 < S::rank && n1 ) { shape_type::N0 = n1 ; SR = rhs.stride_1(); } + else if ( 3 < S::rank && n2 ) { shape_type::N0 = n2 ; SR = rhs.stride_2(); } + else if ( 4 < S::rank && n3 ) { shape_type::N0 = n3 ; SR = rhs.stride_3(); } + else if ( 5 < S::rank && n4 ) { shape_type::N0 = n4 ; SR = rhs.stride_4(); } + else if ( 6 < S::rank && n5 ) { shape_type::N0 = n5 ; SR = rhs.stride_5(); } + else if ( 7 < S::rank && n6 ) { shape_type::N0 = n6 ; SR = rhs.stride_6(); } + + // Report noncontiguity when the last dimension's extent differs from the source's, + // or when the second-to-last source dimension was not kept (its extent argument is zero). + + return 2 == S::rank ? ( size_t(shape_type::N1) != size_t(rhs.N1) || 0 == n0 ) : ( + 3 == S::rank ? ( size_t(shape_type::N1) != size_t(rhs.N2) || 0 == n1 ) : ( + 4 == S::rank ? ( size_t(shape_type::N1) != size_t(rhs.N3) || 0 == n2 ) : ( + 5 == S::rank ? ( size_t(shape_type::N1) != size_t(rhs.N4) || 0 == n3 ) : ( + 6 == S::rank ? ( size_t(shape_type::N1) != size_t(rhs.N5) || 0 == n4 ) : ( + 7 == S::rank ? 
( size_t(shape_type::N1) != size_t(rhs.N6) || 0 == n5 ) : ( + ( size_t(shape_type::N1) != size_t(rhs.N7) || 0 == n6 ) )))))); + } + + template< unsigned R > + KOKKOS_INLINE_FUNCTION + void assign( size_t n ) + { assign_shape_dimension<R>( *this , n ); } + + KOKKOS_INLINE_FUNCTION + void assign( size_t n0 , size_t n1 , size_t n2 , size_t n3 + , size_t n4 , size_t n5 , size_t n6 , size_t n7 + , size_t = 0 ) + { + shape_type::assign( *this , n0, n1, n2, n3, n4, n5, n6, n7 ); + SR = size_type(shape_type::N1) * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; + } + + // Copy the shape from a same-rank LayoutRight offset that has at most one dynamic + // dimension; the row stride SR is recomputed from this object's own dimensions. + template< class ShapeRHS > + KOKKOS_INLINE_FUNCTION + void assign( const ViewOffset< ShapeRHS , LayoutRight > & rhs + , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank) + && + int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic) + && + int(ShapeRHS::rank_dynamic) <= 1 + )>::type * = 0 ) + { + shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); + // Widen to size_type before multiplying, exactly as the dimension-list assign() does, + // so the product is not formed in a possibly narrower dimension type. + SR = size_type(shape_type::N1) * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; + } + + template< class ShapeRHS > + KOKKOS_INLINE_FUNCTION + void assign( const ViewOffset< ShapeRHS , LayoutRight > & rhs + , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank) + && + int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic) + && + int(ShapeRHS::rank_dynamic) > 1 + )>::type * = 0 ) + { + shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); + SR = rhs.SR ; + } + + KOKKOS_INLINE_FUNCTION + void set_padding() + { + enum { div = MEMORY_ALIGNMENT / shape_type::scalar_size }; + enum { mod = MEMORY_ALIGNMENT % shape_type::scalar_size }; + enum { align = 0 == mod ? div : 0 }; + + if ( align && MEMORY_ALIGNMENT_THRESHOLD * align < SR ) { + + const size_type count_mod = SR % ( div ? 
div : 1 ); + + if ( count_mod ) { SR += align - count_mod ; } + } + } + + KOKKOS_INLINE_FUNCTION + size_type cardinality() const + { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; } + + KOKKOS_INLINE_FUNCTION + size_type capacity() const { return shape_type::N0 * SR ; } + + template< typename iType > + KOKKOS_INLINE_FUNCTION + void stride( iType * const s ) const + { + size_type n = 1 ; + if ( 7 < shape_type::rank ) { s[7] = n ; n *= shape_type::N7 ; } + if ( 6 < shape_type::rank ) { s[6] = n ; n *= shape_type::N6 ; } + if ( 5 < shape_type::rank ) { s[5] = n ; n *= shape_type::N5 ; } + if ( 4 < shape_type::rank ) { s[4] = n ; n *= shape_type::N4 ; } + if ( 3 < shape_type::rank ) { s[3] = n ; n *= shape_type::N3 ; } + if ( 2 < shape_type::rank ) { s[2] = n ; n *= shape_type::N2 ; } + if ( 1 < shape_type::rank ) { s[1] = n ; n *= shape_type::N1 ; } + if ( 0 < shape_type::rank ) { s[0] = SR ; } + s[shape_type::rank] = SR * shape_type::N0 ; + } + + KOKKOS_INLINE_FUNCTION + size_type stride_7() const { return 1 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_6() const { return shape_type::N7 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_5() const { return shape_type::N7 * shape_type::N6 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_4() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_3() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_2() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 * shape_type::N3 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_1() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 * shape_type::N3 * shape_type::N2 ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_0() const { return SR ; } + + // rank 2 + template <typename 
I0, typename I1> + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1 ) const + { + return i1 + i0 * SR ; + } + + template <typename I0, typename I1, typename I2> + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 ) const + { + return i2 + shape_type::N2 * ( i1 ) + + i0 * SR ; + } + + template <typename I0, typename I1, typename I2, typename I3> + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3 ) const + { + return i3 + shape_type::N3 * ( + i2 + shape_type::N2 * ( i1 )) + + i0 * SR ; + } + + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4 ) const + { + return i4 + shape_type::N4 * ( + i3 + shape_type::N3 * ( + i2 + shape_type::N2 * ( i1 ))) + + i0 * SR ; + } + + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4, typename I5 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5 ) const + { + return i5 + shape_type::N5 * ( + i4 + shape_type::N4 * ( + i3 + shape_type::N3 * ( + i2 + shape_type::N2 * ( i1 )))) + + i0 * SR ; + } + + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4, typename I5, typename I6 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6 ) const + { + return i6 + shape_type::N6 * ( + i5 + shape_type::N5 * ( + i4 + shape_type::N4 * ( + i3 + shape_type::N3 * ( + i2 + shape_type::N2 * ( i1 ))))) + + i0 * SR ; + } + + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4, typename I5, typename I6, typename I7 > + KOKKOS_FORCEINLINE_FUNCTION + size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 
const& i3, I4 const& i4, I5 const& i5, I6 const& i6, I7 const& i7 ) const + { + return i7 + shape_type::N7 * ( + i6 + shape_type::N6 * ( + i5 + shape_type::N5 * ( + i4 + shape_type::N4 * ( + i3 + shape_type::N3 * ( + i2 + shape_type::N2 * ( i1 )))))) + + i0 * SR ; + } +}; + +//---------------------------------------------------------------------------- +// LayoutStride : +template < class ShapeType > +struct ViewOffset< ShapeType , LayoutStride + , typename enable_if<( 0 < ShapeType::rank )>::type > + : public ShapeType +{ + typedef size_t size_type; + typedef ShapeType shape_type; + typedef LayoutStride array_layout ; + + size_type S[ shape_type::rank + 1 ]; + + template< class SType , class L > + KOKKOS_INLINE_FUNCTION + bool assign_subview( const ViewOffset<SType,L,void> & rhs + , const size_type n0 + , const size_type n1 + , const size_type n2 + , const size_type n3 + , const size_type n4 + , const size_type n5 + , const size_type n6 + , const size_type n7 + ) + { + shape_type::assign( *this, 0,0,0,0, 0,0,0,0 ); + + for ( int i = 0 ; i < int(shape_type::rank+1) ; ++i ) { S[i] = 0 ; } + + // preconditions: + // shape_type::rank <= rhs.rank + // shape_type::rank == count of nonzero( rhs_dim[i] ) + size_type dim[8] = { n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 }; + size_type str[ SType::rank + 1 ]; + + rhs.stride( str ); + + // contract the zero-dimensions + int r = 0 ; + for ( int i = 0 ; i < int(SType::rank) ; ++i ) { + if ( 0 != dim[i] ) { + dim[r] = dim[i] ; + str[r] = str[i] ; + ++r ; + } + } + + if ( int(shape_type::rank) == r ) { + // The shape is non-zero + for ( int i = 0 ; i < int(shape_type::rank) ; ++i ) { + const size_type cap = dim[i] * ( S[i] = str[i] ); + if ( S[ shape_type::rank ] < cap ) S[ shape_type::rank ] = cap ; + } + // set the contracted nonzero dimensions + shape_type::assign( *this, dim[0], dim[1], dim[2], dim[3], dim[4], dim[5], dim[6], dim[7] ); + } + + return true ; // definitely noncontiguous + } + + template< unsigned R > + 
KOKKOS_INLINE_FUNCTION + void assign( size_t n ) + { assign_shape_dimension<R>( *this , n ); } + + template< class ShapeRHS , class Layout > + KOKKOS_INLINE_FUNCTION + void assign( const ViewOffset<ShapeRHS,Layout> & rhs + , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank) )>::type * = 0 ) + { + rhs.stride(S); + shape_type::assign( *this, rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); + } + + KOKKOS_INLINE_FUNCTION + void assign( const LayoutStride & layout ) + { + size_type max = 0 ; + for ( int i = 0 ; i < shape_type::rank ; ++i ) { + S[i] = layout.stride[i] ; + const size_type m = layout.dimension[i] * S[i] ; + if ( max < m ) { max = m ; } + } + S[ shape_type::rank ] = max ; + shape_type::assign( *this, layout.dimension[0], layout.dimension[1], + layout.dimension[2], layout.dimension[3], + layout.dimension[4], layout.dimension[5], + layout.dimension[6], layout.dimension[7] ); + } + + KOKKOS_INLINE_FUNCTION + void assign( size_t s0 , size_t s1 , size_t s2 , size_t s3 + , size_t s4 , size_t s5 , size_t s6 , size_t s7 + , size_t s8 ) + { + const size_t str[9] = { s0, s1, s2, s3, s4, s5, s6, s7, s8 }; + + // Last argument is the total length. + // Total length must be non-zero. + // All strides must be non-zero and less than total length. + bool ok = 0 < str[ shape_type::rank ] ; + + for ( int i = 0 ; ( i < shape_type::rank ) && + ( ok = 0 < str[i] && str[i] < str[ shape_type::rank ] ); ++i ); + + if ( ok ) { + size_t dim[8] = { 1,1,1,1,1,1,1,1 }; + int iorder[9] = { 0,0,0,0,0,0,0,0,0 }; + + // Ordering of strides smallest to largest. + for ( int i = 1 ; i < shape_type::rank ; ++i ) { + int j = i ; + for ( ; 0 < j && str[i] < str[ iorder[j-1] ] ; --j ) { + iorder[j] = iorder[j-1] ; + } + iorder[j] = i ; + } + + // Last argument is the total length. + iorder[ shape_type::rank ] = shape_type::rank ; + + // Determine dimension associated with each stride. 
+ // Guarantees non-overlap by truncating dimension + // if ( 0 != str[ iorder[i+1] ] % str[ iorder[i] ] ) + for ( int i = 0 ; i < shape_type::rank ; ++i ) { + dim[ iorder[i] ] = str[ iorder[i+1] ] / str[ iorder[i] ] ; + } + + // Assign dimensions and strides: + shape_type::assign( *this, dim[0], dim[1], dim[2], dim[3], dim[4], dim[5], dim[6], dim[7] ); + for ( int i = 0 ; i <= shape_type::rank ; ++i ) { S[i] = str[i] ; } + } + else { + shape_type::assign(*this,0,0,0,0,0,0,0,0); + for ( int i = 0 ; i <= shape_type::rank ; ++i ) { S[i] = 0 ; } + } + } + + KOKKOS_INLINE_FUNCTION + void set_padding() {} + + // Number of elements: the product of all eight extents. + // The first factor is widened to size_type (as the other ViewOffset cardinality() + // members do) so the product is not formed in a possibly narrower dimension type. + KOKKOS_INLINE_FUNCTION + size_type cardinality() const + { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; } + + KOKKOS_INLINE_FUNCTION + size_type capacity() const { return S[ shape_type::rank ]; } + + template< typename iType > + KOKKOS_INLINE_FUNCTION + void stride( iType * const s ) const + { for ( int i = 0 ; i <= shape_type::rank ; ++i ) { s[i] = S[i] ; } } + + KOKKOS_INLINE_FUNCTION + size_type stride_0() const { return S[0] ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_1() const { return S[1] ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_2() const { return S[2] ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_3() const { return S[3] ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_4() const { return S[4] ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_5() const { return S[5] ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_6() const { return S[6] ; } + + KOKKOS_INLINE_FUNCTION + size_type stride_7() const { return S[7] ; } + + // rank 1 + template <typename I0 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==1),size_type>::type + operator()( I0 const& i0) const + { + return i0 * S[0] ; + } + + // rank 2 + template <typename I0, typename I1> + KOKKOS_FORCEINLINE_FUNCTION + typename 
std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==2),size_type>::type + operator()( I0 const& i0, I1 const& i1 ) const + { + return i0 * S[0] + i1 * S[1] ; + } + + template <typename I0, typename I1, typename I2> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==3),size_type>::type + operator()( I0 const& i0, I1 const& i1, I2 const& i2 ) const + { + return i0 * S[0] + i1 * S[1] + i2 * S[2] ; + } + + template <typename I0, typename I1, typename I2, typename I3> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==4),size_type>::type + operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3 ) const + { + return i0 * S[0] + i1 * S[1] + i2 * S[2] + i3 * S[3] ; + } + + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==5),size_type>::type + operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4 ) const + { + return i0 * S[0] + i1 * S[1] + i2 * S[2] + i3 * S[3] + i4 * S[4] ; + } + + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4, typename I5 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==6),size_type>::type + operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5 ) const + { + return i0 * S[0] + i1 * S[1] + i2 * S[2] + i3 * S[3] + i4 * S[4] + i5 * S[5] ; + } + + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4, typename I5, typename I6 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==7),size_type>::type + operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6 ) const + { + return i0 * S[0] + 
i1 * S[1] + i2 * S[2] + i3 * S[3] + i4 * S[4] + i5 * S[5] + i6 * S[6] ; + } + + template < typename I0, typename I1, typename I2, typename I3 + ,typename I4, typename I5, typename I6, typename I7 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==8),size_type>::type + operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6, I7 const& i7 ) const + { + return i0 * S[0] + i1 * S[1] + i2 * S[2] + i3 * S[3] + i4 * S[4] + i5 * S[5] + i6 * S[6] + i7 * S[7] ; + } +}; + +//---------------------------------------------------------------------------- + +template< class T > +struct ViewOffsetRange { + + enum { OK_integral_type = Impl::StaticAssert< Impl::is_integral<T>::value >::value }; + + enum { is_range = false }; + + KOKKOS_INLINE_FUNCTION static + size_t dimension( size_t const , T const & ) { return 0 ; } + + KOKKOS_INLINE_FUNCTION static + size_t begin( T const & i ) { return size_t(i) ; } +}; + +template<> +struct ViewOffsetRange<void> { + enum { is_range = false }; +}; + +template<> +struct ViewOffsetRange< Kokkos::ALL > { + enum { is_range = true }; + + KOKKOS_INLINE_FUNCTION static + size_t dimension( size_t const n , ALL const & ) { return n ; } + + KOKKOS_INLINE_FUNCTION static + size_t begin( ALL const & ) { return 0 ; } +}; + +template< typename iType > +struct ViewOffsetRange< std::pair<iType,iType> > { + + enum { OK_integral_type = Impl::StaticAssert< Impl::is_integral<iType>::value >::value }; + + enum { is_range = true }; + + KOKKOS_INLINE_FUNCTION static + size_t dimension( size_t const n , std::pair<iType,iType> const & r ) + { return ( size_t(r.first) < size_t(r.second) && size_t(r.second) <= n ) ? 
size_t(r.second) - size_t(r.first) : 0 ; } + + KOKKOS_INLINE_FUNCTION static + size_t begin( std::pair<iType,iType> const & r ) { return size_t(r.first) ; } +}; + +template< typename iType > +struct ViewOffsetRange< Kokkos::pair<iType,iType> > { + + enum { OK_integral_type = Impl::StaticAssert< Impl::is_integral<iType>::value >::value }; + + enum { is_range = true }; + + KOKKOS_INLINE_FUNCTION static + size_t dimension( size_t const n , Kokkos::pair<iType,iType> const & r ) + { return ( size_t(r.first) < size_t(r.second) && size_t(r.second) <= n ) ? size_t(r.second) - size_t(r.first) : 0 ; } + + KOKKOS_INLINE_FUNCTION static + size_t begin( Kokkos::pair<iType,iType> const & r ) { return size_t(r.first) ; } +}; + +}} // namespace Kokkos::Impl + +#endif //KOKKOS_VIEWOFFSET_HPP + diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewSupport.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewSupport.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8b63039f57000e9d3b0ffa2aaad5a0c3c94d27c4 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_ViewSupport.hpp @@ -0,0 +1,393 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_VIEWSUPPORT_HPP +#define KOKKOS_VIEWSUPPORT_HPP + +#include <algorithm> +#include <Kokkos_ExecPolicy.hpp> +#include <impl/Kokkos_Shape.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +/** \brief Evaluate if LHS = RHS view assignment is allowed. */ +template< class ViewLHS , class ViewRHS > +struct ViewAssignable +{ + // Same memory space. + // Same value type. 
+ // Compatible 'const' qualifier + // Cannot assign managed = unmanaged + enum { assignable_value = + ( is_same< typename ViewLHS::value_type , + typename ViewRHS::value_type >::value + || + is_same< typename ViewLHS::value_type , + typename ViewRHS::const_value_type >::value ) + && + is_same< typename ViewLHS::memory_space , + typename ViewRHS::memory_space >::value + && + ( ! ( ViewLHS::is_managed && ! ViewRHS::is_managed ) ) + }; + + enum { assignable_shape = + // Compatible shape and matching layout: + ( ShapeCompatible< typename ViewLHS::shape_type , + typename ViewRHS::shape_type >::value + && + is_same< typename ViewLHS::array_layout , + typename ViewRHS::array_layout >::value ) + || + // Matching layout, same rank, and LHS dynamic rank + ( is_same< typename ViewLHS::array_layout , + typename ViewRHS::array_layout >::value + && + int(ViewLHS::rank) == int(ViewRHS::rank) + && + int(ViewLHS::rank) == int(ViewLHS::rank_dynamic) ) + || + // Both rank-0, any shape and layout + ( int(ViewLHS::rank) == 0 && int(ViewRHS::rank) == 0 ) + || + // Both rank-1 and LHS is dynamic rank-1, any shape and layout + ( int(ViewLHS::rank) == 1 && int(ViewRHS::rank) == 1 && + int(ViewLHS::rank_dynamic) == 1 ) + }; + + enum { value = assignable_value && assignable_shape }; +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class ExecSpace , class Type , bool Initialize > +struct ViewDefaultConstruct +{ ViewDefaultConstruct( Type * , size_t ) {} }; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class OutputView , class InputView , unsigned Rank = 
OutputView::Rank > +struct ViewRemap +{ + typedef typename OutputView::size_type size_type ; + + const OutputView output ; + const InputView input ; + const size_type n0 ; + const size_type n1 ; + const size_type n2 ; + const size_type n3 ; + const size_type n4 ; + const size_type n5 ; + const size_type n6 ; + const size_type n7 ; + + ViewRemap( const OutputView & arg_out , const InputView & arg_in ) + : output( arg_out ), input( arg_in ) + , n0( std::min( (size_t)arg_out.dimension_0() , (size_t)arg_in.dimension_0() ) ) + , n1( std::min( (size_t)arg_out.dimension_1() , (size_t)arg_in.dimension_1() ) ) + , n2( std::min( (size_t)arg_out.dimension_2() , (size_t)arg_in.dimension_2() ) ) + , n3( std::min( (size_t)arg_out.dimension_3() , (size_t)arg_in.dimension_3() ) ) + , n4( std::min( (size_t)arg_out.dimension_4() , (size_t)arg_in.dimension_4() ) ) + , n5( std::min( (size_t)arg_out.dimension_5() , (size_t)arg_in.dimension_5() ) ) + , n6( std::min( (size_t)arg_out.dimension_6() , (size_t)arg_in.dimension_6() ) ) + , n7( std::min( (size_t)arg_out.dimension_7() , (size_t)arg_in.dimension_7() ) ) + { + typedef typename OutputView::execution_space execution_space ; + Kokkos::RangePolicy< execution_space > range( 0 , n0 ); + parallel_for( range , *this ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type i0 ) const + { + for ( size_type i1 = 0 ; i1 < n1 ; ++i1 ) { + for ( size_type i2 = 0 ; i2 < n2 ; ++i2 ) { + for ( size_type i3 = 0 ; i3 < n3 ; ++i3 ) { + for ( size_type i4 = 0 ; i4 < n4 ; ++i4 ) { + for ( size_type i5 = 0 ; i5 < n5 ; ++i5 ) { + for ( size_type i6 = 0 ; i6 < n6 ; ++i6 ) { + for ( size_type i7 = 0 ; i7 < n7 ; ++i7 ) { + output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input.at(i0,i1,i2,i3,i4,i5,i6,i7); + }}}}}}} + } +}; + +template< class OutputView , class InputView > +struct ViewRemap< OutputView , InputView , 0 > +{ + typedef typename OutputView::value_type value_type ; + typedef typename OutputView::memory_space dst_space ; + typedef typename 
InputView ::memory_space src_space ; + + ViewRemap( const OutputView & arg_out , const InputView & arg_in ) + { + DeepCopy< dst_space , src_space >( arg_out.ptr_on_device() , + arg_in.ptr_on_device() , + sizeof(value_type) ); + } +}; + +//---------------------------------------------------------------------------- + +template< class ExecSpace , class Type > +struct ViewDefaultConstruct< ExecSpace , Type , true > +{ + Type * const m_ptr ; + + KOKKOS_FORCEINLINE_FUNCTION + void operator()( const typename ExecSpace::size_type& i ) const + { m_ptr[i] = Type(); } + + ViewDefaultConstruct( Type * pointer , size_t capacity ) + : m_ptr( pointer ) + { + Kokkos::RangePolicy< ExecSpace > range( 0 , capacity ); + parallel_for( range , *this ); + ExecSpace::fence(); + } +}; + +template< class OutputView , unsigned Rank = OutputView::Rank , + class Enabled = void > +struct ViewFill +{ + typedef typename OutputView::const_value_type const_value_type ; + typedef typename OutputView::size_type size_type ; + + const OutputView output ; + const_value_type input ; + + ViewFill( const OutputView & arg_out , const_value_type & arg_in ) + : output( arg_out ), input( arg_in ) + { + typedef typename OutputView::execution_space execution_space ; + Kokkos::RangePolicy< execution_space > range( 0 , output.dimension_0() ); + parallel_for( range , *this ); + execution_space::fence(); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type i0 ) const + { + for ( size_type i1 = 0 ; i1 < output.dimension_1() ; ++i1 ) { + for ( size_type i2 = 0 ; i2 < output.dimension_2() ; ++i2 ) { + for ( size_type i3 = 0 ; i3 < output.dimension_3() ; ++i3 ) { + for ( size_type i4 = 0 ; i4 < output.dimension_4() ; ++i4 ) { + for ( size_type i5 = 0 ; i5 < output.dimension_5() ; ++i5 ) { + for ( size_type i6 = 0 ; i6 < output.dimension_6() ; ++i6 ) { + for ( size_type i7 = 0 ; i7 < output.dimension_7() ; ++i7 ) { + output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input ; + }}}}}}} + } +}; + +template< class 
OutputView > +struct ViewFill< OutputView , 0 > +{ + typedef typename OutputView::const_value_type const_value_type ; + typedef typename OutputView::memory_space dst_space ; + + ViewFill( const OutputView & arg_out , const_value_type & arg_in ) + { + DeepCopy< dst_space , dst_space >( arg_out.ptr_on_device() , & arg_in , + sizeof(const_value_type) ); + } +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +struct ViewAllocateWithoutInitializing { + + const std::string label ; + + ViewAllocateWithoutInitializing() : label() {} + explicit ViewAllocateWithoutInitializing( const std::string & arg_label ) : label( arg_label ) {} + explicit ViewAllocateWithoutInitializing( const char * const arg_label ) : label( arg_label ) {} +}; + +struct ViewAllocate { + + const std::string label ; + + ViewAllocate() : label() {} + ViewAllocate( const std::string & arg_label ) : label( arg_label ) {} + ViewAllocate( const char * const arg_label ) : label( arg_label ) {} +}; + +} + +namespace Kokkos { +namespace Impl { + +template< class Traits , class AllocationProperties , class Enable = void > +struct ViewAllocProp : public Kokkos::Impl::false_type {}; + +template< class Traits > +struct ViewAllocProp< Traits , Kokkos::ViewAllocate + , typename Kokkos::Impl::enable_if<( + Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value + )>::type > + : public Kokkos::Impl::true_type +{ + typedef size_t size_type ; + typedef const ViewAllocate & property_type ; + + enum { Initialize = true }; + enum { AllowPadding = false }; + + inline + static const std::string & label( property_type p ) { return p.label ; } +}; + +template< class Traits > +struct ViewAllocProp< Traits , std::string + , typename Kokkos::Impl::enable_if<( + Traits::is_managed && ! 
Kokkos::Impl::is_const< typename Traits::value_type >::value + )>::type > + : public Kokkos::Impl::true_type +{ + typedef size_t size_type ; + typedef const std::string & property_type ; + + enum { Initialize = true }; + enum { AllowPadding = false }; + + inline + static const std::string & label( property_type s ) { return s ; } +}; + +template< class Traits , unsigned N > +struct ViewAllocProp< Traits , char[N] + , typename Kokkos::Impl::enable_if<( + Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value + )>::type > + : public Kokkos::Impl::true_type +{ +private: + typedef char label_type[N] ; +public: + + typedef size_t size_type ; + typedef const label_type & property_type ; + + enum { Initialize = true }; + enum { AllowPadding = false }; + + inline + static std::string label( property_type s ) { return std::string(s) ; } +}; + +template< class Traits > +struct ViewAllocProp< Traits , Kokkos::ViewAllocateWithoutInitializing + , typename Kokkos::Impl::enable_if<( + Traits::is_managed && ! 
Kokkos::Impl::is_const< typename Traits::value_type >::value + )>::type > + : public Kokkos::Impl::true_type +{ + typedef size_t size_type ; + typedef const Kokkos::ViewAllocateWithoutInitializing & property_type ; + + enum { Initialize = false }; + enum { AllowPadding = false }; + + inline + static std::string label( property_type s ) { return s.label ; } +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class Traits , class PointerProperties , class Enable = void > +struct ViewRawPointerProp : public Kokkos::Impl::false_type {}; + +template< class Traits , typename T > +struct ViewRawPointerProp< Traits , T , + typename Kokkos::Impl::enable_if<( + Impl::is_same< T , typename Traits::value_type >::value || + Impl::is_same< T , typename Traits::non_const_value_type >::value + )>::type > + : public Kokkos::Impl::true_type +{ + typedef size_t size_type ; +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #ifndef KOKKOS_VIEWSUPPORT_HPP */ + + diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp new file mode 100644 index 0000000000000000000000000000000000000000..61d2e35702f998a83e0796e7d291dff7e3466dd4 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp @@ -0,0 +1,56 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_VIEWTILELEFT_HPP +#define KOKKOS_VIEWTILELEFT_HPP + +#include <impl/KokkosExp_ViewTile.hpp> + +namespace Kokkos { + +using Kokkos::Experimental::tile_subview ; + +} + +#endif /* #ifndef KOKKOS_VIEWTILELEFT_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Volatile_Load.hpp b/lib/kokkos/core/src/impl/Kokkos_Volatile_Load.hpp new file mode 100644 index 0000000000000000000000000000000000000000..420ee63891e6ddb0995ad7bbbcfba2f0548c2bd9 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Volatile_Load.hpp @@ -0,0 +1,242 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_VOLATILE_LOAD ) +#define KOKKOS_VOLATILE_LOAD + +#if defined( __GNUC__ ) /* GNU C */ || \ + defined( __GNUG__ ) /* GNU C++ */ || \ + defined( __clang__ ) + +#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__)) + +#else + +#define KOKKOS_MAY_ALIAS + +#endif + +namespace Kokkos { + +//---------------------------------------------------------------------------- + +template <typename T> +KOKKOS_FORCEINLINE_FUNCTION +T volatile_load(T const volatile * const src_ptr) +{ + typedef uint64_t KOKKOS_MAY_ALIAS T64; + typedef uint32_t KOKKOS_MAY_ALIAS T32; + typedef uint16_t KOKKOS_MAY_ALIAS T16; + typedef uint8_t KOKKOS_MAY_ALIAS T8; + + enum { + NUM_8 = sizeof(T), + NUM_16 = NUM_8 / 2, + NUM_32 = NUM_8 / 4, + NUM_64 = NUM_8 / 8 + }; + + union { + T const volatile * const ptr; + T64 const volatile * const ptr64; + T32 const volatile * const ptr32; + T16 const volatile * const ptr16; + T8 const volatile * const ptr8; + } src = {src_ptr}; + + T result; + + union { + T * const ptr; + T64 * const ptr64; + T32 * const ptr32; + T16 * const ptr16; + T8 * const ptr8; + } dst = {&result}; + + for (int i=0; i < NUM_64; ++i) { + dst.ptr64[i] = src.ptr64[i]; + } + + if ( NUM_64*2 < NUM_32 ) { + dst.ptr32[NUM_64*2] = 
src.ptr32[NUM_64*2]; + } + + if ( NUM_32*2 < NUM_16 ) { + dst.ptr16[NUM_32*2] = src.ptr16[NUM_32*2]; + } + + if ( NUM_16*2 < NUM_8 ) { + dst.ptr8[NUM_16*2] = src.ptr8[NUM_16*2]; + } + + return result; +} + +template <typename T> +KOKKOS_FORCEINLINE_FUNCTION +void volatile_store(T volatile * const dst_ptr, T const volatile * const src_ptr) +{ + typedef uint64_t KOKKOS_MAY_ALIAS T64; + typedef uint32_t KOKKOS_MAY_ALIAS T32; + typedef uint16_t KOKKOS_MAY_ALIAS T16; + typedef uint8_t KOKKOS_MAY_ALIAS T8; + + enum { + NUM_8 = sizeof(T), + NUM_16 = NUM_8 / 2, + NUM_32 = NUM_8 / 4, + NUM_64 = NUM_8 / 8 + }; + + union { + T const volatile * const ptr; + T64 const volatile * const ptr64; + T32 const volatile * const ptr32; + T16 const volatile * const ptr16; + T8 const volatile * const ptr8; + } src = {src_ptr}; + + union { + T volatile * const ptr; + T64 volatile * const ptr64; + T32 volatile * const ptr32; + T16 volatile * const ptr16; + T8 volatile * const ptr8; + } dst = {dst_ptr}; + + for (int i=0; i < NUM_64; ++i) { + dst.ptr64[i] = src.ptr64[i]; + } + + if ( NUM_64*2 < NUM_32 ) { + dst.ptr32[NUM_64*2] = src.ptr32[NUM_64*2]; + } + + if ( NUM_32*2 < NUM_16 ) { + dst.ptr16[NUM_32*2] = src.ptr16[NUM_32*2]; + } + + if ( NUM_16*2 < NUM_8 ) { + dst.ptr8[NUM_16*2] = src.ptr8[NUM_16*2]; + } +} + +template <typename T> +KOKKOS_FORCEINLINE_FUNCTION +void volatile_store(T volatile * const dst_ptr, T const * const src_ptr) +{ + typedef uint64_t KOKKOS_MAY_ALIAS T64; + typedef uint32_t KOKKOS_MAY_ALIAS T32; + typedef uint16_t KOKKOS_MAY_ALIAS T16; + typedef uint8_t KOKKOS_MAY_ALIAS T8; + + enum { + NUM_8 = sizeof(T), + NUM_16 = NUM_8 / 2, + NUM_32 = NUM_8 / 4, + NUM_64 = NUM_8 / 8 + }; + + union { + T const * const ptr; + T64 const * const ptr64; + T32 const * const ptr32; + T16 const * const ptr16; + T8 const * const ptr8; + } src = {src_ptr}; + + union { + T volatile * const ptr; + T64 volatile * const ptr64; + T32 volatile * const ptr32; + T16 volatile * const ptr16; + T8 
volatile * const ptr8; + } dst = {dst_ptr}; + + for (int i=0; i < NUM_64; ++i) { + dst.ptr64[i] = src.ptr64[i]; + } + + if ( NUM_64*2 < NUM_32 ) { + dst.ptr32[NUM_64*2] = src.ptr32[NUM_64*2]; + } + + if ( NUM_32*2 < NUM_16 ) { + dst.ptr16[NUM_32*2] = src.ptr16[NUM_32*2]; + } + + if ( NUM_16*2 < NUM_8 ) { + dst.ptr8[NUM_16*2] = src.ptr8[NUM_16*2]; + } +} + +template <typename T> +KOKKOS_FORCEINLINE_FUNCTION +void volatile_store(T volatile * dst_ptr, T const volatile & src) +{ volatile_store(dst_ptr, &src); } + +template <typename T> +KOKKOS_FORCEINLINE_FUNCTION +void volatile_store(T volatile * dst_ptr, T const & src) +{ volatile_store(dst_ptr, &src); } + +template <typename T> +KOKKOS_FORCEINLINE_FUNCTION +T safe_load(T const * const ptr) +{ +#if !defined( __MIC__ ) + return *ptr; +#else + return volatile_load(ptr); +#endif +} + +} // namespace Kokkos + +#undef KOKKOS_MAY_ALIAS + +#endif + + + diff --git a/lib/kokkos/core/src/impl/Kokkos_hwloc.cpp b/lib/kokkos/core/src/impl/Kokkos_hwloc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cb561f711c3e3f86b07c8c9f24d96bb39bb3d765 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_hwloc.cpp @@ -0,0 +1,726 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#define DEBUG_PRINT 0 + +#include <iostream> +#include <sstream> +#include <algorithm> + +#include <Kokkos_Macros.hpp> +#include <Kokkos_hwloc.hpp> +#include <impl/Kokkos_Error.hpp> + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace hwloc { + +/* Return 0 if asynchronous, 1 if synchronous and include process. 
*/ +unsigned thread_mapping( const char * const label , + const bool allow_async , + unsigned & thread_count , + unsigned & use_numa_count , + unsigned & use_cores_per_numa , + std::pair<unsigned,unsigned> threads_coord[] ) +{ + const bool hwloc_avail = Kokkos::hwloc::available(); + const unsigned avail_numa_count = hwloc_avail ? hwloc::get_available_numa_count() : 1 ; + const unsigned avail_cores_per_numa = hwloc_avail ? hwloc::get_available_cores_per_numa() : thread_count ; + const unsigned avail_threads_per_core = hwloc_avail ? hwloc::get_available_threads_per_core() : 1 ; + + // (numa,core) coordinate of the process: + const std::pair<unsigned,unsigned> proc_coord = Kokkos::hwloc::get_this_thread_coordinate(); + + //------------------------------------------------------------------------ + // Defaults for unspecified inputs: + + if ( ! use_numa_count ) { + // Default to use all NUMA regions + use_numa_count = ! thread_count ? avail_numa_count : ( + thread_count < avail_numa_count ? thread_count : avail_numa_count ); + } + + if ( ! use_cores_per_numa ) { + // Default to use all but one core if asynchronous, all cores if synchronous. + const unsigned threads_per_numa = thread_count / use_numa_count ; + + use_cores_per_numa = ! threads_per_numa ? avail_cores_per_numa - ( allow_async ? 1 : 0 ) : ( + threads_per_numa < avail_cores_per_numa ? threads_per_numa : avail_cores_per_numa ); + } + + if ( ! thread_count ) { + thread_count = use_numa_count * use_cores_per_numa * avail_threads_per_core ; + } + + //------------------------------------------------------------------------ + // Input verification: + + const bool valid_numa = use_numa_count <= avail_numa_count ; + const bool valid_cores = use_cores_per_numa && + use_cores_per_numa <= avail_cores_per_numa ; + const bool valid_threads = thread_count && + thread_count <= use_numa_count * use_cores_per_numa * avail_threads_per_core ; + const bool balanced_numa = ! 
( thread_count % use_numa_count ); + const bool balanced_cores = ! ( thread_count % ( use_numa_count * use_cores_per_numa ) ); + + const bool valid_input = valid_numa && valid_cores && valid_threads && balanced_numa && balanced_cores ; + + if ( ! valid_input ) { + + std::ostringstream msg ; + + msg << label << " HWLOC ERROR(s)" ; + + if ( ! valid_threads ) { + msg << " : thread_count(" << thread_count + << ") exceeds capacity(" + << use_numa_count * use_cores_per_numa * avail_threads_per_core + << ")" ; + } + if ( ! valid_numa ) { + msg << " : use_numa_count(" << use_numa_count + << ") exceeds capacity(" << avail_numa_count << ")" ; + } + if ( ! valid_cores ) { + msg << " : use_cores_per_numa(" << use_cores_per_numa + << ") exceeds capacity(" << avail_cores_per_numa << ")" ; + } + if ( ! balanced_numa ) { + msg << " : thread_count(" << thread_count + << ") imbalanced among numa(" << use_numa_count << ")" ; + } + if ( ! balanced_cores ) { + msg << " : thread_count(" << thread_count + << ") imbalanced among cores(" << use_numa_count * use_cores_per_numa << ")" ; + } + + Kokkos::Impl::throw_runtime_exception( msg.str() ); + } + + const unsigned thread_spawn_synchronous = + ( allow_async && + 1 < thread_count && + ( use_numa_count < avail_numa_count || + use_cores_per_numa < avail_cores_per_numa ) ) + ? 0 /* asyncronous */ + : 1 /* synchronous, threads_coord[0] is process core */ ; + + // Determine binding coordinates for to-be-spawned threads so that + // threads may be bound to cores as they are spawned. + + const unsigned threads_per_core = thread_count / ( use_numa_count * use_cores_per_numa ); + + if ( thread_spawn_synchronous ) { + // Working synchronously and include process core as threads_coord[0]. + // Swap the NUMA coordinate of the process core with 0 + // Swap the CORE coordinate of the process core with 0 + for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) { + const unsigned numa_coord = 0 == inuma ? 
proc_coord.first : ( proc_coord.first == inuma ? 0 : inuma ); + for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) { + const unsigned core_coord = 0 == icore ? proc_coord.second : ( proc_coord.second == icore ? 0 : icore ); + for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) { + threads_coord[i].first = numa_coord ; + threads_coord[i].second = core_coord ; + } + } + } + } + else if ( use_numa_count < avail_numa_count ) { + // Working asynchronously and omit the process' NUMA region from the pool. + // Swap the NUMA coordinate of the process core with ( ( avail_numa_count - use_numa_count ) - 1 ) + const unsigned numa_coord_swap = ( avail_numa_count - use_numa_count ) - 1 ; + for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) { + const unsigned numa_coord = proc_coord.first == inuma ? numa_coord_swap : inuma ; + for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) { + const unsigned core_coord = icore ; + for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) { + threads_coord[i].first = numa_coord ; + threads_coord[i].second = core_coord ; + } + } + } + } + else if ( use_cores_per_numa < avail_cores_per_numa ) { + // Working asynchronously and omit the process' core from the pool. + // Swap the CORE coordinate of the process core with ( ( avail_cores_per_numa - use_cores_per_numa ) - 1 ) + const unsigned core_coord_swap = ( avail_cores_per_numa - use_cores_per_numa ) - 1 ; + for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) { + const unsigned numa_coord = inuma ; + for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) { + const unsigned core_coord = proc_coord.second == icore ? 
core_coord_swap : icore ; + for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) { + threads_coord[i].first = numa_coord ; + threads_coord[i].second = core_coord ; + } + } + } + } + + return thread_spawn_synchronous ; +} + +} /* namespace hwloc */ +} /* namespace Kokkos */ + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +#if defined( KOKKOS_HAVE_HWLOC ) + +#include <iostream> +#include <sstream> +#include <stdexcept> + +/*--------------------------------------------------------------------------*/ +/* Third Party Libraries */ + +/* Hardware locality library: http://www.open-mpi.org/projects/hwloc/ */ +#include <hwloc.h> + +#define REQUIRED_HWLOC_API_VERSION 0x000010300 + +#if HWLOC_API_VERSION < REQUIRED_HWLOC_API_VERSION +#error "Requires http://www.open-mpi.org/projects/hwloc/ Version 1.3 or greater" +#endif + +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace hwloc { +namespace { + +#if DEBUG_PRINT + +inline +void print_bitmap( std::ostream & s , const hwloc_const_bitmap_t bitmap ) +{ + s << "{" ; + for ( int i = hwloc_bitmap_first( bitmap ) ; + -1 != i ; i = hwloc_bitmap_next( bitmap , i ) ) { + s << " " << i ; + } + s << " }" ; +} + +#endif + +enum { MAX_CORE = 1024 }; + +std::pair<unsigned,unsigned> s_core_topology(0,0); +unsigned s_core_capacity(0); +hwloc_topology_t s_hwloc_topology(0); +hwloc_bitmap_t s_hwloc_location(0); +hwloc_bitmap_t s_process_binding(0); +hwloc_bitmap_t s_core[ MAX_CORE ]; +bool s_can_bind_threads(true); + +struct Sentinel { + ~Sentinel(); + Sentinel(); +}; + +bool sentinel() +{ + static Sentinel self ; + + if ( 0 == s_hwloc_topology ) { + std::cerr << "Kokkos::hwloc ERROR : Called after return from main()" << std::endl ; + std::cerr.flush(); + } + + return 0 != s_hwloc_topology ; +} + +Sentinel::~Sentinel() +{ + hwloc_topology_destroy( 
s_hwloc_topology ); + hwloc_bitmap_free( s_process_binding ); + hwloc_bitmap_free( s_hwloc_location ); + + s_core_topology.first = 0 ; + s_core_topology.second = 0 ; + s_core_capacity = 0 ; + s_hwloc_topology = 0 ; + s_hwloc_location = 0 ; + s_process_binding = 0 ; +} + +Sentinel::Sentinel() +{ +#if defined(__MIC__) + static const bool remove_core_0 = true ; +#else + static const bool remove_core_0 = false ; +#endif + + s_core_topology = std::pair<unsigned,unsigned>(0,0); + s_core_capacity = 0 ; + s_hwloc_topology = 0 ; + s_hwloc_location = 0 ; + s_process_binding = 0 ; + + for ( unsigned i = 0 ; i < MAX_CORE ; ++i ) s_core[i] = 0 ; + + hwloc_topology_init( & s_hwloc_topology ); + hwloc_topology_load( s_hwloc_topology ); + + s_hwloc_location = hwloc_bitmap_alloc(); + s_process_binding = hwloc_bitmap_alloc(); + + hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS ); + + if ( hwloc_bitmap_iszero( s_process_binding ) ) { + std::cerr << "WARNING: Cannot detect process binding -- ASSUMING ALL processing units" << std::endl; + const int pu_depth = hwloc_get_type_depth( s_hwloc_topology, HWLOC_OBJ_PU ); + int num_pu = 1; + if ( pu_depth != HWLOC_TYPE_DEPTH_UNKNOWN ) { + num_pu = hwloc_get_nbobjs_by_depth( s_hwloc_topology, pu_depth ); + } + else { + std::cerr << "WARNING: Cannot detect number of processing units -- ASSUMING 1 (serial)." 
<< std::endl; + num_pu = 1; + } + hwloc_bitmap_set_range( s_process_binding, 0, num_pu-1); + s_can_bind_threads = false; + } + + + if ( remove_core_0 ) { + + const hwloc_obj_t core = hwloc_get_obj_by_type( s_hwloc_topology , HWLOC_OBJ_CORE , 0 ); + + if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) { + + hwloc_bitmap_t s_process_no_core_zero = hwloc_bitmap_alloc(); + + hwloc_bitmap_andnot( s_process_no_core_zero , s_process_binding , core->allowed_cpuset ); + + bool ok = 0 == hwloc_set_cpubind( s_hwloc_topology , + s_process_no_core_zero , + HWLOC_CPUBIND_PROCESS | HWLOC_CPUBIND_STRICT ); + + if ( ok ) { + hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS ); + + ok = 0 != hwloc_bitmap_isequal( s_process_binding , s_process_no_core_zero ); + } + + hwloc_bitmap_free( s_process_no_core_zero ); + + if ( ! ok ) { + std::cerr << "WARNING: Kokkos::hwloc attempted and failed to move process off of core #0" << std::endl ; + } + } + } + + // Choose a hwloc object type for the NUMA level, which may not exist. + + hwloc_obj_type_t root_type = HWLOC_OBJ_TYPE_MAX ; + + { + // Object types to search, in order. + static const hwloc_obj_type_t candidate_root_type[] = + { HWLOC_OBJ_NODE /* NUMA region */ + , HWLOC_OBJ_SOCKET /* hardware socket */ + , HWLOC_OBJ_MACHINE /* local machine */ + }; + + enum { CANDIDATE_ROOT_TYPE_COUNT = + sizeof(candidate_root_type) / sizeof(hwloc_obj_type_t) }; + + for ( int k = 0 ; k < CANDIDATE_ROOT_TYPE_COUNT && HWLOC_OBJ_TYPE_MAX == root_type ; ++k ) { + if ( 0 < hwloc_get_nbobjs_by_type( s_hwloc_topology , candidate_root_type[k] ) ) { + root_type = candidate_root_type[k] ; + } + } + } + + // Determine which of these 'root' types are available to this process. + // The process may have been bound (e.g., by MPI) to a subset of these root types. 
+ // Determine current location of the master (calling) process> + + hwloc_bitmap_t proc_cpuset_location = hwloc_bitmap_alloc(); + + hwloc_get_last_cpu_location( s_hwloc_topology , proc_cpuset_location , HWLOC_CPUBIND_THREAD ); + + const unsigned max_root = hwloc_get_nbobjs_by_type( s_hwloc_topology , root_type ); + + unsigned root_base = max_root ; + unsigned root_count = 0 ; + unsigned core_per_root = 0 ; + unsigned pu_per_core = 0 ; + bool symmetric = true ; + + for ( unsigned i = 0 ; i < max_root ; ++i ) { + + const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , i ); + + if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) { + + ++root_count ; + + // Remember which root (NUMA) object the master thread is running on. + // This will be logical NUMA rank #0 for this process. + + if ( hwloc_bitmap_intersects( proc_cpuset_location, root->allowed_cpuset ) ) { + root_base = i ; + } + + // Count available cores: + + const unsigned max_core = + hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology , + root->allowed_cpuset , + HWLOC_OBJ_CORE ); + + unsigned core_count = 0 ; + + for ( unsigned j = 0 ; j < max_core ; ++j ) { + + const hwloc_obj_t core = + hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology , + root->allowed_cpuset , + HWLOC_OBJ_CORE , j ); + + // If process' cpuset intersects core's cpuset then process can access this core. + // Must use intersection instead of inclusion because the Intel-Phi + // MPI may bind the process to only one of the core's hyperthreads. + // + // Assumption: if the process can access any hyperthread of the core + // then it has ownership of the entire core. + // This assumes that it would be performance-detrimental + // to spawn more than one MPI process per core and use nested threading. 
+ + if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) { + + ++core_count ; + + const unsigned pu_count = + hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology , + core->allowed_cpuset , + HWLOC_OBJ_PU ); + + if ( pu_per_core == 0 ) pu_per_core = pu_count ; + + // Enforce symmetry by taking the minimum: + + pu_per_core = std::min( pu_per_core , pu_count ); + + if ( pu_count != pu_per_core ) symmetric = false ; + } + } + + if ( 0 == core_per_root ) core_per_root = core_count ; + + // Enforce symmetry by taking the minimum: + + core_per_root = std::min( core_per_root , core_count ); + + if ( core_count != core_per_root ) symmetric = false ; + } + } + + s_core_topology.first = root_count ; + s_core_topology.second = core_per_root ; + s_core_capacity = pu_per_core ; + + // Fill the 's_core' array for fast mapping from a core coordinate to the + // hwloc cpuset object required for thread location querying and binding. + + for ( unsigned i = 0 ; i < max_root ; ++i ) { + + const unsigned root_rank = ( i + root_base ) % max_root ; + + const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , root_rank ); + + if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) { + + const unsigned max_core = + hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology , + root->allowed_cpuset , + HWLOC_OBJ_CORE ); + + unsigned core_count = 0 ; + + for ( unsigned j = 0 ; j < max_core && core_count < core_per_root ; ++j ) { + + const hwloc_obj_t core = + hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology , + root->allowed_cpuset , + HWLOC_OBJ_CORE , j ); + + if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) { + + s_core[ core_count + core_per_root * i ] = core->allowed_cpuset ; + + ++core_count ; + } + } + } + } + + hwloc_bitmap_free( proc_cpuset_location ); + + if ( ! symmetric ) { + std::cout << "Kokkos::hwloc WARNING: Using a symmetric subset of a non-symmetric core topology." 
+ << std::endl ; + } +} + + +} // namespace + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +bool available() +{ return true ; } + +unsigned get_available_numa_count() +{ sentinel(); return s_core_topology.first ; } + +unsigned get_available_cores_per_numa() +{ sentinel(); return s_core_topology.second ; } + +unsigned get_available_threads_per_core() +{ sentinel(); return s_core_capacity ; } + +bool can_bind_threads() +{ sentinel(); return s_can_bind_threads; } + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +unsigned bind_this_thread( + const unsigned coordinate_count , + std::pair<unsigned,unsigned> coordinate[] ) +{ + unsigned i = 0 ; + + try { + const std::pair<unsigned,unsigned> current = get_this_thread_coordinate(); + + // Match one of the requests: + for ( i = 0 ; i < coordinate_count && current != coordinate[i] ; ++i ); + + if ( coordinate_count == i ) { + // Match the first request (typically NUMA): + for ( i = 0 ; i < coordinate_count && current.first != coordinate[i].first ; ++i ); + } + + if ( coordinate_count == i ) { + // Match any unclaimed request: + for ( i = 0 ; i < coordinate_count && ~0u == coordinate[i].first ; ++i ); + } + + if ( coordinate_count == i || ! bind_this_thread( coordinate[i] ) ) { + // Failed to bind: + i = ~0u ; + } + + if ( i < coordinate_count ) { + +#if DEBUG_PRINT + if ( current != coordinate[i] ) { + std::cout << " bind_this_thread: rebinding from (" + << current.first << "," + << current.second + << ") to (" + << coordinate[i].first << "," + << coordinate[i].second + << ")" << std::endl ; + } +#endif + + coordinate[i].first = ~0u ; + coordinate[i].second = ~0u ; + } + } + catch( ... 
) { + i = ~0u ; + } + + return i ; +} + + +bool bind_this_thread( const std::pair<unsigned,unsigned> coord ) +{ + if ( ! sentinel() ) return false ; + +#if DEBUG_PRINT + + std::cout << "Kokkos::bind_this_thread() at " ; + + hwloc_get_last_cpu_location( s_hwloc_topology , + s_hwloc_location , HWLOC_CPUBIND_THREAD ); + + print_bitmap( std::cout , s_hwloc_location ); + + std::cout << " to " ; + + print_bitmap( std::cout , s_core[ coord.second + coord.first * s_core_topology.second ] ); + + std::cout << std::endl ; + +#endif + + // As safe and fast as possible. + // Fast-lookup by caching the coordinate -> hwloc cpuset mapping in 's_core'. + return coord.first < s_core_topology.first && + coord.second < s_core_topology.second && + 0 == hwloc_set_cpubind( s_hwloc_topology , + s_core[ coord.second + coord.first * s_core_topology.second ] , + HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT ); +} + +bool unbind_this_thread() +{ + if ( ! sentinel() ) return false ; + +#define HWLOC_DEBUG_PRINT 0 + +#if HWLOC_DEBUG_PRINT + + std::cout << "Kokkos::unbind_this_thread() from " ; + + hwloc_get_cpubind( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD ); + + print_bitmap( std::cout , s_hwloc_location ); + +#endif + + const bool result = + s_hwloc_topology && + 0 == hwloc_set_cpubind( s_hwloc_topology , + s_process_binding , + HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT ); + +#if HWLOC_DEBUG_PRINT + + std::cout << " to " ; + + hwloc_get_cpubind( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD ); + + print_bitmap( std::cout , s_hwloc_location ); + + std::cout << std::endl ; + +#endif + + return result ; + +#undef HWLOC_DEBUG_PRINT + +} + +//---------------------------------------------------------------------------- + +std::pair<unsigned,unsigned> get_this_thread_coordinate() +{ + std::pair<unsigned,unsigned> coord(0u,0u); + + if ( ! 
sentinel() ) return coord ; + + const unsigned n = s_core_topology.first * s_core_topology.second ; + + // Using the pre-allocated 's_hwloc_location' to avoid memory + // allocation by this thread. This call is NOT thread-safe. + hwloc_get_last_cpu_location( s_hwloc_topology , + s_hwloc_location , HWLOC_CPUBIND_THREAD ); + + unsigned i = 0 ; + + while ( i < n && ! hwloc_bitmap_intersects( s_hwloc_location , s_core[ i ] ) ) ++i ; + + if ( i < n ) { + coord.first = i / s_core_topology.second ; + coord.second = i % s_core_topology.second ; + } + + return coord ; +} + +//---------------------------------------------------------------------------- + +} /* namespace hwloc */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#else /* ! defined( KOKKOS_HAVE_HWLOC ) */ + +namespace Kokkos { +namespace hwloc { + +bool available() { return false ; } +bool can_bind_threads() { return false ; } + +unsigned get_available_numa_count() { return 1 ; } +unsigned get_available_cores_per_numa() { return 1 ; } +unsigned get_available_threads_per_core() { return 1 ; } + +unsigned bind_this_thread( const unsigned , std::pair<unsigned,unsigned>[] ) +{ return ~0 ; } + +bool bind_this_thread( const std::pair<unsigned,unsigned> ) +{ return false ; } + +bool unbind_this_thread() +{ return true ; } + +std::pair<unsigned,unsigned> get_this_thread_coordinate() +{ return std::pair<unsigned,unsigned>(0,0); } + +} // namespace hwloc +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif + + diff --git a/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp b/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp new file mode 100644 index 0000000000000000000000000000000000000000..aff7f29f89883d199ecf65feb86c89328530413b --- /dev/null +++ 
b/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp @@ -0,0 +1,89 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Macros.hpp> +#include <impl/Kokkos_spinwait.hpp> + +/*--------------------------------------------------------------------------*/ + +#if ( KOKKOS_ENABLE_ASM ) + #if defined( __arm__ ) || defined( __aarch64__ ) + /* No-operation instruction to idle the thread. */ + #define YIELD asm volatile("nop") + #else + /* Pause instruction to prevent excess processor bus usage */ + #define YIELD asm volatile("pause\n":::"memory") + #endif +#elif defined ( KOKKOS_HAVE_WINTHREAD ) + #include <process.h> + #define YIELD Sleep(0) +#elif defined ( _WIN32) && defined (_MSC_VER) + /* Windows w/ Visual Studio */ + #define NOMINMAX + #include <winsock2.h> + #include <windows.h> +#define YIELD YieldProcessor(); +#elif defined ( _WIN32 ) + /* Windows w/ Intel*/ + #define YIELD __asm__ __volatile__("pause\n":::"memory") +#else + #include <sched.h> + #define YIELD sched_yield() +#endif + +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) +void spinwait( volatile int & flag , const int value ) +{ + while ( value == flag ) { + YIELD ; + } +} +#endif + +} /* namespace Impl */ +} /* namespace Kokkos */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_spinwait.hpp b/lib/kokkos/core/src/impl/Kokkos_spinwait.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cc87771faefcb8ad7716842890dbec4a9c1219a1 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_spinwait.hpp @@ -0,0 +1,64 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#ifndef KOKKOS_SPINWAIT_HPP +#define KOKKOS_SPINWAIT_HPP + +#include <Kokkos_Macros.hpp> + +namespace Kokkos { +namespace Impl { + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) +void spinwait( volatile int & flag , const int value ); +#else +KOKKOS_INLINE_FUNCTION +void spinwait( volatile int & , const int ) {} +#endif + +} /* namespace Impl */ +} /* namespace Kokkos */ + +#endif /* #ifndef KOKKOS_SPINWAIT_HPP */ + diff --git a/lib/kokkos/core/unit_test/CMakeLists.txt b/lib/kokkos/core/unit_test/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5bb2b672e124f3b282d760562514afb1719fd957 --- /dev/null +++ b/lib/kokkos/core/unit_test/CMakeLists.txt @@ -0,0 +1,105 @@ +# +# Add test-only library for gtest to be reused by all the subpackages +# + +SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest) + +INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR}) +TRIBITS_ADD_LIBRARY( + kokkos_gtest + HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h + SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc + TESTONLY + ) + +# +# Define the tests +# + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +IF(Kokkos_ENABLE_Serial) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial + SOURCES UnitTestMain.cpp TestSerial.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest + ) +ENDIF() + +IF(Kokkos_ENABLE_Pthread) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Threads + SOURCES UnitTestMain.cpp TestThreads.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest + ) +ENDIF() + +IF(Kokkos_ENABLE_OpenMP) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP + SOURCES UnitTestMain.cpp TestOpenMP.cpp TestOpenMP_a.cpp TestOpenMP_b.cpp TestOpenMP_c.cpp + COMM serial mpi + 
NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest + ) +ENDIF() + +IF(Kokkos_ENABLE_QTHREAD) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Qthread + SOURCES UnitTestMain.cpp TestQthread.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest + ) +ENDIF() + +IF(Kokkos_ENABLE_Cuda) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Cuda + SOURCES UnitTestMain.cpp TestCuda.cpp TestCuda_a.cpp TestCuda_b.cpp TestCuda_c.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest + ) +ENDIF() + +TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Default + SOURCES UnitTestMain.cpp TestDefaultDeviceType.cpp TestDefaultDeviceType_a.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest +) + +foreach(INITTESTS_NUM RANGE 1 16) +TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_DefaultInit_${INITTESTS_NUM} + SOURCES UnitTestMain.cpp TestDefaultDeviceTypeInit_${INITTESTS_NUM}.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest +) +endforeach(INITTESTS_NUM) + +TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_HWLOC + SOURCES UnitTestMain.cpp TestHWLOC.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest +) + diff --git a/lib/kokkos/core/unit_test/Makefile b/lib/kokkos/core/unit_test/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..3d9d212c1ecdef658fdb9cf7d30fc542a6fb72d3 --- /dev/null +++ b/lib/kokkos/core/unit_test/Makefile @@ -0,0 +1,153 @@ +KOKKOS_PATH = ../.. 
+ +GTEST_PATH = ../../tpls/gtest + +vpath %.cpp ${KOKKOS_PATH}/core/unit_test +TEST_HEADERS = $(wildcard $(KOKKOS_PATH)/core/unit_test/*.hpp) + +default: build_all + echo "End Build" + +include $(KOKKOS_PATH)/Makefile.kokkos + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + CXX = $(NVCC_WRAPPER) + CXXFLAGS ?= -O3 + LINK = $(CXX) + LDFLAGS ?= -lpthread +else + CXX ?= g++ + CXXFLAGS ?= -O3 + LINK ?= $(CXX) + LDFLAGS ?= -lpthread +endif + +KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/core/unit_test + +TEST_TARGETS = +TARGETS = + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + OBJ_CUDA = TestCuda_c.o TestCuda_b.o TestCuda_a.o TestCuda.o UnitTestMain.o gtest-all.o + TARGETS += KokkosCore_UnitTest_Cuda + TEST_TARGETS += test-cuda +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o + TARGETS += KokkosCore_UnitTest_Threads + TEST_TARGETS += test-threads +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + OBJ_OPENMP = TestOpenMP_c.o TestOpenMP_b.o TestOpenMP_a.o TestOpenMP.o UnitTestMain.o gtest-all.o + TARGETS += KokkosCore_UnitTest_OpenMP + TEST_TARGETS += test-openmp +endif + +ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o + TARGETS += KokkosCore_UnitTest_Serial + TEST_TARGETS += test-serial +endif + +ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) + OBJ_QTHREAD = TestQthread.o UnitTestMain.o gtest-all.o + TARGETS += KokkosCore_UnitTest_Qthread + TEST_TARGETS += test-qthread +endif + +OBJ_HWLOC = TestHWLOC.o UnitTestMain.o gtest-all.o +TARGETS += KokkosCore_UnitTest_HWLOC +TEST_TARGETS += test-hwloc + +OBJ_DEFAULT = TestDefaultDeviceType.o TestDefaultDeviceType_a.o UnitTestMain.o gtest-all.o +TARGETS += KokkosCore_UnitTest_Default +TEST_TARGETS += test-default + +NUM_INITTESTS = 16 +INITTESTS_NUMBERS := $(shell seq 1 ${NUM_INITTESTS}) +INITTESTS_TARGETS := $(addprefix KokkosCore_UnitTest_DefaultDeviceTypeInit_,${INITTESTS_NUMBERS}) +TARGETS += ${INITTESTS_TARGETS} 
+INITTESTS_TEST_TARGETS := $(addprefix test-default-init-,${INITTESTS_NUMBERS}) +TEST_TARGETS += ${INITTESTS_TEST_TARGETS} + +OBJ_SYNCHRONIC = TestSynchronic.o UnitTestMain.o gtest-all.o +TARGETS += KokkosCore_UnitTest_Synchronic +TEST_TARGETS += test-synchronic + +KokkosCore_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Cuda + +KokkosCore_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Threads + +KokkosCore_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_OpenMP + +KokkosCore_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Serial + +KokkosCore_UnitTest_Qthread: $(OBJ_QTHREAD) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_QTHREAD) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Qthread + +KokkosCore_UnitTest_HWLOC: $(OBJ_HWLOC) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_HWLOC) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_HWLOC + +KokkosCore_UnitTest_AllocationTracker: $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_AllocationTracker + +KokkosCore_UnitTest_Default: $(OBJ_DEFAULT) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_DEFAULT) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Default + +${INITTESTS_TARGETS}: KokkosCore_UnitTest_DefaultDeviceTypeInit_%: TestDefaultDeviceTypeInit_%.o UnitTestMain.o gtest-all.o $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) 
$(EXTRA_PATH) TestDefaultDeviceTypeInit_$*.o UnitTestMain.o gtest-all.o $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_DefaultDeviceTypeInit_$* + +KokkosCore_UnitTest_Synchronic: $(OBJ_SYNCHRONIC) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SYNCHRONIC) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Synchronic + +test-cuda: KokkosCore_UnitTest_Cuda + ./KokkosCore_UnitTest_Cuda + +test-threads: KokkosCore_UnitTest_Threads + ./KokkosCore_UnitTest_Threads + +test-openmp: KokkosCore_UnitTest_OpenMP + ./KokkosCore_UnitTest_OpenMP + +test-serial: KokkosCore_UnitTest_Serial + ./KokkosCore_UnitTest_Serial + +test-qthread: KokkosCore_UnitTest_Qthread + ./KokkosCore_UnitTest_Qthread + +test-hwloc: KokkosCore_UnitTest_HWLOC + ./KokkosCore_UnitTest_HWLOC + +test-allocationtracker: KokkosCore_UnitTest_AllocationTracker + ./KokkosCore_UnitTest_AllocationTracker + +test-default: KokkosCore_UnitTest_Default + ./KokkosCore_UnitTest_Default + +${INITTESTS_TEST_TARGETS}: test-default-init-%: KokkosCore_UnitTest_DefaultDeviceTypeInit_% + ./KokkosCore_UnitTest_DefaultDeviceTypeInit_$* + +test-synchronic: KokkosCore_UnitTest_Synchronic + ./KokkosCore_UnitTest_Synchronic + +build_all: $(TARGETS) + +test: $(TEST_TARGETS) + +clean: kokkos-clean + rm -f *.o $(TARGETS) + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(TEST_HEADERS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + +gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc + diff --git a/lib/kokkos/core/unit_test/TestAggregate.hpp b/lib/kokkos/core/unit_test/TestAggregate.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5388a60787cb9217a4436798d826dcc53f55d3f2 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestAggregate.hpp @@ -0,0 +1,109 @@ +/* +//@HEADER +// ************************************************************************ +// 
+// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef TEST_AGGREGATE_HPP +#define TEST_AGGREGATE_HPP + +#include <gtest/gtest.h> + +#include <stdexcept> +#include <sstream> +#include <iostream> + +/*--------------------------------------------------------------------------*/ + +#include <impl/KokkosExp_ViewArray.hpp> + +namespace Test { + +template< class DeviceType > +void TestViewAggregate() +{ + typedef Kokkos::Array<double,32> value_type ; + + typedef Kokkos::Experimental::Impl:: + ViewDataAnalysis< value_type * , Kokkos::LayoutLeft , value_type > + analysis_1d ; + + static_assert( std::is_same< typename analysis_1d::specialize , Kokkos::Array<> >::value , "" ); + + + typedef Kokkos::ViewTraits< value_type ** , DeviceType > a32_traits ; + typedef Kokkos::ViewTraits< typename a32_traits::scalar_array_type , DeviceType > flat_traits ; + + static_assert( std::is_same< typename a32_traits::specialize , Kokkos::Array<> >::value , "" ); + static_assert( std::is_same< typename a32_traits::value_type , value_type >::value , "" ); + static_assert( a32_traits::rank == 2 , "" ); + static_assert( a32_traits::rank_dynamic == 2 , "" ); + + static_assert( std::is_same< typename flat_traits::specialize , void >::value , "" ); + static_assert( flat_traits::rank == 3 , "" ); + static_assert( flat_traits::rank_dynamic == 2 , "" ); + static_assert( flat_traits::dimension::N2 == 32 , "" ); + + + typedef Kokkos::View< Kokkos::Array<double,32> ** , DeviceType > a32_type ; + + typedef typename a32_type::array_type a32_flat_type ; + + static_assert( std::is_same< typename a32_type::value_type , value_type >::value , "" ); + static_assert( std::is_same< typename a32_type::pointer_type , double * >::value , "" ); + static_assert( a32_type::Rank == 2 , "" ); + static_assert( a32_flat_type::Rank == 3 , "" ); + + a32_type x("test",4,5); + a32_flat_type y( x ); + + ASSERT_EQ( x.extent(0) , 4 ); + 
ASSERT_EQ( x.extent(1) , 5 ); + ASSERT_EQ( y.extent(0) , 4 ); + ASSERT_EQ( y.extent(1) , 5 ); + ASSERT_EQ( y.extent(2) , 32 ); +} + +} + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +#endif /* #ifndef TEST_AGGREGATE_HPP */ diff --git a/lib/kokkos/core/unit_test/TestAggregateReduction.hpp b/lib/kokkos/core/unit_test/TestAggregateReduction.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bd05cd347b979e305becead88a898d27b0a7d4f8 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestAggregateReduction.hpp @@ -0,0 +1,191 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef TEST_AGGREGATE_REDUCTION_HPP +#define TEST_AGGREGATE_REDUCTION_HPP + +#include <gtest/gtest.h> + +#include <stdexcept> +#include <sstream> +#include <iostream> + +namespace Test { + +template< typename T , unsigned N > +struct StaticArray { + T value[N] ; + + KOKKOS_INLINE_FUNCTION + StaticArray() = default; + + KOKKOS_INLINE_FUNCTION + StaticArray( const StaticArray & rhs ) = default; + + KOKKOS_INLINE_FUNCTION + operator T () { return value[0]; } + + KOKKOS_INLINE_FUNCTION + StaticArray & operator = ( const T & rhs ) + { + for ( unsigned i = 0 ; i < N ; ++i ) value[i] = rhs ; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + StaticArray & operator = ( const StaticArray & rhs ) = default; + + KOKKOS_INLINE_FUNCTION + StaticArray operator * ( const StaticArray & rhs ) + { + StaticArray tmp ; + for ( unsigned i = 0 ; i < N ; ++i ) tmp.value[i] = value[i] * rhs.value[i] ; + return tmp ; + } + + KOKKOS_INLINE_FUNCTION + StaticArray operator + ( const StaticArray & rhs ) + { + StaticArray tmp ; + for ( unsigned i = 0 ; i < N ; ++i ) tmp.value[i] = value[i] + rhs.value[i] ; + return tmp ; + } + + KOKKOS_INLINE_FUNCTION + StaticArray & operator += ( const StaticArray & rhs ) + { + for ( unsigned i = 0 ; i < N ; ++i ) value[i] += rhs.value[i] ; + return *this ; + } + 
+ KOKKOS_INLINE_FUNCTION + void operator += ( const volatile StaticArray & rhs ) volatile + { + for ( unsigned i = 0 ; i < N ; ++i ) value[i] += rhs.value[i] ; + } +}; + +static_assert(std::is_trivial<StaticArray<int, 4>>::value, "Not trivial"); + +template< typename T , class Space > +struct DOT { + typedef T value_type ; + typedef Space execution_space ; + + Kokkos::View< value_type * , Space > a ; + Kokkos::View< value_type * , Space > b ; + + DOT( const Kokkos::View< value_type * , Space > arg_a + , const Kokkos::View< value_type * , Space > arg_b + ) + : a( arg_a ), b( arg_b ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int i , value_type & update ) const + { + update += a(i) * b(i); + } +}; + +template< typename T , class Space > +struct FILL { + typedef T value_type ; + typedef Space execution_space ; + + Kokkos::View< value_type * , Space > a ; + Kokkos::View< value_type * , Space > b ; + + FILL( const Kokkos::View< value_type * , Space > & arg_a + , const Kokkos::View< value_type * , Space > & arg_b + ) + : a( arg_a ), b( arg_b ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int i ) const + { + a(i) = i % 2 ? i + 1 : 1 ; + b(i) = i % 2 ? 1 : i + 1 ; + } +}; + +template< class Space > +void TestViewAggregateReduction() +{ + +#if ! KOKKOS_USING_EXP_VIEW + + const int count = 2 ; + const long result = count % 2 ? 
( count * ( ( count + 1 ) / 2 ) ) + : ( ( count / 2 ) * ( count + 1 ) ); + + Kokkos::View< long * , Space > a("a",count); + Kokkos::View< long * , Space > b("b",count); + Kokkos::View< StaticArray<long,4> * , Space > a4("a4",count); + Kokkos::View< StaticArray<long,4> * , Space > b4("b4",count); + Kokkos::View< StaticArray<long,10> * , Space > a10("a10",count); + Kokkos::View< StaticArray<long,10> * , Space > b10("b10",count); + + Kokkos::parallel_for( count , FILL<long,Space>(a,b) ); + Kokkos::parallel_for( count , FILL< StaticArray<long,4> , Space >(a4,b4) ); + Kokkos::parallel_for( count , FILL< StaticArray<long,10> , Space >(a10,b10) ); + + long r = 0; + StaticArray<long,4> r4 ; + StaticArray<long,10> r10 ; + + Kokkos::parallel_reduce( count , DOT<long,Space>(a,b) , r ); + Kokkos::parallel_reduce( count , DOT< StaticArray<long,4> , Space >(a4,b4) , r4 ); + Kokkos::parallel_reduce( count , DOT< StaticArray<long,10> , Space >(a10,b10) , r10 ); + + ASSERT_EQ( result , r ); + for ( int i = 0 ; i < 10 ; ++i ) { ASSERT_EQ( result , r10.value[i] ); } + for ( int i = 0 ; i < 4 ; ++i ) { ASSERT_EQ( result , r4.value[i] ); } + +#endif + +} + +} + +#endif /* #ifndef TEST_AGGREGATE_REDUCTION_HPP */ + diff --git a/lib/kokkos/core/unit_test/TestAtomic.hpp b/lib/kokkos/core/unit_test/TestAtomic.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e948723574b48b2a64ee66c487062e34c0ccf29b --- /dev/null +++ b/lib/kokkos/core/unit_test/TestAtomic.hpp @@ -0,0 +1,402 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> + +namespace TestAtomic { + +// Struct for testing arbitrary size atomics + +template<int N> +struct SuperScalar { + double val[N]; + + KOKKOS_INLINE_FUNCTION + SuperScalar() { + for(int i=0; i<N; i++) + val[i] = 0.0; + } + + KOKKOS_INLINE_FUNCTION + SuperScalar(const SuperScalar& src) { + for(int i=0; i<N; i++) + val[i] = src.val[i]; + } + + KOKKOS_INLINE_FUNCTION + SuperScalar(const volatile SuperScalar& src) { + for(int i=0; i<N; i++) + val[i] = src.val[i]; + } + + KOKKOS_INLINE_FUNCTION + SuperScalar& operator = (const SuperScalar& src) { + for(int i=0; i<N; i++) + val[i] = src.val[i]; + return *this; + } + + KOKKOS_INLINE_FUNCTION + SuperScalar& operator = (const volatile SuperScalar& src) { + for(int i=0; i<N; i++) + val[i] = src.val[i]; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator = (const SuperScalar& src) volatile { + for(int i=0; i<N; i++) + val[i] = src.val[i]; + } + + KOKKOS_INLINE_FUNCTION + SuperScalar operator + (const SuperScalar& src) { + SuperScalar tmp = *this; + for(int i=0; i<N; i++) + tmp.val[i] += src.val[i]; + return tmp; + } + + KOKKOS_INLINE_FUNCTION + SuperScalar& operator += (const double& src) { + for(int i=0; i<N; i++) + val[i] += 1.0*(i+1)*src; + return *this; + } + + KOKKOS_INLINE_FUNCTION + SuperScalar& operator += (const SuperScalar& src) { + for(int i=0; i<N; i++) + val[i] += src.val[i]; + return *this; + } + + KOKKOS_INLINE_FUNCTION + bool operator == (const SuperScalar& src) { + bool compare = true; + for(int i=0; i<N; i++) + compare = compare && ( val[i] == src.val[i]); + return compare; + } + + KOKKOS_INLINE_FUNCTION + bool operator != (const SuperScalar& src) { + bool compare = true; + for(int i=0; i<N; i++) + compare = compare && ( val[i] == src.val[i]); + return !compare; + } + + + + KOKKOS_INLINE_FUNCTION + SuperScalar(const double& src) { + 
for(int i=0; i<N; i++) + val[i] = 1.0 * (i+1) * src; + } + +}; + +template<int N> +std::ostream& operator<<(std::ostream& os, const SuperScalar<N>& dt) +{ + os << "{ "; + for(int i=0;i<N-1;i++) + os << dt.val[i] << ", "; + os << dt.val[N-1] << "}"; + return os; +} + +template<class T,class DEVICE_TYPE> +struct ZeroFunctor { + typedef DEVICE_TYPE execution_space; + typedef typename Kokkos::View<T,execution_space> type; + typedef typename Kokkos::View<T,execution_space>::HostMirror h_type; + type data; + KOKKOS_INLINE_FUNCTION + void operator()(int) const { + data() = 0; + } +}; + +//--------------------------------------------------- +//--------------atomic_fetch_add--------------------- +//--------------------------------------------------- + +template<class T,class DEVICE_TYPE> +struct AddFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data; + + KOKKOS_INLINE_FUNCTION + void operator()(int) const { + Kokkos::atomic_fetch_add(&data(),(T)1); + } +}; + +template<class T, class execution_space > +T AddLoop(int loop) { + struct ZeroFunctor<T,execution_space> f_zero; + typename ZeroFunctor<T,execution_space>::type data("Data"); + typename ZeroFunctor<T,execution_space>::h_type h_data("HData"); + f_zero.data = data; + Kokkos::parallel_for(1,f_zero); + execution_space::fence(); + + struct AddFunctor<T,execution_space> f_add; + f_add.data = data; + Kokkos::parallel_for(loop,f_add); + execution_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + return val; +} + +template<class T> +T AddLoopSerial(int loop) { + T* data = new T[1]; + data[0] = 0; + + for(int i=0;i<loop;i++) + *data+=(T)1; + + T val = *data; + delete [] data; + return val; +} + +//------------------------------------------------------ +//--------------atomic_compare_exchange----------------- +//------------------------------------------------------ + +template<class T,class DEVICE_TYPE> +struct CASFunctor{ + typedef DEVICE_TYPE 
execution_space; + typedef Kokkos::View<T,execution_space> type; + type data; + + KOKKOS_INLINE_FUNCTION + void operator()(int) const { + T old = data(); + T newval, assumed; + do { + assumed = old; + newval = assumed + (T)1; + old = Kokkos::atomic_compare_exchange(&data(), assumed, newval); + } + while( old != assumed ); + } +}; + +template<class T, class execution_space > +T CASLoop(int loop) { + struct ZeroFunctor<T,execution_space> f_zero; + typename ZeroFunctor<T,execution_space>::type data("Data"); + typename ZeroFunctor<T,execution_space>::h_type h_data("HData"); + f_zero.data = data; + Kokkos::parallel_for(1,f_zero); + execution_space::fence(); + + struct CASFunctor<T,execution_space> f_cas; + f_cas.data = data; + Kokkos::parallel_for(loop,f_cas); + execution_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + + return val; +} + +template<class T> +T CASLoopSerial(int loop) { + T* data = new T[1]; + data[0] = 0; + + for(int i=0;i<loop;i++) { + T assumed; + T newval; + T old; + do { + assumed = *data; + newval = assumed + (T)1; + old = *data; + *data = newval; + } + while(!(assumed==old)); + } + + T val = *data; + delete [] data; + return val; +} + +//---------------------------------------------- +//--------------atomic_exchange----------------- +//---------------------------------------------- + +template<class T,class DEVICE_TYPE> +struct ExchFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data, data2; + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { + T old = Kokkos::atomic_exchange(&data(),(T)i); + Kokkos::atomic_fetch_add(&data2(),old); + } +}; + +template<class T, class execution_space > +T ExchLoop(int loop) { + struct ZeroFunctor<T,execution_space> f_zero; + typename ZeroFunctor<T,execution_space>::type data("Data"); + typename ZeroFunctor<T,execution_space>::h_type h_data("HData"); + f_zero.data = data; + Kokkos::parallel_for(1,f_zero); + 
execution_space::fence(); + + typename ZeroFunctor<T,execution_space>::type data2("Data"); + typename ZeroFunctor<T,execution_space>::h_type h_data2("HData"); + f_zero.data = data2; + Kokkos::parallel_for(1,f_zero); + execution_space::fence(); + + struct ExchFunctor<T,execution_space> f_exch; + f_exch.data = data; + f_exch.data2 = data2; + Kokkos::parallel_for(loop,f_exch); + execution_space::fence(); + + Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy(h_data2,data2); + T val = h_data() + h_data2(); + + return val; +} + +template<class T> +T ExchLoopSerial(typename std::conditional<!std::is_same<T,Kokkos::complex<double> >::value,int,void>::type loop) { + T* data = new T[1]; + T* data2 = new T[1]; + data[0] = 0; + data2[0] = 0; + for(int i=0;i<loop;i++) { + T old = *data; + *data=(T) i; + *data2+=old; + } + + T val = *data2 + *data; + delete [] data; + delete [] data2; + return val; +} + +template<class T> +T ExchLoopSerial(typename std::conditional<std::is_same<T,Kokkos::complex<double> >::value,int,void>::type loop) { + T* data = new T[1]; + T* data2 = new T[1]; + data[0] = 0; + data2[0] = 0; + for(int i=0;i<loop;i++) { + T old = *data; + data->real() = (static_cast<double>(i)); + data->imag() = 0; + *data2+=old; + } + + T val = *data2 + *data; + delete [] data; + delete [] data2; + return val; +} + +template<class T, class DeviceType > +T LoopVariant(int loop, int test) { + switch (test) { + case 1: return AddLoop<T,DeviceType>(loop); + case 2: return CASLoop<T,DeviceType>(loop); + case 3: return ExchLoop<T,DeviceType>(loop); + } + return 0; +} + +template<class T> +T LoopVariantSerial(int loop, int test) { + switch (test) { + case 1: return AddLoopSerial<T>(loop); + case 2: return CASLoopSerial<T>(loop); + case 3: return ExchLoopSerial<T>(loop); + } + return 0; +} + +template<class T,class DeviceType> +bool Loop(int loop, int test) +{ + T res = LoopVariant<T,DeviceType>(loop,test); + T resSerial = LoopVariantSerial<T>(loop,test); + + bool passed = true; + + 
if ( resSerial != res ) { + passed = false; + + std::cout << "Loop<" + << typeid(T).name() + << ">( test = " + << test << " FAILED : " + << resSerial << " != " << res + << std::endl ; + } + + + return passed ; +} + +} + diff --git a/lib/kokkos/core/unit_test/TestAtomicOperations.hpp b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp new file mode 100644 index 0000000000000000000000000000000000000000..aee4bda06cea276e12fca664a48c81a428445bcd --- /dev/null +++ b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp @@ -0,0 +1,841 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> + +namespace TestAtomicOperations { + +//----------------------------------------------- +//--------------zero_functor--------------------- +//----------------------------------------------- + +template<class T,class DEVICE_TYPE> +struct ZeroFunctor { + typedef DEVICE_TYPE execution_space; + typedef typename Kokkos::View<T,execution_space> type; + typedef typename Kokkos::View<T,execution_space>::HostMirror h_type; + type data; + KOKKOS_INLINE_FUNCTION + void operator()(int) const { + data() = 0; + } +}; + +//----------------------------------------------- +//--------------init_functor--------------------- +//----------------------------------------------- + +template<class T,class DEVICE_TYPE> +struct InitFunctor { + typedef DEVICE_TYPE execution_space; + typedef typename Kokkos::View<T,execution_space> type; + typedef typename Kokkos::View<T,execution_space>::HostMirror h_type; + type data; + T init_value ; + KOKKOS_INLINE_FUNCTION + void operator()(int) const { + data() = init_value; + } + + InitFunctor(T _init_value) : init_value(_init_value) {} +}; + + +//--------------------------------------------------- +//--------------atomic_fetch_max--------------------- +//--------------------------------------------------- + +template<class 
T,class DEVICE_TYPE> +struct MaxFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data; + T i0; + T i1; + + KOKKOS_INLINE_FUNCTION + void operator()(int) const { + //Kokkos::atomic_fetch_max(&data(),(T)1); + Kokkos::atomic_fetch_max(&data(),(T)i1); + } + MaxFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} +}; + +template<class T, class execution_space > +T MaxAtomic(T i0 , T i1) { + struct InitFunctor<T,execution_space> f_init(i0); + typename InitFunctor<T,execution_space>::type data("Data"); + typename InitFunctor<T,execution_space>::h_type h_data("HData"); + f_init.data = data; + Kokkos::parallel_for(1,f_init); + execution_space::fence(); + + struct MaxFunctor<T,execution_space> f(i0,i1); + f.data = data; + Kokkos::parallel_for(1,f); + execution_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + return val; +} + +template<class T> +T MaxAtomicCheck(T i0 , T i1) { + T* data = new T[1]; + data[0] = 0; + + *data = (i0 > i1 ? 
i0 : i1) ; + + T val = *data; + delete [] data; + return val; +} + +template<class T,class DeviceType> +bool MaxAtomicTest(T i0, T i1) +{ + T res = MaxAtomic<T,DeviceType>(i0,i1); + T resSerial = MaxAtomicCheck<T>(i0,i1); + + bool passed = true; + + if ( resSerial != res ) { + passed = false; + + std::cout << "Loop<" + << typeid(T).name() + << ">( test = MaxAtomicTest" + << " FAILED : " + << resSerial << " != " << res + << std::endl ; + } + + return passed ; +} + +//--------------------------------------------------- +//--------------atomic_fetch_min--------------------- +//--------------------------------------------------- + +template<class T,class DEVICE_TYPE> +struct MinFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data; + T i0; + T i1; + + KOKKOS_INLINE_FUNCTION + void operator()(int) const { + Kokkos::atomic_fetch_min(&data(),(T)i1); + } + MinFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} +}; + +template<class T, class execution_space > +T MinAtomic(T i0 , T i1) { + struct InitFunctor<T,execution_space> f_init(i0); + typename InitFunctor<T,execution_space>::type data("Data"); + typename InitFunctor<T,execution_space>::h_type h_data("HData"); + f_init.data = data; + Kokkos::parallel_for(1,f_init); + execution_space::fence(); + + struct MinFunctor<T,execution_space> f(i0,i1); + f.data = data; + Kokkos::parallel_for(1,f); + execution_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + return val; +} + +template<class T> +T MinAtomicCheck(T i0 , T i1) { + T* data = new T[1]; + data[0] = 0; + + *data = (i0 < i1 ? 
i0 : i1) ; + + T val = *data; + delete [] data; + return val; +} + +template<class T,class DeviceType> +bool MinAtomicTest(T i0, T i1) +{ + T res = MinAtomic<T,DeviceType>(i0,i1); + T resSerial = MinAtomicCheck<T>(i0,i1); + + bool passed = true; + + if ( resSerial != res ) { + passed = false; + + std::cout << "Loop<" + << typeid(T).name() + << ">( test = MinAtomicTest" + << " FAILED : " + << resSerial << " != " << res + << std::endl ; + } + + return passed ; +} + +//--------------------------------------------------- +//--------------atomic_fetch_mul--------------------- +//--------------------------------------------------- + +template<class T,class DEVICE_TYPE> +struct MulFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data; + T i0; + T i1; + + KOKKOS_INLINE_FUNCTION + void operator()(int) const { + Kokkos::atomic_fetch_mul(&data(),(T)i1); + } + MulFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} +}; + +template<class T, class execution_space > +T MulAtomic(T i0 , T i1) { + struct InitFunctor<T,execution_space> f_init(i0); + typename InitFunctor<T,execution_space>::type data("Data"); + typename InitFunctor<T,execution_space>::h_type h_data("HData"); + f_init.data = data; + Kokkos::parallel_for(1,f_init); + execution_space::fence(); + + struct MulFunctor<T,execution_space> f(i0,i1); + f.data = data; + Kokkos::parallel_for(1,f); + execution_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + return val; +} + +template<class T> +T MulAtomicCheck(T i0 , T i1) { + T* data = new T[1]; + data[0] = 0; + + *data = i0*i1 ; + + T val = *data; + delete [] data; + return val; +} + +template<class T,class DeviceType> +bool MulAtomicTest(T i0, T i1) +{ + T res = MulAtomic<T,DeviceType>(i0,i1); + T resSerial = MulAtomicCheck<T>(i0,i1); + + bool passed = true; + + if ( resSerial != res ) { + passed = false; + + std::cout << "Loop<" + << typeid(T).name() + << ">( test = MulAtomicTest" + << " FAILED : " + 
<< resSerial << " != " << res + << std::endl ; + } + + return passed ; +} + +//--------------------------------------------------- +//--------------atomic_fetch_div--------------------- +//--------------------------------------------------- + +template<class T,class DEVICE_TYPE> +struct DivFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data; + T i0; + T i1; + + KOKKOS_INLINE_FUNCTION + void operator()(int) const { + Kokkos::atomic_fetch_div(&data(),(T)i1); + } + DivFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} +}; + +template<class T, class execution_space > +T DivAtomic(T i0 , T i1) { + struct InitFunctor<T,execution_space> f_init(i0); + typename InitFunctor<T,execution_space>::type data("Data"); + typename InitFunctor<T,execution_space>::h_type h_data("HData"); + f_init.data = data; + Kokkos::parallel_for(1,f_init); + execution_space::fence(); + + struct DivFunctor<T,execution_space> f(i0,i1); + f.data = data; + Kokkos::parallel_for(1,f); + execution_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + return val; +} + +template<class T> +T DivAtomicCheck(T i0 , T i1) { + T* data = new T[1]; + data[0] = 0; + + *data = i0/i1 ; + + T val = *data; + delete [] data; + return val; +} + +template<class T,class DeviceType> +bool DivAtomicTest(T i0, T i1) +{ + T res = DivAtomic<T,DeviceType>(i0,i1); + T resSerial = DivAtomicCheck<T>(i0,i1); + + bool passed = true; + + if ( resSerial != res ) { + passed = false; + + std::cout << "Loop<" + << typeid(T).name() + << ">( test = DivAtomicTest" + << " FAILED : " + << resSerial << " != " << res + << std::endl ; + } + + return passed ; +} + +//--------------------------------------------------- +//--------------atomic_fetch_mod--------------------- +//--------------------------------------------------- + +template<class T,class DEVICE_TYPE> +struct ModFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type 
data; + T i0; + T i1; + + KOKKOS_INLINE_FUNCTION + void operator()(int) const { + Kokkos::atomic_fetch_mod(&data(),(T)i1); + } + ModFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} +}; + +template<class T, class execution_space > +T ModAtomic(T i0 , T i1) { + struct InitFunctor<T,execution_space> f_init(i0); + typename InitFunctor<T,execution_space>::type data("Data"); + typename InitFunctor<T,execution_space>::h_type h_data("HData"); + f_init.data = data; + Kokkos::parallel_for(1,f_init); + execution_space::fence(); + + struct ModFunctor<T,execution_space> f(i0,i1); + f.data = data; + Kokkos::parallel_for(1,f); + execution_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + return val; +} + +template<class T> +T ModAtomicCheck(T i0 , T i1) { + T* data = new T[1]; + data[0] = 0; + + *data = i0%i1 ; + + T val = *data; + delete [] data; + return val; +} + +template<class T,class DeviceType> +bool ModAtomicTest(T i0, T i1) +{ + T res = ModAtomic<T,DeviceType>(i0,i1); + T resSerial = ModAtomicCheck<T>(i0,i1); + + bool passed = true; + + if ( resSerial != res ) { + passed = false; + + std::cout << "Loop<" + << typeid(T).name() + << ">( test = ModAtomicTest" + << " FAILED : " + << resSerial << " != " << res + << std::endl ; + } + + return passed ; +} + +//--------------------------------------------------- +//--------------atomic_fetch_and--------------------- +//--------------------------------------------------- + +template<class T,class DEVICE_TYPE> +struct AndFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data; + T i0; + T i1; + + KOKKOS_INLINE_FUNCTION + void operator()(int) const { + Kokkos::atomic_fetch_and(&data(),(T)i1); + } + AndFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} +}; + +template<class T, class execution_space > +T AndAtomic(T i0 , T i1) { + struct InitFunctor<T,execution_space> f_init(i0); + typename InitFunctor<T,execution_space>::type data("Data"); + typename 
InitFunctor<T,execution_space>::h_type h_data("HData"); + f_init.data = data; + Kokkos::parallel_for(1,f_init); + execution_space::fence(); + + struct AndFunctor<T,execution_space> f(i0,i1); + f.data = data; + Kokkos::parallel_for(1,f); + execution_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + return val; +} + +template<class T> +T AndAtomicCheck(T i0 , T i1) { + T* data = new T[1]; + data[0] = 0; + + *data = i0&i1 ; + + T val = *data; + delete [] data; + return val; +} + +template<class T,class DeviceType> +bool AndAtomicTest(T i0, T i1) +{ + T res = AndAtomic<T,DeviceType>(i0,i1); + T resSerial = AndAtomicCheck<T>(i0,i1); + + bool passed = true; + + if ( resSerial != res ) { + passed = false; + + std::cout << "Loop<" + << typeid(T).name() + << ">( test = AndAtomicTest" + << " FAILED : " + << resSerial << " != " << res + << std::endl ; + } + + return passed ; +} + +//--------------------------------------------------- +//--------------atomic_fetch_or---------------------- +//--------------------------------------------------- + +template<class T,class DEVICE_TYPE> +struct OrFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data; + T i0; + T i1; + + KOKKOS_INLINE_FUNCTION + void operator()(int) const { + Kokkos::atomic_fetch_or(&data(),(T)i1); + } + OrFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} +}; + +template<class T, class execution_space > +T OrAtomic(T i0 , T i1) { + struct InitFunctor<T,execution_space> f_init(i0); + typename InitFunctor<T,execution_space>::type data("Data"); + typename InitFunctor<T,execution_space>::h_type h_data("HData"); + f_init.data = data; + Kokkos::parallel_for(1,f_init); + execution_space::fence(); + + struct OrFunctor<T,execution_space> f(i0,i1); + f.data = data; + Kokkos::parallel_for(1,f); + execution_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + return val; +} + +template<class T> +T OrAtomicCheck(T i0 , T i1) { + T* 
data = new T[1]; + data[0] = 0; + + *data = i0|i1 ; + + T val = *data; + delete [] data; + return val; +} + +template<class T,class DeviceType> +bool OrAtomicTest(T i0, T i1) +{ + T res = OrAtomic<T,DeviceType>(i0,i1); + T resSerial = OrAtomicCheck<T>(i0,i1); + + bool passed = true; + + if ( resSerial != res ) { + passed = false; + + std::cout << "Loop<" + << typeid(T).name() + << ">( test = OrAtomicTest" + << " FAILED : " + << resSerial << " != " << res + << std::endl ; + } + + return passed ; +} + +//--------------------------------------------------- +//--------------atomic_fetch_xor--------------------- +//--------------------------------------------------- + +template<class T,class DEVICE_TYPE> +struct XorFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data; + T i0; + T i1; + + KOKKOS_INLINE_FUNCTION + void operator()(int) const { + Kokkos::atomic_fetch_xor(&data(),(T)i1); + } + XorFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} +}; + +template<class T, class execution_space > +T XorAtomic(T i0 , T i1) { + struct InitFunctor<T,execution_space> f_init(i0); + typename InitFunctor<T,execution_space>::type data("Data"); + typename InitFunctor<T,execution_space>::h_type h_data("HData"); + f_init.data = data; + Kokkos::parallel_for(1,f_init); + execution_space::fence(); + + struct XorFunctor<T,execution_space> f(i0,i1); + f.data = data; + Kokkos::parallel_for(1,f); + execution_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + return val; +} + +template<class T> +T XorAtomicCheck(T i0 , T i1) { + T* data = new T[1]; + data[0] = 0; + + *data = i0^i1 ; + + T val = *data; + delete [] data; + return val; +} + +template<class T,class DeviceType> +bool XorAtomicTest(T i0, T i1) +{ + T res = XorAtomic<T,DeviceType>(i0,i1); + T resSerial = XorAtomicCheck<T>(i0,i1); + + bool passed = true; + + if ( resSerial != res ) { + passed = false; + + std::cout << "Loop<" + << typeid(T).name() + << ">( test = 
XorAtomicTest" + << " FAILED : " + << resSerial << " != " << res + << std::endl ; + } + + return passed ; +} + +//--------------------------------------------------- +//--------------atomic_fetch_lshift--------------------- +//--------------------------------------------------- + +template<class T,class DEVICE_TYPE> +struct LShiftFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data; + T i0; + T i1; + + KOKKOS_INLINE_FUNCTION + void operator()(int) const { + Kokkos::atomic_fetch_lshift(&data(),(T)i1); + } + LShiftFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} +}; + +template<class T, class execution_space > +T LShiftAtomic(T i0 , T i1) { + struct InitFunctor<T,execution_space> f_init(i0); + typename InitFunctor<T,execution_space>::type data("Data"); + typename InitFunctor<T,execution_space>::h_type h_data("HData"); + f_init.data = data; + Kokkos::parallel_for(1,f_init); + execution_space::fence(); + + struct LShiftFunctor<T,execution_space> f(i0,i1); + f.data = data; + Kokkos::parallel_for(1,f); + execution_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + return val; +} + +template<class T> +T LShiftAtomicCheck(T i0 , T i1) { + T* data = new T[1]; + data[0] = 0; + + *data = i0<<i1 ; + + T val = *data; + delete [] data; + return val; +} + +template<class T,class DeviceType> +bool LShiftAtomicTest(T i0, T i1) +{ + T res = LShiftAtomic<T,DeviceType>(i0,i1); + T resSerial = LShiftAtomicCheck<T>(i0,i1); + + bool passed = true; + + if ( resSerial != res ) { + passed = false; + + std::cout << "Loop<" + << typeid(T).name() + << ">( test = LShiftAtomicTest" + << " FAILED : " + << resSerial << " != " << res + << std::endl ; + } + + return passed ; +} + +//--------------------------------------------------- +//--------------atomic_fetch_rshift--------------------- +//--------------------------------------------------- + +template<class T,class DEVICE_TYPE> +struct RShiftFunctor{ + typedef 
DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data; + T i0; + T i1; + + KOKKOS_INLINE_FUNCTION + void operator()(int) const { + Kokkos::atomic_fetch_rshift(&data(),(T)i1); + } + RShiftFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} +}; + +template<class T, class execution_space > +T RShiftAtomic(T i0 , T i1) { + struct InitFunctor<T,execution_space> f_init(i0); + typename InitFunctor<T,execution_space>::type data("Data"); + typename InitFunctor<T,execution_space>::h_type h_data("HData"); + f_init.data = data; + Kokkos::parallel_for(1,f_init); + execution_space::fence(); + + struct RShiftFunctor<T,execution_space> f(i0,i1); + f.data = data; + Kokkos::parallel_for(1,f); + execution_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + return val; +} + +template<class T> +T RShiftAtomicCheck(T i0 , T i1) { + T* data = new T[1]; + data[0] = 0; + + *data = i0>>i1 ; + + T val = *data; + delete [] data; + return val; +} + +template<class T,class DeviceType> +bool RShiftAtomicTest(T i0, T i1) +{ + T res = RShiftAtomic<T,DeviceType>(i0,i1); + T resSerial = RShiftAtomicCheck<T>(i0,i1); + + bool passed = true; + + if ( resSerial != res ) { + passed = false; + + std::cout << "Loop<" + << typeid(T).name() + << ">( test = RShiftAtomicTest" + << " FAILED : " + << resSerial << " != " << res + << std::endl ; + } + + return passed ; +} + + +//--------------------------------------------------- +//--------------atomic_test_control------------------ +//--------------------------------------------------- + +template<class T,class DeviceType> +bool AtomicOperationsTestIntegralType( int i0 , int i1 , int test ) +{ + switch (test) { + case 1: return MaxAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); + case 2: return MinAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); + case 3: return MulAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); + case 4: return DivAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); + case 5: return ModAtomicTest<T,DeviceType>( (T)i0 
, (T)i1 ); + case 6: return AndAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); + case 7: return OrAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); + case 8: return XorAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); + case 9: return LShiftAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); + case 10: return RShiftAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); + } + return 0; +} + +template<class T,class DeviceType> +bool AtomicOperationsTestNonIntegralType( int i0 , int i1 , int test ) +{ + switch (test) { + case 1: return MaxAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); + case 2: return MinAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); + case 3: return MulAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); + case 4: return DivAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); + } + return 0; +} + +} // namespace + diff --git a/lib/kokkos/core/unit_test/TestCXX11.hpp b/lib/kokkos/core/unit_test/TestCXX11.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2d6349776b33f3af85f6feb3fab91331d7a6de0e --- /dev/null +++ b/lib/kokkos/core/unit_test/TestCXX11.hpp @@ -0,0 +1,334 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <Kokkos_Core.hpp> + +namespace TestCXX11 { + +template<class DeviceType> +struct FunctorAddTest{ + typedef Kokkos::View<double**,DeviceType> view_type; + view_type a_, b_; + typedef DeviceType execution_space; + FunctorAddTest(view_type & a, view_type &b):a_(a),b_(b) {} + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + b_(i,0) = a_(i,1) + a_(i,2); + b_(i,1) = a_(i,0) - a_(i,3); + b_(i,2) = a_(i,4) + a_(i,0); + b_(i,3) = a_(i,2) - a_(i,1); + b_(i,4) = a_(i,3) + a_(i,4); + } + + typedef typename Kokkos::TeamPolicy< execution_space >::member_type team_member ; + KOKKOS_INLINE_FUNCTION + void operator() (const team_member & dev) const { + const int begin = dev.league_rank() * 4 ; + const int end = begin + 4 ; + for ( int i = begin + dev.team_rank() ; i < end ; i += dev.team_size() ) { + b_(i,0) = a_(i,1) + a_(i,2); + b_(i,1) = a_(i,0) - a_(i,3); + b_(i,2) = a_(i,4) + a_(i,0); + b_(i,3) = a_(i,2) - a_(i,1); + b_(i,4) = a_(i,3) + a_(i,4); + } + } +}; + +template<class DeviceType, bool PWRTest> +double AddTestFunctor() { + + typedef Kokkos::TeamPolicy<DeviceType> policy_type ; + + Kokkos::View<double**,DeviceType> a("A",100,5); + Kokkos::View<double**,DeviceType> b("B",100,5); + typename Kokkos::View<double**,DeviceType>::HostMirror h_a = Kokkos::create_mirror_view(a); + typename Kokkos::View<double**,DeviceType>::HostMirror h_b = Kokkos::create_mirror_view(b); + + for(int i=0;i<100;i++) { + for(int j=0;j<5;j++) + h_a(i,j) = 0.1*i/(1.1*j+1.0) + 0.5*j; + } + Kokkos::deep_copy(a,h_a); + + if(PWRTest==false) + Kokkos::parallel_for(100,FunctorAddTest<DeviceType>(a,b)); + else + Kokkos::parallel_for(policy_type(25,Kokkos::AUTO),FunctorAddTest<DeviceType>(a,b)); + Kokkos::deep_copy(h_b,b); + + double result = 0; + for(int i=0;i<100;i++) { + for(int j=0;j<5;j++) + result += h_b(i,j); + } + + return result; +} + + +#if 
defined (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) +template<class DeviceType, bool PWRTest> +double AddTestLambda() { + + Kokkos::View<double**,DeviceType> a("A",100,5); + Kokkos::View<double**,DeviceType> b("B",100,5); + typename Kokkos::View<double**,DeviceType>::HostMirror h_a = Kokkos::create_mirror_view(a); + typename Kokkos::View<double**,DeviceType>::HostMirror h_b = Kokkos::create_mirror_view(b); + + for(int i=0;i<100;i++) { + for(int j=0;j<5;j++) + h_a(i,j) = 0.1*i/(1.1*j+1.0) + 0.5*j; + } + Kokkos::deep_copy(a,h_a); + + if(PWRTest==false) { + Kokkos::parallel_for(100,KOKKOS_LAMBDA(const int& i) { + b(i,0) = a(i,1) + a(i,2); + b(i,1) = a(i,0) - a(i,3); + b(i,2) = a(i,4) + a(i,0); + b(i,3) = a(i,2) - a(i,1); + b(i,4) = a(i,3) + a(i,4); + }); + } else { + typedef Kokkos::TeamPolicy<DeviceType> policy_type ; + typedef typename policy_type::member_type team_member ; + + policy_type policy(25,Kokkos::AUTO); + + Kokkos::parallel_for(policy,KOKKOS_LAMBDA(const team_member & dev) { + const int begin = dev.league_rank() * 4 ; + const int end = begin + 4 ; + for ( int i = begin + dev.team_rank() ; i < end ; i += dev.team_size() ) { + b(i,0) = a(i,1) + a(i,2); + b(i,1) = a(i,0) - a(i,3); + b(i,2) = a(i,4) + a(i,0); + b(i,3) = a(i,2) - a(i,1); + b(i,4) = a(i,3) + a(i,4); + } + }); + } + Kokkos::deep_copy(h_b,b); + + double result = 0; + for(int i=0;i<100;i++) { + for(int j=0;j<5;j++) + result += h_b(i,j); + } + + return result; +} + +#else +template<class DeviceType, bool PWRTest> +double AddTestLambda() { + return AddTestFunctor<DeviceType,PWRTest>(); +} +#endif + + +template<class DeviceType> +struct FunctorReduceTest{ + typedef Kokkos::View<double**,DeviceType> view_type; + view_type a_; + typedef DeviceType execution_space; + typedef double value_type; + FunctorReduceTest(view_type & a):a_(a) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, value_type& sum) const { + sum += a_(i,1) + a_(i,2); + sum += a_(i,0) - a_(i,3); + sum += a_(i,4) + a_(i,0); + sum += 
a_(i,2) - a_(i,1); + sum += a_(i,3) + a_(i,4); + } + + typedef typename Kokkos::TeamPolicy< execution_space >::member_type team_member ; + + KOKKOS_INLINE_FUNCTION + void operator() (const team_member & dev, value_type& sum) const { + const int begin = dev.league_rank() * 4 ; + const int end = begin + 4 ; + for ( int i = begin + dev.team_rank() ; i < end ; i += dev.team_size() ) { + sum += a_(i,1) + a_(i,2); + sum += a_(i,0) - a_(i,3); + sum += a_(i,4) + a_(i,0); + sum += a_(i,2) - a_(i,1); + sum += a_(i,3) + a_(i,4); + } + } + KOKKOS_INLINE_FUNCTION + void init(value_type& update) const {update = 0.0;} + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& update, volatile value_type const& input) const {update += input;} +}; + +template<class DeviceType, bool PWRTest> +double ReduceTestFunctor() { + + typedef Kokkos::TeamPolicy<DeviceType> policy_type ; + typedef Kokkos::View<double**,DeviceType> view_type ; + typedef Kokkos::View<double,typename view_type::host_mirror_space,Kokkos::MemoryUnmanaged> unmanaged_result ; + + view_type a("A",100,5); + typename view_type::HostMirror h_a = Kokkos::create_mirror_view(a); + + for(int i=0;i<100;i++) { + for(int j=0;j<5;j++) + h_a(i,j) = 0.1*i/(1.1*j+1.0) + 0.5*j; + } + Kokkos::deep_copy(a,h_a); + + double result = 0.0; + if(PWRTest==false) + Kokkos::parallel_reduce(100,FunctorReduceTest<DeviceType>(a), unmanaged_result( & result )); + else + Kokkos::parallel_reduce(policy_type(25,Kokkos::AUTO),FunctorReduceTest<DeviceType>(a), unmanaged_result( & result )); + + return result; +} + +#if defined (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) +template<class DeviceType, bool PWRTest> +double ReduceTestLambda() { + + typedef Kokkos::TeamPolicy<DeviceType> policy_type ; + typedef Kokkos::View<double**,DeviceType> view_type ; + typedef Kokkos::View<double,typename view_type::host_mirror_space,Kokkos::MemoryUnmanaged> unmanaged_result ; + + view_type a("A",100,5); + typename view_type::HostMirror h_a = Kokkos::create_mirror_view(a); + + 
for(int i=0;i<100;i++) { + for(int j=0;j<5;j++) + h_a(i,j) = 0.1*i/(1.1*j+1.0) + 0.5*j; + } + Kokkos::deep_copy(a,h_a); + + double result = 0.0; + + if(PWRTest==false) { + Kokkos::parallel_reduce(100,KOKKOS_LAMBDA(const int& i, double& sum) { + sum += a(i,1) + a(i,2); + sum += a(i,0) - a(i,3); + sum += a(i,4) + a(i,0); + sum += a(i,2) - a(i,1); + sum += a(i,3) + a(i,4); + }, unmanaged_result( & result ) ); + } else { + typedef typename policy_type::member_type team_member ; + Kokkos::parallel_reduce(policy_type(25,Kokkos::AUTO),KOKKOS_LAMBDA(const team_member & dev, double& sum) { + const int begin = dev.league_rank() * 4 ; + const int end = begin + 4 ; + for ( int i = begin + dev.team_rank() ; i < end ; i += dev.team_size() ) { + sum += a(i,1) + a(i,2); + sum += a(i,0) - a(i,3); + sum += a(i,4) + a(i,0); + sum += a(i,2) - a(i,1); + sum += a(i,3) + a(i,4); + } + }, unmanaged_result( & result ) ); + } + + return result; +} + +#else +template<class DeviceType, bool PWRTest> +double ReduceTestLambda() { + return ReduceTestFunctor<DeviceType,PWRTest>(); +} +#endif + +template<class DeviceType> +double TestVariantLambda(int test) { + switch (test) { + case 1: return AddTestLambda<DeviceType,false>(); + case 2: return AddTestLambda<DeviceType,true>(); + case 3: return ReduceTestLambda<DeviceType,false>(); + case 4: return ReduceTestLambda<DeviceType,true>(); + } + return 0; +} + + +template<class DeviceType> +double TestVariantFunctor(int test) { + switch (test) { + case 1: return AddTestFunctor<DeviceType,false>(); + case 2: return AddTestFunctor<DeviceType,true>(); + case 3: return ReduceTestFunctor<DeviceType,false>(); + case 4: return ReduceTestFunctor<DeviceType,true>(); + } + return 0; +} + +template<class DeviceType> +bool Test(int test) { + +#ifdef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA + double res_functor = TestVariantFunctor<DeviceType>(test); + double res_lambda = TestVariantLambda<DeviceType>(test); + + char testnames[5][256] = {" " + ,"AddTest","AddTest 
TeamPolicy" + ,"ReduceTest","ReduceTest TeamPolicy" + }; + bool passed = true; + + if ( res_functor != res_lambda ) { + passed = false; + + std::cout << "CXX11 ( test = '" + << testnames[test] << "' FAILED : " + << res_functor << " != " << res_lambda + << std::endl ; + } + + return passed ; +#else + return true; +#endif +} + +} diff --git a/lib/kokkos/core/unit_test/TestCXX11Deduction.hpp b/lib/kokkos/core/unit_test/TestCXX11Deduction.hpp new file mode 100644 index 0000000000000000000000000000000000000000..359e17a44f1642d630b97987f8d049fc3217a9fb --- /dev/null +++ b/lib/kokkos/core/unit_test/TestCXX11Deduction.hpp @@ -0,0 +1,94 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <Kokkos_Core.hpp> + +#ifndef TESTCXX11DEDUCTION_HPP +#define TESTCXX11DEDUCTION_HPP + +namespace TestCXX11 { + +struct TestReductionDeductionTagA {}; +struct TestReductionDeductionTagB {}; + +template < class ExecSpace > +struct TestReductionDeductionFunctor { + + // KOKKOS_INLINE_FUNCTION + // void operator()( long i , long & value ) const + // { value += i + 1 ; } + + KOKKOS_INLINE_FUNCTION + void operator()( TestReductionDeductionTagA , long i , long & value ) const + { value += ( 2 * i + 1 ) + ( 2 * i + 2 ); } + + KOKKOS_INLINE_FUNCTION + void operator()( const TestReductionDeductionTagB & , const long i , long & value ) const + { value += ( 3 * i + 1 ) + ( 3 * i + 2 ) + ( 3 * i + 3 ) ; } + +}; + +template< class ExecSpace > +void test_reduction_deduction() +{ + typedef TestReductionDeductionFunctor< ExecSpace > Functor ; + + const long N = 50 ; + // const long answer = N % 2 ? ( N * ((N+1)/2 )) : ( (N/2) * (N+1) ); + const long answerA = N % 2 ? ( (2*N) * (((2*N)+1)/2 )) : ( ((2*N)/2) * ((2*N)+1) ); + const long answerB = N % 2 ? 
( (3*N) * (((3*N)+1)/2 )) : ( ((3*N)/2) * ((3*N)+1) ); + long result = 0 ; + + // Kokkos::parallel_reduce( Kokkos::RangePolicy<ExecSpace>(0,N) , Functor() , result ); + // ASSERT_EQ( answer , result ); + + Kokkos::parallel_reduce( Kokkos::RangePolicy<ExecSpace,TestReductionDeductionTagA>(0,N) , Functor() , result ); + ASSERT_EQ( answerA , result ); + + Kokkos::parallel_reduce( Kokkos::RangePolicy<ExecSpace,TestReductionDeductionTagB>(0,N) , Functor() , result ); + ASSERT_EQ( answerB , result ); +} + +} + +#endif + diff --git a/lib/kokkos/core/unit_test/TestCompilerMacros.hpp b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp new file mode 100644 index 0000000000000000000000000000000000000000..dfa2250c04ae8cc785383b1f64a127ad40279f57 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp @@ -0,0 +1,93 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> + +#define KOKKOS_PRAGMA_UNROLL(a) + +namespace TestCompilerMacros { + +template<class DEVICE_TYPE> +struct AddFunctor { + typedef DEVICE_TYPE execution_space; + typedef typename Kokkos::View<int**,execution_space> type; + type a,b; + int length; + + AddFunctor(type a_, type b_):a(a_),b(b_),length(a.dimension_1()) {} + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { +#ifdef KOKKOS_HAVE_PRAGMA_UNROLL + #pragma unroll +#endif +#ifdef KOKKOS_HAVE_PRAGMA_IVDEP + #pragma ivdep +#endif +#ifdef KOKKOS_HAVE_PRAGMA_VECTOR + #pragma vector always +#endif +#ifdef KOKKOS_HAVE_PRAGMA_LOOPCOUNT + #pragma loop count(128) +#endif +#ifdef KOKKOS_HAVE_PRAGMA_SIMD + #pragma simd +#endif + for(int j=0;j<length;j++) + a(i,j) += b(i,j); + } +}; + +template<class DeviceType> +bool Test() { + typedef typename Kokkos::View<int**,DeviceType> type; + type a("A",1024,128); + type b("B",1024,128); + + AddFunctor<DeviceType> f(a,b); + Kokkos::parallel_for(1024,f); + DeviceType::fence(); + return true; +} + +} diff --git 
a/lib/kokkos/core/unit_test/TestCuda.cpp b/lib/kokkos/core/unit_test/TestCuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e6155662525f08fd718e02a40243e942dd77104d --- /dev/null +++ b/lib/kokkos/core/unit_test/TestCuda.cpp @@ -0,0 +1,290 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <iostream> + +#include <Kokkos_Core.hpp> + +//---------------------------------------------------------------------------- + +#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp> +#include <impl/Kokkos_ViewTileLeft.hpp> +#include <TestTile.hpp> + +//---------------------------------------------------------------------------- + +#include <TestSharedAlloc.hpp> +#include <TestViewMapping.hpp> + +#include <TestViewImpl.hpp> +#include <TestAtomic.hpp> + +#include <TestViewAPI.hpp> +#include <TestViewSubview.hpp> +#include <TestViewOfClass.hpp> + +#include <TestReduce.hpp> +#include <TestScan.hpp> +#include <TestRange.hpp> +#include <TestTeam.hpp> +#include <TestAggregate.hpp> +#include <TestAggregateReduction.hpp> +#include <TestCompilerMacros.hpp> +#include <TestMemorySpaceTracking.hpp> +#include <TestMemoryPool.hpp> +#include <TestTeamVector.hpp> +#include <TestTemplateMetaFunctions.hpp> +#include <TestCXX11Deduction.hpp> + +#include <TestTaskPolicy.hpp> +#include <TestPolicyConstruction.hpp> + +#include <TestMDRange.hpp> + +//---------------------------------------------------------------------------- + +class cuda : public ::testing::Test { +protected: + static void SetUpTestCase(); + static void TearDownTestCase(); +}; + +void 
cuda::SetUpTestCase() + { + Kokkos::Cuda::print_configuration( std::cout ); + Kokkos::HostSpace::execution_space::initialize(); + Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) ); + } + +void cuda::TearDownTestCase() + { + Kokkos::Cuda::finalize(); + Kokkos::HostSpace::execution_space::finalize(); + } + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Test { + +__global__ +void test_abort() +{ + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< + Kokkos::CudaSpace , + Kokkos::HostSpace >::verify(); +} + +__global__ +void test_cuda_spaces_int_value( int * ptr ) +{ + if ( *ptr == 42 ) { *ptr = 2 * 42 ; } +} + +TEST_F( cuda , md_range ) { + TestMDRange_2D< Kokkos::Cuda >::test_for2(100,100); + + TestMDRange_3D< Kokkos::Cuda >::test_for3(100,100,100); +} + +TEST_F( cuda , compiler_macros ) +{ + ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Cuda >() ) ); +} + +TEST_F( cuda , memory_space ) +{ + TestMemorySpace< Kokkos::Cuda >(); +} + +TEST_F( cuda, uvm ) +{ + if ( Kokkos::CudaUVMSpace::available() ) { + + int * uvm_ptr = (int*) Kokkos::kokkos_malloc< Kokkos::CudaUVMSpace >("uvm_ptr",sizeof(int)); + + *uvm_ptr = 42 ; + + Kokkos::Cuda::fence(); + test_cuda_spaces_int_value<<<1,1>>>(uvm_ptr); + Kokkos::Cuda::fence(); + + EXPECT_EQ( *uvm_ptr, int(2*42) ); + + Kokkos::kokkos_free< Kokkos::CudaUVMSpace >(uvm_ptr ); + } +} + +//---------------------------------------------------------------------------- + +TEST_F( cuda , impl_shared_alloc ) +{ + test_shared_alloc< Kokkos::CudaSpace , Kokkos::HostSpace::execution_space >(); + test_shared_alloc< Kokkos::CudaUVMSpace , Kokkos::HostSpace::execution_space >(); + test_shared_alloc< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace::execution_space >(); +} + +TEST_F( cuda, policy_construction) { + TestRangePolicyConstruction< Kokkos::Cuda >(); + TestTeamPolicyConstruction< Kokkos::Cuda >(); +} + 
+TEST_F( cuda , impl_view_mapping ) +{ + test_view_mapping< Kokkos::Cuda >(); + test_view_mapping< Kokkos::CudaUVMSpace >(); + test_view_mapping_subview< Kokkos::Cuda >(); + test_view_mapping_subview< Kokkos::CudaUVMSpace >(); + test_view_mapping_operator< Kokkos::Cuda >(); + test_view_mapping_operator< Kokkos::CudaUVMSpace >(); + TestViewMappingAtomic< Kokkos::Cuda >::run(); +} + +TEST_F( cuda , view_of_class ) +{ + TestViewMappingClassValue< Kokkos::CudaSpace >::run(); + TestViewMappingClassValue< Kokkos::CudaUVMSpace >::run(); +} + +template< class MemSpace > +struct TestViewCudaTexture { + + enum { N = 1000 }; + + using V = Kokkos::Experimental::View<double*,MemSpace> ; + using T = Kokkos::Experimental::View<const double*, MemSpace, Kokkos::MemoryRandomAccess > ; + + V m_base ; + T m_tex ; + + struct TagInit {}; + struct TagTest {}; + + KOKKOS_INLINE_FUNCTION + void operator()( const TagInit & , const int i ) const { m_base[i] = i + 1 ; } + + KOKKOS_INLINE_FUNCTION + void operator()( const TagTest & , const int i , long & error_count ) const + { if ( m_tex[i] != i + 1 ) ++error_count ; } + + TestViewCudaTexture() + : m_base("base",N) + , m_tex( m_base ) + {} + + static void run() + { + EXPECT_TRUE( ( std::is_same< typename V::reference_type + , double & + >::value ) ); + + EXPECT_TRUE( ( std::is_same< typename T::reference_type + , const double + >::value ) ); + + EXPECT_TRUE( V::reference_type_is_lvalue_reference ); // An ordinary view + EXPECT_FALSE( T::reference_type_is_lvalue_reference ); // Texture fetch returns by value + + TestViewCudaTexture self ; + Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda , TagInit >(0,N) , self ); + long error_count = -1 ; + Kokkos::parallel_reduce( Kokkos::RangePolicy< Kokkos::Cuda , TagTest >(0,N) , self , error_count ); + EXPECT_EQ( error_count , 0 ); + } +}; + +TEST_F( cuda , impl_view_texture ) +{ + TestViewCudaTexture< Kokkos::CudaSpace >::run(); + TestViewCudaTexture< Kokkos::CudaUVMSpace >::run(); +} + 
+template< class MemSpace , class ExecSpace > +struct TestViewCudaAccessible { + + enum { N = 1000 }; + + using V = Kokkos::Experimental::View<double*,MemSpace> ; + + V m_base ; + + struct TagInit {}; + struct TagTest {}; + + KOKKOS_INLINE_FUNCTION + void operator()( const TagInit & , const int i ) const { m_base[i] = i + 1 ; } + + KOKKOS_INLINE_FUNCTION + void operator()( const TagTest & , const int i , long & error_count ) const + { if ( m_base[i] != i + 1 ) ++error_count ; } + + TestViewCudaAccessible() + : m_base("base",N) + {} + + static void run() + { + TestViewCudaAccessible self ; + Kokkos::parallel_for( Kokkos::RangePolicy< typename MemSpace::execution_space , TagInit >(0,N) , self ); + MemSpace::execution_space::fence(); + // Next access is a different execution space, must complete prior kernel. + long error_count = -1 ; + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , TagTest >(0,N) , self , error_count ); + EXPECT_EQ( error_count , 0 ); + } +}; + +TEST_F( cuda , impl_view_accessible ) +{ + TestViewCudaAccessible< Kokkos::CudaSpace , Kokkos::Cuda >::run(); + + TestViewCudaAccessible< Kokkos::CudaUVMSpace , Kokkos::Cuda >::run(); + TestViewCudaAccessible< Kokkos::CudaUVMSpace , Kokkos::HostSpace::execution_space >::run(); + + TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace , Kokkos::Cuda >::run(); + TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace::execution_space >::run(); +} + +} diff --git a/lib/kokkos/core/unit_test/TestCuda_a.cpp b/lib/kokkos/core/unit_test/TestCuda_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4680c333867ff0e68f572121a654f8f23d09fcfb --- /dev/null +++ b/lib/kokkos/core/unit_test/TestCuda_a.cpp @@ -0,0 +1,182 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <iostream> + +#include <Kokkos_Core.hpp> + +//---------------------------------------------------------------------------- + +#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp> +#include <impl/Kokkos_ViewTileLeft.hpp> +#include <TestTile.hpp> + +//---------------------------------------------------------------------------- + +#include <TestSharedAlloc.hpp> +#include <TestViewMapping.hpp> + +#include <TestViewImpl.hpp> +#include <TestAtomic.hpp> + +#include <TestViewAPI.hpp> +#include <TestViewSubview.hpp> +#include <TestViewOfClass.hpp> + +#include <TestReduce.hpp> +#include <TestScan.hpp> +#include <TestRange.hpp> +#include <TestTeam.hpp> +#include <TestAggregate.hpp> +#include <TestAggregateReduction.hpp> +#include <TestCompilerMacros.hpp> +#include <TestMemorySpaceTracking.hpp> +#include <TestMemoryPool.hpp> +#include <TestTeamVector.hpp> +#include <TestTemplateMetaFunctions.hpp> +#include <TestCXX11Deduction.hpp> + +#include <TestTaskPolicy.hpp> +#include <TestPolicyConstruction.hpp> + +//---------------------------------------------------------------------------- + +class cuda : public ::testing::Test { +protected: + static void SetUpTestCase(); + static void TearDownTestCase(); +}; + +//---------------------------------------------------------------------------- + +namespace Test { + +TEST_F( cuda, view_impl ) +{ + // test_abort<<<32,32>>>(); // Aborts the kernel with CUDA version 4.1 or greater + + test_view_impl< Kokkos::Cuda >(); +} + +TEST_F( cuda, view_api ) +{ + typedef Kokkos::View< const int * , Kokkos::Cuda , Kokkos::MemoryTraits< Kokkos::RandomAccess > > view_texture_managed ; + typedef Kokkos::View< const int * , Kokkos::Cuda , Kokkos::MemoryTraits< Kokkos::RandomAccess | Kokkos::Unmanaged > > view_texture_unmanaged ; + + TestViewAPI< double , Kokkos::Cuda >(); + TestViewAPI< double , 
Kokkos::CudaUVMSpace >(); + +#if 0 + Kokkos::View<double, Kokkos::Cuda > x("x"); + Kokkos::View<double[1], Kokkos::Cuda > y("y"); + // *x = 10 ; + // x() = 10 ; + // y[0] = 10 ; + // y(0) = 10 ; +#endif +} + +TEST_F( cuda , view_nested_view ) +{ + ::Test::view_nested_view< Kokkos::Cuda >(); +} + +TEST_F( cuda, view_subview_auto_1d_left ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Cuda >(); +} + +TEST_F( cuda, view_subview_auto_1d_right ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Cuda >(); +} + +TEST_F( cuda, view_subview_auto_1d_stride ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Cuda >(); +} + +TEST_F( cuda, view_subview_assign_strided ) { + TestViewSubview::test_1d_strided_assignment< Kokkos::Cuda >(); +} + +TEST_F( cuda, view_subview_left_0 ) { + TestViewSubview::test_left_0< Kokkos::CudaUVMSpace >(); +} + +TEST_F( cuda, view_subview_left_1 ) { + TestViewSubview::test_left_1< Kokkos::CudaUVMSpace >(); +} + +TEST_F( cuda, view_subview_left_2 ) { + TestViewSubview::test_left_2< Kokkos::CudaUVMSpace >(); +} + +TEST_F( cuda, view_subview_left_3 ) { + TestViewSubview::test_left_3< Kokkos::CudaUVMSpace >(); +} + +TEST_F( cuda, view_subview_right_0 ) { + TestViewSubview::test_right_0< Kokkos::CudaUVMSpace >(); +} + +TEST_F( cuda, view_subview_right_1 ) { + TestViewSubview::test_right_1< Kokkos::CudaUVMSpace >(); +} + +TEST_F( cuda, view_subview_right_3 ) { + TestViewSubview::test_right_3< Kokkos::CudaUVMSpace >(); +} + +TEST_F( cuda, view_subview_1d_assign ) { + TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace >(); +} + +TEST_F( cuda, view_subview_2d_from_3d ) { + TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace >(); +} + +TEST_F( cuda, view_subview_2d_from_5d ) { + TestViewSubview::test_2d_subview_5d< Kokkos::CudaUVMSpace >(); +} + +} diff --git a/lib/kokkos/core/unit_test/TestCuda_b.cpp b/lib/kokkos/core/unit_test/TestCuda_b.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..d4ca949e57cb02d15444ec7f3e48b123003b6a68 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestCuda_b.cpp @@ -0,0 +1,191 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <iostream> + +#include <Kokkos_Core.hpp> + +//---------------------------------------------------------------------------- + +#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp> +#include <impl/Kokkos_ViewTileLeft.hpp> +#include <TestTile.hpp> + +//---------------------------------------------------------------------------- + +#include <TestSharedAlloc.hpp> +#include <TestViewMapping.hpp> + +#include <TestViewImpl.hpp> +#include <TestAtomic.hpp> + +#include <TestViewAPI.hpp> +#include <TestViewSubview.hpp> +#include <TestViewOfClass.hpp> + +#include <TestReduce.hpp> +#include <TestScan.hpp> +#include <TestRange.hpp> +#include <TestTeam.hpp> +#include <TestAggregate.hpp> +#include <TestAggregateReduction.hpp> +#include <TestCompilerMacros.hpp> +#include <TestMemorySpaceTracking.hpp> +#include <TestMemoryPool.hpp> +#include <TestTeamVector.hpp> +#include <TestTemplateMetaFunctions.hpp> +#include <TestCXX11Deduction.hpp> + +#include <TestTaskPolicy.hpp> +#include <TestPolicyConstruction.hpp> + +//---------------------------------------------------------------------------- + +class cuda : public ::testing::Test { +protected: + static void SetUpTestCase(); + static void TearDownTestCase(); +}; + +//---------------------------------------------------------------------------- + +namespace Test { + +TEST_F( cuda, range_tag ) +{ + TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(3); + TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(3); + TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_scan(3); + TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(3); + TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(3); + TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(3); + TestRange< 
Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); + TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); + TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000); + TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001); + TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001); + TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001); + //TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000); +} + +TEST_F( cuda, team_tag ) +{ + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(3); + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(3); + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(3); + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(3); + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000); + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000); +} + +TEST_F( cuda, reduce ) +{ + TestReduce< long , Kokkos::Cuda >( 10000000 ); + TestReduce< double , Kokkos::Cuda >( 1000000 ); + TestReduce< int , Kokkos::Cuda >( 0 ); +} + +TEST_F( cuda , reducers ) +{ + TestReducers<int, Kokkos::Cuda>::execute_integer(); + TestReducers<size_t, Kokkos::Cuda>::execute_integer(); + TestReducers<double, Kokkos::Cuda>::execute_float(); + TestReducers<Kokkos::complex<double>, Kokkos::Cuda>::execute_basic(); +} + +TEST_F( cuda, reduce_team ) +{ + TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< long , Kokkos::Cuda , 
Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); + TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +TEST_F( cuda, shared_team ) +{ + TestSharedTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >(); + TestSharedTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +#if defined (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) +TEST_F( cuda, lambda_shared_team ) +{ + TestLambdaSharedTeam< Kokkos::CudaSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::CudaUVMSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::CudaSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >(); + TestLambdaSharedTeam< Kokkos::CudaUVMSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >(); + TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >(); +} +#endif + +TEST_F( cuda, shmem_size) { + TestShmemSize< Kokkos::Cuda >(); +} + +TEST_F( cuda, multi_level_scratch) { + TestMultiLevelScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >(); + TestMultiLevelScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +TEST_F( cuda, reduce_dynamic ) +{ + TestReduceDynamic< long , Kokkos::Cuda >( 10000000 ); + TestReduceDynamic< double , Kokkos::Cuda >( 1000000 ); +} + +TEST_F( cuda, reduce_dynamic_view ) +{ + TestReduceDynamicView< long , Kokkos::Cuda >( 10000000 ); + TestReduceDynamicView< double , Kokkos::Cuda >( 1000000 ); +} + +} diff --git 
a/lib/kokkos/core/unit_test/TestCuda_c.cpp b/lib/kokkos/core/unit_test/TestCuda_c.cpp new file mode 100644 index 0000000000000000000000000000000000000000..70584cead1b5efb7b6b0b372aed95dd522c25169 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestCuda_c.cpp @@ -0,0 +1,375 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <iostream> + +#include <Kokkos_Core.hpp> + +//---------------------------------------------------------------------------- + +#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp> +#include <impl/Kokkos_ViewTileLeft.hpp> +#include <TestTile.hpp> + +//---------------------------------------------------------------------------- + +#include <TestSharedAlloc.hpp> +#include <TestViewMapping.hpp> + +#include <TestViewImpl.hpp> +#include <TestAtomic.hpp> +#include <TestAtomicOperations.hpp> + +#include <TestViewAPI.hpp> +#include <TestViewSubview.hpp> +#include <TestViewOfClass.hpp> + +#include <TestReduce.hpp> +#include <TestScan.hpp> +#include <TestRange.hpp> +#include <TestTeam.hpp> +#include <TestAggregate.hpp> +#include <TestAggregateReduction.hpp> +#include <TestCompilerMacros.hpp> +#include <TestMemorySpaceTracking.hpp> +#include <TestMemoryPool.hpp> +#include <TestTeamVector.hpp> +#include <TestTemplateMetaFunctions.hpp> +#include <TestCXX11Deduction.hpp> + +#include <TestTaskPolicy.hpp> +#include <TestPolicyConstruction.hpp> + +//---------------------------------------------------------------------------- + +class cuda : public ::testing::Test { +protected: + static void SetUpTestCase(); + static void TearDownTestCase(); +}; + 
+//---------------------------------------------------------------------------- + +namespace Test { + +TEST_F( cuda, atomic ) +{ + const int loop_count = 1e3 ; + + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Cuda>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Cuda>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Cuda>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Cuda>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Cuda>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Cuda>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Cuda>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Cuda>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Cuda>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Cuda>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Cuda>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Cuda>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Cuda>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Cuda>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Cuda>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Cuda>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Cuda>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Cuda>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Cuda>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Cuda>(100,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Cuda>(100,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Cuda>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Cuda>(100,2) ) ); + ASSERT_TRUE( ( 
TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Cuda>(100,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Cuda>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Cuda>(100,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Cuda>(100,3) ) ); + +} + +TEST_F( cuda , atomic_operations ) +{ + const int start = 1; //Avoid zero for division + const int end = 11; + for (int i = start; i < end; ++i) + { + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 9 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 4 ) ) ); + 
ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 9 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 9 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 9 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 9 ) ) ); + + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Cuda>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Cuda>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Cuda>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Cuda>(start, end-i, 4 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Cuda>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Cuda>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Cuda>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Cuda>(start, end-i, 4 ) ) ); + } + +} + +//---------------------------------------------------------------------------- + +TEST_F( cuda, tile_layout) +{ + TestTile::test< Kokkos::Cuda , 1 , 1 >( 1 , 1 ); + TestTile::test< Kokkos::Cuda , 1 , 1 >( 2 , 3 ); + TestTile::test< Kokkos::Cuda , 1 , 1 >( 9 , 10 ); + + TestTile::test< Kokkos::Cuda , 2 , 2 >( 1 , 1 ); + TestTile::test< Kokkos::Cuda , 2 , 2 >( 2 , 3 ); + TestTile::test< Kokkos::Cuda , 2 , 2 >( 4 , 4 ); + TestTile::test< Kokkos::Cuda , 2 , 2 >( 9 , 9 ); + + TestTile::test< Kokkos::Cuda , 2 , 4 >( 9 , 9 ); + TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 9 ); + + TestTile::test< Kokkos::Cuda , 4 , 4 >( 1 , 1 ); + TestTile::test< Kokkos::Cuda , 4 , 4 >( 4 , 4 ); + TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 9 ); + TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 11 ); + + TestTile::test< Kokkos::Cuda , 8 , 8 >( 1 , 1 ); + TestTile::test< Kokkos::Cuda , 8 , 8 >( 4 , 4 ); + TestTile::test< Kokkos::Cuda , 8 , 8 >( 9 , 9 ); + TestTile::test< Kokkos::Cuda , 8 , 8 >( 9 , 11 ); +} + +TEST_F( cuda , view_aggregate ) +{ + TestViewAggregate< 
Kokkos::Cuda >(); + TestViewAggregateReduction< Kokkos::Cuda >(); +} + +TEST_F( cuda , scan ) +{ + TestScan< Kokkos::Cuda >::test_range( 1 , 1000 ); + TestScan< Kokkos::Cuda >( 1000000 ); + TestScan< Kokkos::Cuda >( 10000000 ); + + TestScan< Kokkos::Cuda >( 0 ); + TestScan< Kokkos::Cuda >( 0 , 0 ); + + Kokkos::Cuda::fence(); +} + +TEST_F( cuda , team_scan ) +{ + TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 10 ); + TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); + TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 10000 ); + TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); +} + +TEST_F( cuda , memory_pool ) +{ +// typedef Kokkos::CudaUVMSpace device_type; + typedef Kokkos::Cuda device_type; + + bool val = TestMemoryPool::test_mempool< device_type >( 128, 128000000 ); + ASSERT_TRUE( val ); + + Kokkos::Cuda::fence(); + + TestMemoryPool::test_mempool2< device_type >( 64, 4, 100000, 200000 ); + + Kokkos::Cuda::fence(); + + TestMemoryPool::test_memory_exhaustion< Kokkos::Cuda >(); + + Kokkos::Cuda::fence(); +} + +} + +//---------------------------------------------------------------------------- + +TEST_F( cuda , template_meta_functions ) +{ + TestTemplateMetaFunctions<int, Kokkos::Cuda >(); +} + +//---------------------------------------------------------------------------- + +namespace Test { + +TEST_F( cuda , reduction_deduction ) +{ + TestCXX11::test_reduction_deduction< Kokkos::Cuda >(); +} + +TEST_F( cuda , team_vector ) +{ + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(0) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(1) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(2) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(3) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(4) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(5) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(6) ) ); + ASSERT_TRUE( ( 
TestTeamVector::Test< Kokkos::Cuda >(7) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(8) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(9) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(10) ) ); +} + +TEST_F( cuda, triple_nested_parallelism ) +{ + TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 32 , 32 ); + TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 32 , 16 ); + TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 16 , 16 ); +} + +} + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +TEST_F( cuda , task_fib ) +{ + for ( int i = 0 ; i < 25 ; ++i ) { + TestTaskPolicy::TestFib< Kokkos::Cuda >::run(i, (i+1)*1000000 ); + } +} + +TEST_F( cuda , task_depend ) +{ + for ( int i = 0 ; i < 25 ; ++i ) { + TestTaskPolicy::TestTaskDependence< Kokkos::Cuda >::run(i); + } +} + +TEST_F( cuda , task_team ) +{ + //TestTaskPolicy::TestTaskTeam< Kokkos::Cuda >::run(1000); + TestTaskPolicy::TestTaskTeam< Kokkos::Cuda >::run(104); + TestTaskPolicy::TestTaskTeamValue< Kokkos::Cuda >::run(1000); +} + +//---------------------------------------------------------------------------- + +TEST_F( cuda , old_task_policy ) +{ + TestTaskPolicy::test_task_dep< Kokkos::Cuda >( 10 ); + + for ( long i = 0 ; i < 15 ; ++i ) { + // printf("TestTaskPolicy::test_fib< Kokkos::Cuda >(%d);\n",i); + TestTaskPolicy::test_fib< Kokkos::Cuda >(i,4096); + } + for ( long i = 0 ; i < 35 ; ++i ) { + // printf("TestTaskPolicy::test_fib2< Kokkos::Cuda >(%d);\n",i); + TestTaskPolicy::test_fib2< Kokkos::Cuda >(i,4096); + } +} + +TEST_F( cuda , old_task_team ) +{ + TestTaskPolicy::test_task_team< Kokkos::Cuda >(1000); +} + +TEST_F( cuda , old_task_latch ) +{ + TestTaskPolicy::test_latch< Kokkos::Cuda >(10); + TestTaskPolicy::test_latch< Kokkos::Cuda >(1000); +} + +#endif // #if defined( KOKKOS_ENABLE_TASKPOLICY ) + diff --git 
a/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1b1e0e67365fa28778cb848cbd52d0a2399c97e6 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp @@ -0,0 +1,242 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +#if !defined(KOKKOS_HAVE_CUDA) || defined(__CUDACC__) +//---------------------------------------------------------------------------- + +#include <TestViewImpl.hpp> +#include <TestAtomic.hpp> + +#include <TestViewAPI.hpp> + +#include <TestReduce.hpp> +#include <TestScan.hpp> +#include <TestTeam.hpp> +#include <TestAggregate.hpp> +#include <TestCompilerMacros.hpp> +#include <TestCXX11.hpp> +#include <TestTeamVector.hpp> + +namespace Test { + +class defaultdevicetype : public ::testing::Test { +protected: + static void SetUpTestCase() + { + Kokkos::initialize(); + } + + static void TearDownTestCase() + { + Kokkos::finalize(); + } +}; + + +TEST_F( defaultdevicetype, view_impl) { + test_view_impl< Kokkos::DefaultExecutionSpace >(); +} + +TEST_F( defaultdevicetype, view_api) { + TestViewAPI< double , Kokkos::DefaultExecutionSpace >(); +} + +TEST_F( defaultdevicetype, long_reduce) { + TestReduce< long , Kokkos::DefaultExecutionSpace >( 100000 ); +} + +TEST_F( defaultdevicetype, double_reduce) { + TestReduce< double , Kokkos::DefaultExecutionSpace >( 100000 ); +} + +TEST_F( defaultdevicetype, long_reduce_dynamic ) { + TestReduceDynamic< long , Kokkos::DefaultExecutionSpace >( 100000 ); +} + +TEST_F( defaultdevicetype, 
double_reduce_dynamic ) { + TestReduceDynamic< double , Kokkos::DefaultExecutionSpace >( 100000 ); +} + +TEST_F( defaultdevicetype, long_reduce_dynamic_view ) { + TestReduceDynamicView< long , Kokkos::DefaultExecutionSpace >( 100000 ); +} + + +TEST_F( defaultdevicetype , atomics ) +{ + const int loop_count = 1e4 ; + + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); + ASSERT_TRUE( ( 
TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,3) ) ); +} + +/*TEST_F( defaultdevicetype , view_remap ) +{ + enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; + + typedef Kokkos::View< double*[N1][N2][N3] , + Kokkos::LayoutRight , + Kokkos::DefaultExecutionSpace > output_type ; + + typedef Kokkos::View< int**[N2][N3] , + Kokkos::LayoutLeft , + Kokkos::DefaultExecutionSpace > input_type ; + + typedef Kokkos::View< int*[N0][N2][N3] , + Kokkos::LayoutLeft , + Kokkos::DefaultExecutionSpace > diff_type ; + + output_type output( "output" , N0 ); + input_type input ( "input" , N0 , N1 ); + diff_type diff ( "diff" , N0 ); + + int value = 0 ; + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { + input(i0,i1,i2,i3) = ++value ; + }}}} + + // Kokkos::deep_copy( diff , input ); // throw with incompatible shape + Kokkos::deep_copy( output , input ); + + value = 0 ; + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { + ++value ; + ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); + }}}} +}*/ + +//---------------------------------------------------------------------------- + + +TEST_F( defaultdevicetype , view_aggregate ) +{ + TestViewAggregate< Kokkos::DefaultExecutionSpace >(); +} + +//---------------------------------------------------------------------------- + +TEST_F( defaultdevicetype , scan ) +{ + TestScan< Kokkos::DefaultExecutionSpace >::test_range( 1 , 1000 ); + TestScan< Kokkos::DefaultExecutionSpace >( 1000000 ); + TestScan< Kokkos::DefaultExecutionSpace >( 10000000 ); + 
Kokkos::DefaultExecutionSpace::fence(); +} + + +//---------------------------------------------------------------------------- + +TEST_F( defaultdevicetype , compiler_macros ) +{ + ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::DefaultExecutionSpace >() ) ); +} + + +//---------------------------------------------------------------------------- +TEST_F( defaultdevicetype , cxx11 ) +{ + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(1) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(2) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(3) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(4) ) ); +} + +TEST_F( defaultdevicetype , team_vector ) +{ + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(0) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(1) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(2) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(3) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(4) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(5) ) ); +} + +TEST_F( defaultdevicetype , malloc ) +{ + int* data = (int*) Kokkos::kokkos_malloc(100*sizeof(int)); + ASSERT_NO_THROW(data = (int*) Kokkos::kokkos_realloc(data,120*sizeof(int))); + Kokkos::kokkos_free(data); + + int* data2 = (int*) Kokkos::kokkos_malloc(0); + ASSERT_TRUE(data2==NULL); + Kokkos::kokkos_free(data2); +} + +} // namespace test + +#endif diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a17ed97a9ff4130a2ca2ea087b400e9595c69dd9 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp @@ -0,0 +1,419 @@ +/* +//@HEADER +// ************************************************************************ +// +// 
Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> +#ifdef KOKKOS_HAVE_OPENMP +#include <omp.h> +#endif + +#if !defined(KOKKOS_HAVE_CUDA) || defined(__CUDACC__) +//---------------------------------------------------------------------------- + +namespace Test { + +namespace Impl { + + char** init_kokkos_args(bool do_threads,bool do_numa,bool do_device,bool do_other, int& nargs, Kokkos::InitArguments& init_args) { + nargs = (do_threads?1:0) + + (do_numa?1:0) + + (do_device?1:0) + + (do_other?4:0); + char** args_kokkos = new char*[nargs]; + for(int i = 0; i < nargs; i++) + args_kokkos[i] = new char[20]; + + int threads_idx = do_other?1:0; + int numa_idx = (do_other?3:0) + (do_threads?1:0); + int device_idx = (do_other?3:0) + (do_threads?1:0) + (do_numa?1:0); + + + if(do_threads) { + int nthreads = 3; + +#ifdef KOKKOS_HAVE_OPENMP + if(omp_get_max_threads() < 3) + nthreads = omp_get_max_threads(); +#endif + + if(Kokkos::hwloc::available()) { + if(Kokkos::hwloc::get_available_threads_per_core()<3) + nthreads = Kokkos::hwloc::get_available_threads_per_core() + * Kokkos::hwloc::get_available_numa_count(); + } + +#ifdef KOKKOS_HAVE_SERIAL + if(Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value || + Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) { + nthreads = 1; + } +#endif + init_args.num_threads = nthreads; + sprintf(args_kokkos[threads_idx],"--threads=%i",nthreads); + } + + if(do_numa) { + int numa = 1; + if(Kokkos::hwloc::available()) + numa = Kokkos::hwloc::get_available_numa_count(); +#ifdef KOKKOS_HAVE_SERIAL + if(Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value || + Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) { + numa = 1; + } +#endif + + init_args.num_numa = numa; + 
sprintf(args_kokkos[numa_idx],"--numa=%i",numa); + } + + if(do_device) { + + init_args.device_id = 0; + sprintf(args_kokkos[device_idx],"--device=%i",0); + } + + if(do_other) { + sprintf(args_kokkos[0],"--dummyarg=1"); + sprintf(args_kokkos[threads_idx+(do_threads?1:0)],"--dummy2arg"); + sprintf(args_kokkos[threads_idx+(do_threads?1:0)+1],"dummy3arg"); + sprintf(args_kokkos[device_idx+(do_device?1:0)],"dummy4arg=1"); + } + + + return args_kokkos; + } + + Kokkos::InitArguments init_initstruct(bool do_threads, bool do_numa, bool do_device) { + Kokkos::InitArguments args; + + if(do_threads) { + int nthreads = 3; + +#ifdef KOKKOS_HAVE_OPENMP + if(omp_get_max_threads() < 3) + nthreads = omp_get_max_threads(); +#endif + + if(Kokkos::hwloc::available()) { + if(Kokkos::hwloc::get_available_threads_per_core()<3) + nthreads = Kokkos::hwloc::get_available_threads_per_core() + * Kokkos::hwloc::get_available_numa_count(); + } +#ifdef KOKKOS_HAVE_SERIAL + if(Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value || + Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) { + nthreads = 1; + } +#endif + + args.num_threads = nthreads; + } + + if(do_numa) { + int numa = 1; + if(Kokkos::hwloc::available()) + numa = Kokkos::hwloc::get_available_numa_count(); +#ifdef KOKKOS_HAVE_SERIAL + if(Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value || + Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) { + numa = 1; + } +#endif + args.num_numa = numa; + } + + if(do_device) { + args.device_id = 0; + } + + return args; + } + + void check_correct_initialization(const Kokkos::InitArguments& argstruct) { + ASSERT_EQ( Kokkos::DefaultExecutionSpace::is_initialized(), 1); + ASSERT_EQ( Kokkos::HostSpace::execution_space::is_initialized(), 1); + + //Figure out the number of threads the HostSpace ExecutionSpace should have initialized to + int expected_nthreads = argstruct.num_threads; + 
if(expected_nthreads<1) { + if(Kokkos::hwloc::available()) { + expected_nthreads = Kokkos::hwloc::get_available_numa_count() + * Kokkos::hwloc::get_available_cores_per_numa() + * Kokkos::hwloc::get_available_threads_per_core(); + } else { + #ifdef KOKKOS_HAVE_OPENMP + if(Kokkos::Impl::is_same<Kokkos::HostSpace::execution_space,Kokkos::OpenMP>::value) { + expected_nthreads = omp_get_max_threads(); + } else + #endif + expected_nthreads = 1; + + } + #ifdef KOKKOS_HAVE_SERIAL + if(Kokkos::Impl::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Serial>::value || + Kokkos::Impl::is_same<Kokkos::DefaultHostExecutionSpace,Kokkos::Serial>::value ) + expected_nthreads = 1; + #endif + } + + int expected_numa = argstruct.num_numa; + if(expected_numa<1) { + if(Kokkos::hwloc::available()) { + expected_numa = Kokkos::hwloc::get_available_numa_count(); + } else { + expected_numa = 1; + } + #ifdef KOKKOS_HAVE_SERIAL + if(Kokkos::Impl::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Serial>::value || + Kokkos::Impl::is_same<Kokkos::DefaultHostExecutionSpace,Kokkos::Serial>::value ) + expected_numa = 1; + #endif + } + ASSERT_EQ(Kokkos::HostSpace::execution_space::thread_pool_size(),expected_nthreads); + +#ifdef KOKKOS_HAVE_CUDA + if(Kokkos::Impl::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Cuda>::value) { + int device; + cudaGetDevice( &device ); + int expected_device = argstruct.device_id; + if(argstruct.device_id<0) { + expected_device = 0; + } + ASSERT_EQ(expected_device,device); + } +#endif + } + + //ToDo: Add check whether correct number of threads are actually started + void test_no_arguments() { + Kokkos::initialize(); + check_correct_initialization(Kokkos::InitArguments()); + Kokkos::finalize(); + } + + void test_commandline_args(int nargs, char** args, const Kokkos::InitArguments& argstruct) { + Kokkos::initialize(nargs,args); + check_correct_initialization(argstruct); + Kokkos::finalize(); + } + + void test_initstruct_args(const Kokkos::InitArguments& args) { + 
Kokkos::initialize(args); + check_correct_initialization(args); + Kokkos::finalize(); + } +} + +class defaultdevicetypeinit : public ::testing::Test { +protected: + static void SetUpTestCase() + { + } + + static void TearDownTestCase() + { + } +}; + +#ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_01 +TEST_F( defaultdevicetypeinit, no_args) { + Impl::test_no_arguments(); +} +#endif + +#ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_02 +TEST_F( defaultdevicetypeinit, commandline_args_empty) { + Kokkos::InitArguments argstruct; + int nargs = 0; + char** args = Impl::init_kokkos_args(false,false,false,false,nargs, argstruct); + Impl::test_commandline_args(nargs,args,argstruct); + for(int i = 0; i < nargs; i++) + delete [] args[i]; + delete [] args; +} +#endif + +#ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_03 +TEST_F( defaultdevicetypeinit, commandline_args_other) { + Kokkos::InitArguments argstruct; + int nargs = 0; + char** args = Impl::init_kokkos_args(false,false,false,true,nargs, argstruct); + Impl::test_commandline_args(nargs,args,argstruct); + for(int i = 0; i < nargs; i++) + delete [] args[i]; + delete [] args; +} +#endif + +#ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_04 +TEST_F( defaultdevicetypeinit, commandline_args_nthreads) { + Kokkos::InitArguments argstruct; + int nargs = 0; + char** args = Impl::init_kokkos_args(true,false,false,false,nargs, argstruct); + Impl::test_commandline_args(nargs,args,argstruct); + for(int i = 0; i < nargs; i++) + delete [] args[i]; + delete [] args; +} +#endif + +#ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_05 +TEST_F( defaultdevicetypeinit, commandline_args_nthreads_numa) { + Kokkos::InitArguments argstruct; + int nargs = 0; + char** args = Impl::init_kokkos_args(true,true,false,false,nargs, argstruct); + Impl::test_commandline_args(nargs,args,argstruct); + for(int i = 0; i < nargs; i++) + delete [] args[i]; + delete [] args; +} +#endif + +#ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_06 +TEST_F( defaultdevicetypeinit, 
commandline_args_nthreads_numa_device) { + Kokkos::InitArguments argstruct; + int nargs = 0; + char** args = Impl::init_kokkos_args(true,true,true,false,nargs, argstruct); + Impl::test_commandline_args(nargs,args,argstruct); + for(int i = 0; i < nargs; i++) + delete [] args[i]; + delete [] args; +} +#endif + +#ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_07 +TEST_F( defaultdevicetypeinit, commandline_args_nthreads_device) { + Kokkos::InitArguments argstruct; + int nargs = 0; + char** args = Impl::init_kokkos_args(true,false,true,false,nargs, argstruct); + Impl::test_commandline_args(nargs,args,argstruct); + for(int i = 0; i < nargs; i++) + delete [] args[i]; + delete [] args; +} +#endif + +#ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_08 +TEST_F( defaultdevicetypeinit, commandline_args_numa_device) { + Kokkos::InitArguments argstruct; + int nargs = 0; + char** args = Impl::init_kokkos_args(false,true,true,false,nargs, argstruct); + Impl::test_commandline_args(nargs,args,argstruct); + for(int i = 0; i < nargs; i++) + delete [] args[i]; + delete [] args; +} +#endif + +#ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_09 +TEST_F( defaultdevicetypeinit, commandline_args_device) { + Kokkos::InitArguments argstruct; + int nargs = 0; + char** args = Impl::init_kokkos_args(false,false,true,false,nargs, argstruct); + Impl::test_commandline_args(nargs,args,argstruct); + for(int i = 0; i < nargs; i++) + delete [] args[i]; + delete [] args; +} +#endif + +#ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_10 +TEST_F( defaultdevicetypeinit, commandline_args_nthreads_numa_device_other) { + Kokkos::InitArguments argstruct; + int nargs = 0; + char** args = Impl::init_kokkos_args(true,true,true,true,nargs, argstruct); + Impl::test_commandline_args(nargs,args,argstruct); + for(int i = 0; i < nargs; i++) + delete [] args[i]; + delete [] args; +} +#endif + +#ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_11 +TEST_F( defaultdevicetypeinit, initstruct_default) { + Kokkos::InitArguments args; + 
Impl::test_initstruct_args(args); +} +#endif + +#ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_12 +TEST_F( defaultdevicetypeinit, initstruct_nthreads) { + Kokkos::InitArguments args = Impl::init_initstruct(true,false,false); + Impl::test_initstruct_args(args); +} +#endif + +#ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_13 +TEST_F( defaultdevicetypeinit, initstruct_nthreads_numa) { + Kokkos::InitArguments args = Impl::init_initstruct(true,true,false); + Impl::test_initstruct_args(args); +} +#endif + +#ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_14 +TEST_F( defaultdevicetypeinit, initstruct_device) { + Kokkos::InitArguments args = Impl::init_initstruct(false,false,true); + Impl::test_initstruct_args(args); +} +#endif + +#ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_15 +TEST_F( defaultdevicetypeinit, initstruct_nthreads_device) { + Kokkos::InitArguments args = Impl::init_initstruct(true,false,true); + Impl::test_initstruct_args(args); +} +#endif + +#ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_16 +TEST_F( defaultdevicetypeinit, initstruct_nthreads_numa_device) { + Kokkos::InitArguments args = Impl::init_initstruct(true,true,true); + Impl::test_initstruct_args(args); +} +#endif + + +} // namespace test + +#endif diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_1.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_1.cpp new file mode 100644 index 0000000000000000000000000000000000000000..40a773b3b8fd18fb0a4cce396b4cc19400b9ad41 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_1.cpp @@ -0,0 +1,2 @@ +#define KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_01 +#include<TestDefaultDeviceTypeInit.hpp> diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_10.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_10.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f12c4f62b25acbb44e1f7d58876884035c250d9f --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_10.cpp @@ -0,0 +1,2 @@ +#define 
KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_10 +#include<TestDefaultDeviceTypeInit.hpp> diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_11.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_11.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c7ffd7b94e5675b28d519e5dc785ccfb55549b31 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_11.cpp @@ -0,0 +1,2 @@ +#define KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_11 +#include<TestDefaultDeviceTypeInit.hpp> diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_12.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_12.cpp new file mode 100644 index 0000000000000000000000000000000000000000..24e2b152014a8308e1ef3eccaa44ad76d884f9d2 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_12.cpp @@ -0,0 +1,2 @@ +#define KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_12 +#include<TestDefaultDeviceTypeInit.hpp> diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_13.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_13.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7968c13b661cad0b54697e86626d166fe0949602 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_13.cpp @@ -0,0 +1,2 @@ +#define KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_13 +#include<TestDefaultDeviceTypeInit.hpp> diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_14.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_14.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ab0563c6dc03d45fc696ea538cb75d6288f1e576 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_14.cpp @@ -0,0 +1,2 @@ +#define KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_14 +#include<TestDefaultDeviceTypeInit.hpp> diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_15.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_15.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..70a8ca1727515910f5bae07703421e9e95e6ab42 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_15.cpp @@ -0,0 +1,2 @@ +#define KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_15 +#include<TestDefaultDeviceTypeInit.hpp> diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_16.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..727c7a95eb9f949f6ecb0e910dc8ff009d6b8225 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_16.cpp @@ -0,0 +1,2 @@ +#define KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_16 +#include<TestDefaultDeviceTypeInit.hpp> diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_2.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_2.cpp new file mode 100644 index 0000000000000000000000000000000000000000..88fba34c50e93c2ddb8e730d50e08d853b44dba5 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_2.cpp @@ -0,0 +1,2 @@ +#define KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_02 +#include<TestDefaultDeviceTypeInit.hpp> diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_3.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_3.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b3562cc53d6b4cf2a4162b916d84f94e1ab482a6 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_3.cpp @@ -0,0 +1,2 @@ +#define KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_03 +#include<TestDefaultDeviceTypeInit.hpp> diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_4.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_4.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0d4983319cb565f2ba4283b910bd16cabc48253a --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_4.cpp @@ -0,0 +1,2 @@ +#define KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_04 +#include<TestDefaultDeviceTypeInit.hpp> diff --git 
a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_5.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_5.cpp new file mode 100644 index 0000000000000000000000000000000000000000..026fb01f8870af1a2d24f59da17a5d419721ba71 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_5.cpp @@ -0,0 +1,2 @@ +#define KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_05 +#include<TestDefaultDeviceTypeInit.hpp> diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_6.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_6.cpp new file mode 100644 index 0000000000000000000000000000000000000000..937a13160e40f6ec4666a7f4cb7eb7dc62d8a8fc --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_6.cpp @@ -0,0 +1,2 @@ +#define KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_06 +#include<TestDefaultDeviceTypeInit.hpp> diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_7.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_7.cpp new file mode 100644 index 0000000000000000000000000000000000000000..992c854c1a10224a09d897a917e309f654cd4763 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_7.cpp @@ -0,0 +1,2 @@ +#define KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_07 +#include<TestDefaultDeviceTypeInit.hpp> diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_8.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_8.cpp new file mode 100644 index 0000000000000000000000000000000000000000..07a8b1cb7c2ea3fc515f2f403ad6401353d7f7a1 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_8.cpp @@ -0,0 +1,2 @@ +#define KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_08 +#include<TestDefaultDeviceTypeInit.hpp> diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_9.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_9.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4d8c05be2d7f486487d7f39357982361117b4b76 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_9.cpp 
@@ -0,0 +1,2 @@ +#define KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_09 +#include<TestDefaultDeviceTypeInit.hpp> diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c15f81223329eaa749d84fbef28340638fd3c835 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp @@ -0,0 +1,76 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +#if !defined(KOKKOS_HAVE_CUDA) || defined(__CUDACC__) +//---------------------------------------------------------------------------- + +#include <TestReduce.hpp> + + +namespace Test { + +class defaultdevicetype : public ::testing::Test { +protected: + static void SetUpTestCase() + { + Kokkos::initialize(); + } + + static void TearDownTestCase() + { + Kokkos::finalize(); + } +}; + + +TEST_F( defaultdevicetype, reduce_instantiation) { + TestReduceCombinatoricalInstantiation<>::execute(); +} + +} // namespace test + +#endif diff --git a/lib/kokkos/core/unit_test/TestHWLOC.cpp b/lib/kokkos/core/unit_test/TestHWLOC.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1637dec5de4ff762cfbd259ee47932b5e85eb4d0 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestHWLOC.cpp @@ -0,0 +1,69 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <iostream> +#include <Kokkos_hwloc.hpp> + +namespace Test { + +class hwloc : public ::testing::Test { +protected: + static void SetUpTestCase() + {} + + static void TearDownTestCase() + {} +}; + +TEST_F( hwloc, query) +{ + std::cout << " NUMA[" << Kokkos::hwloc::get_available_numa_count() << "]" + << " CORE[" << Kokkos::hwloc::get_available_cores_per_numa() << "]" + << " PU[" << Kokkos::hwloc::get_available_threads_per_core() << "]" + << std::endl ; +} + +} + diff --git a/lib/kokkos/core/unit_test/TestMDRange.hpp b/lib/kokkos/core/unit_test/TestMDRange.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9894d1ce697c1f109163f7711e62f12cfceef703 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestMDRange.hpp @@ -0,0 +1,555 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <stdio.h> + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +/*--------------------------------------------------------------------------*/ + +namespace Test { +namespace { + +template <typename ExecSpace > +struct TestMDRange_2D { + + using DataType = int ; + using ViewType = typename Kokkos::View< DataType** , ExecSpace > ; + using HostViewType = typename ViewType::HostMirror ; + + ViewType input_view ; + + TestMDRange_2D( const DataType N0, const DataType N1 ) : input_view("input_view", N0, N1) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int i , const int j ) const + { + input_view(i,j) = 1; + } + + + static void test_for2( const int64_t N0, const int64_t N1 ) + { + + using namespace Kokkos::Experimental; + + { + using range_type = MDRangePolicy< ExecSpace, Rank<2>, Kokkos::IndexType<int> >; + range_type range( {0,0}, {N0,N1} ); + TestMDRange_2D functor(N0,N1); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( 
h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + if ( h_view(i,j) != 1 ) { + ++counter; + } + }} + if ( counter != 0 ) + printf(" Errors in test_for2; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + { + using range_type = MDRangePolicy< ExecSpace, Rank<2, Iterate::Default, Iterate::Default >, Kokkos::IndexType<int> >; + + range_type range( {0,0}, {N0,N1} ); + TestMDRange_2D functor(N0,N1); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + if ( h_view(i,j) != 1 ) { + ++counter; + } + }} + if ( counter != 0 ) + printf(" Errors in test_for2; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + { + using range_type = MDRangePolicy< ExecSpace, Rank<2, Iterate::Default, Iterate::Flat >, Kokkos::IndexType<int> >; + + range_type range( {0,0}, {N0,N1} ); + TestMDRange_2D functor(N0,N1); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + if ( h_view(i,j) != 1 ) { + ++counter; + } + }} + if ( counter != 0 ) + printf(" Errors in test_for2; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + { + using range_type = MDRangePolicy< ExecSpace, Rank<2, Iterate::Right, Iterate::Flat >, Kokkos::IndexType<int> >; + + range_type range( {0,0}, {N0,N1} ); + TestMDRange_2D functor(N0,N1); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + if ( h_view(i,j) != 1 ) { + ++counter; + } + 
}} + if ( counter != 0 ) + printf(" Errors in test_for2; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + { + using range_type = MDRangePolicy< ExecSpace, Rank<2, Iterate::Left, Iterate::Flat >, Kokkos::IndexType<int> >; + + range_type range( {0,0}, {N0,N1} ); + TestMDRange_2D functor(N0,N1); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + if ( h_view(i,j) != 1 ) { + ++counter; + } + }} + if ( counter != 0 ) + printf(" Errors in test_for2; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + { + using range_type = MDRangePolicy< ExecSpace, Rank<2, Iterate::Left , Iterate::Left >, Kokkos::IndexType<int> >; + + range_type range( {0,0}, {N0,N1}, {3,3} ); + TestMDRange_2D functor(N0,N1); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + if ( h_view(i,j) != 1 ) { + ++counter; + } + }} + if ( counter != 0 ) + printf(" Errors in test_for2; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + { + using range_type = MDRangePolicy< ExecSpace, Rank<2, Iterate::Left , Iterate::Right >, Kokkos::IndexType<int> >; + + range_type range( {0,0}, {N0,N1}, {7,7} ); + TestMDRange_2D functor(N0,N1); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + if ( h_view(i,j) != 1 ) { + ++counter; + } + }} + if ( counter != 0 ) + printf(" Errors in test_for2; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + { + using range_type = 
MDRangePolicy< ExecSpace, Rank<2, Iterate::Right, Iterate::Left >, Kokkos::IndexType<int> >; + + range_type range( {0,0}, {N0,N1}, {16,16} ); + TestMDRange_2D functor(N0,N1); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + if ( h_view(i,j) != 1 ) { + ++counter; + } + }} + if ( counter != 0 ) + printf(" Errors in test_for2; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + { + using range_type = MDRangePolicy< ExecSpace, Rank<2, Iterate::Right, Iterate::Right >, Kokkos::IndexType<int> >; + + range_type range( {0,0}, {N0,N1}, {5,16} ); + TestMDRange_2D functor(N0,N1); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + if ( h_view(i,j) != 1 ) { + ++counter; + } + }} + if ( counter != 0 ) + printf(" Errors in test_for2; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + } //end test_for2 +}; //MDRange_2D + +template <typename ExecSpace > +struct TestMDRange_3D { + + using DataType = int ; + using ViewType = typename Kokkos::View< DataType*** , ExecSpace > ; + using HostViewType = typename ViewType::HostMirror ; + + ViewType input_view ; + + TestMDRange_3D( const DataType N0, const DataType N1, const DataType N2 ) : input_view("input_view", N0, N1, N2) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int i , const int j , const int k ) const + { + input_view(i,j,k) = 1; + } + + static void test_for3( const int64_t N0, const int64_t N1, const int64_t N2 ) + { + using namespace Kokkos::Experimental; + + { + using range_type = MDRangePolicy< ExecSpace, Rank<3>, Kokkos::IndexType<int> >; + + range_type range( {0,0,0}, {N0,N1,N2} ); + 
TestMDRange_3D functor(N0,N1,N2); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + for ( int k=0; k<N2; ++k ) { + if ( h_view(i,j,k) != 1 ) { + ++counter; + } + }}} + if ( counter != 0 ) + printf(" Errors in test_for3; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + { + using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Default, Iterate::Default >, Kokkos::IndexType<int> >; + + range_type range( {0,0,0}, {N0,N1,N2} ); + TestMDRange_3D functor(N0,N1,N2); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + for ( int k=0; k<N2; ++k ) { + if ( h_view(i,j,k) != 1 ) { + ++counter; + } + }}} + if ( counter != 0 ) + printf(" Errors in test_for3; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + { + using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Flat, Iterate::Default>, Kokkos::IndexType<int> >; + + range_type range( {0,0,0}, {N0,N1,N2} ); + TestMDRange_3D functor(N0,N1,N2); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + for ( int k=0; k<N2; ++k ) { + if ( h_view(i,j,k) != 1 ) { + ++counter; + } + }}} + if ( counter != 0 ) + printf(" Errors in test_for3; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + { + using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Flat, Iterate::Flat >, Kokkos::IndexType<int> >; + + range_type range( {0,0,0}, {N0,N1,N2} ); + TestMDRange_3D functor(N0,N1,N2); + + 
md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + for ( int k=0; k<N2; ++k ) { + if ( h_view(i,j,k) != 1 ) { + ++counter; + } + }}} + if ( counter != 0 ) + printf(" Errors in test_for3; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + { + using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Left, Iterate::Flat >, Kokkos::IndexType<int> >; + + range_type range( {0,0,0}, {N0,N1,N2} ); + TestMDRange_3D functor(N0,N1,N2); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + for ( int k=0; k<N2; ++k ) { + if ( h_view(i,j,k) != 1 ) { + ++counter; + } + }}} + if ( counter != 0 ) + printf(" Errors in test_for3; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + { + using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Right, Iterate::Flat >, Kokkos::IndexType<int> >; + + range_type range( {0,0,0}, {N0,N1,N2} ); + TestMDRange_3D functor(N0,N1,N2); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + for ( int k=0; k<N2; ++k ) { + if ( h_view(i,j,k) != 1 ) { + ++counter; + } + }}} + if ( counter != 0 ) + printf(" Errors in test_for3; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + { + using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Left, Iterate::Left >, Kokkos::IndexType<int> >; + + range_type range( {0,0,0}, {N0,N1,N2}, {2,4,2} ); + TestMDRange_3D functor(N0,N1,N2); + + md_parallel_for( range, functor ); + 
+ HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + for ( int k=0; k<N2; ++k ) { + if ( h_view(i,j,k) != 1 ) { + ++counter; + } + }}} + if ( counter != 0 ) + printf(" Errors in test_for3; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + { + using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Left, Iterate::Right >, Kokkos::IndexType<int> >; + + range_type range( {0,0,0}, {N0,N1,N2}, {3,5,7} ); + TestMDRange_3D functor(N0,N1,N2); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + for ( int k=0; k<N2; ++k ) { + if ( h_view(i,j,k) != 1 ) { + ++counter; + } + }}} + if ( counter != 0 ) + printf(" Errors in test_for3; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + { + using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Right, Iterate::Left >, Kokkos::IndexType<int> >; + + range_type range( {0,0,0}, {N0,N1,N2}, {8,8,8} ); + TestMDRange_3D functor(N0,N1,N2); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + for ( int k=0; k<N2; ++k ) { + if ( h_view(i,j,k) != 1 ) { + ++counter; + } + }}} + if ( counter != 0 ) + printf(" Errors in test_for3; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + { + using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Right, Iterate::Right >, Kokkos::IndexType<int> >; + + range_type range( {0,0,0}, {N0,N1,N2}, {2,4,2} ); + TestMDRange_3D functor(N0,N1,N2); + + md_parallel_for( range, functor ); + + HostViewType 
h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view , functor.input_view ); + + int counter = 0; + for ( int i=0; i<N0; ++i ) { + for ( int j=0; j<N1; ++j ) { + for ( int k=0; k<N2; ++k ) { + if ( h_view(i,j,k) != 1 ) { + ++counter; + } + }}} + if ( counter != 0 ) + printf(" Errors in test_for3; mismatches = %d\n\n",counter); + ASSERT_EQ( counter , 0 ); + } + + } //end test_for3 +}; + +} /* namespace */ +} /* namespace Test */ + +/*--------------------------------------------------------------------------*/ + diff --git a/lib/kokkos/core/unit_test/TestMemoryPool.hpp b/lib/kokkos/core/unit_test/TestMemoryPool.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cf650b0bc8baa1949643a57ffff808c83f406286 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestMemoryPool.hpp @@ -0,0 +1,820 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#ifndef KOKKOS_UNITTEST_MEMPOOL_HPP +#define KOKKOS_UNITTEST_MEMPOOL_HPP + +#include <stdio.h> +#include <iostream> +#include <cmath> +#include <algorithm> + +#include <impl/Kokkos_Timer.hpp> + +//#define TESTMEMORYPOOL_PRINT +//#define TESTMEMORYPOOL_PRINT_STATUS + +#ifdef KOKKOS_HAVE_CUDA +#define STRIDE 32 +#else +#define STRIDE 1 +#endif + +namespace TestMemoryPool { + +struct pointer_obj { + uint64_t * ptr; +}; + +struct pointer_obj2 { + void * ptr; + size_t size; +}; + +template < typename PointerView, typename Allocator > +struct allocate_memory { + typedef typename PointerView::execution_space execution_space; + typedef typename execution_space::size_type size_type; + + PointerView m_pointers; + size_t m_chunk_size; + Allocator m_mempool; + + allocate_memory( PointerView & ptrs, size_t num_ptrs, + size_t cs, Allocator & m ) + : m_pointers( ptrs ), m_chunk_size( cs ), m_mempool( m ) + { + // Initialize the view with the out degree of each vertex. 
+ Kokkos::parallel_for( num_ptrs * STRIDE, *this ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( size_type i ) const + { + if ( i % STRIDE == 0 ) { + m_pointers[i / STRIDE].ptr = + static_cast< uint64_t * >( m_mempool.allocate( m_chunk_size ) ); + } + } +}; + +template < typename PointerView > +struct count_invalid_memory { + typedef typename PointerView::execution_space execution_space; + typedef typename execution_space::size_type size_type; + typedef uint64_t value_type; + + PointerView m_pointers; + uint64_t & m_result; + + count_invalid_memory( PointerView & ptrs, size_t num_ptrs, uint64_t & res ) + : m_pointers( ptrs ), m_result( res ) + { + // Initialize the view with the out degree of each vertex. + Kokkos::parallel_reduce( num_ptrs * STRIDE, *this, m_result ); + } + + KOKKOS_INLINE_FUNCTION + void init( value_type & v ) const + { v = 0; } + + KOKKOS_INLINE_FUNCTION + void join( volatile value_type & dst, volatile value_type const & src ) const + { dst += src; } + + KOKKOS_INLINE_FUNCTION + void operator()( size_type i, value_type & r ) const + { + if ( i % STRIDE == 0 ) { + r += ( m_pointers[i / STRIDE].ptr == 0 ); + } + } +}; + +template < typename PointerView > +struct fill_memory { + typedef typename PointerView::execution_space execution_space; + typedef typename execution_space::size_type size_type; + + PointerView m_pointers; + + fill_memory( PointerView & ptrs, size_t num_ptrs ) : m_pointers( ptrs ) + { + // Initialize the view with the out degree of each vertex. 
+ Kokkos::parallel_for( num_ptrs * STRIDE, *this ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( size_type i ) const + { + if ( i % STRIDE == 0 ) { + *m_pointers[i / STRIDE].ptr = i / STRIDE ; + } + } +}; + +template < typename PointerView > +struct sum_memory { + typedef typename PointerView::execution_space execution_space; + typedef typename execution_space::size_type size_type; + typedef uint64_t value_type; + + PointerView m_pointers; + uint64_t & m_result; + + sum_memory( PointerView & ptrs, size_t num_ptrs, uint64_t & res ) + : m_pointers( ptrs ), m_result( res ) + { + // Initialize the view with the out degree of each vertex. + Kokkos::parallel_reduce( num_ptrs * STRIDE, *this, m_result ); + } + + KOKKOS_INLINE_FUNCTION + void init( value_type & v ) const + { v = 0; } + + KOKKOS_INLINE_FUNCTION + void join( volatile value_type & dst, volatile value_type const & src ) const + { dst += src; } + + KOKKOS_INLINE_FUNCTION + void operator()( size_type i, value_type & r ) const + { + if ( i % STRIDE == 0 ) { + r += *m_pointers[i / STRIDE].ptr; + } + } +}; + +template < typename PointerView, typename Allocator > +struct deallocate_memory { + typedef typename PointerView::execution_space execution_space; + typedef typename execution_space::size_type size_type; + + PointerView m_pointers; + size_t m_chunk_size; + Allocator m_mempool; + + deallocate_memory( PointerView & ptrs, size_t num_ptrs, + size_t cs, Allocator & m ) + : m_pointers( ptrs ), m_chunk_size( cs ), m_mempool( m ) + { + // Initialize the view with the out degree of each vertex. 
+ Kokkos::parallel_for( num_ptrs * STRIDE, *this ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( size_type i ) const + { + if ( i % STRIDE == 0 ) { + m_mempool.deallocate( m_pointers[i / STRIDE].ptr, m_chunk_size ); + } + } +}; + +template < typename WorkView, typename PointerView, typename ScalarView, + typename Allocator > +struct allocate_deallocate_memory { + typedef typename WorkView::execution_space execution_space; + typedef typename execution_space::size_type size_type; + + WorkView m_work; + PointerView m_pointers; + ScalarView m_ptrs_front; + ScalarView m_ptrs_back; + Allocator m_mempool; + + allocate_deallocate_memory( WorkView & w, size_t work_size, PointerView & p, + ScalarView pf, ScalarView pb, Allocator & m ) + : m_work( w ), m_pointers( p ), m_ptrs_front( pf ), m_ptrs_back( pb ), + m_mempool( m ) + { + // Initialize the view with the out degree of each vertex. + Kokkos::parallel_for( work_size * STRIDE, *this ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( size_type i ) const + { + if ( i % STRIDE == 0 ) { + unsigned my_work = m_work[i / STRIDE]; + + if ( ( my_work & 1 ) == 0 ) { + // Allocation. + size_t pos = Kokkos::atomic_fetch_add( &m_ptrs_back(), 1 ); + size_t alloc_size = my_work >> 1; + m_pointers[pos].ptr = m_mempool.allocate( alloc_size ); + m_pointers[pos].size = alloc_size; + } + else { + // Deallocation. 
+ size_t pos = Kokkos::atomic_fetch_add( &m_ptrs_front(), 1 ); + m_mempool.deallocate( m_pointers[pos].ptr, m_pointers[pos].size ); + } + } + } +}; + +#define PRECISION 6 +#define SHIFTW 24 +#define SHIFTW2 12 + +template < typename F > +void print_results( const std::string & text, F elapsed_time ) +{ + std::cout << std::setw( SHIFTW ) << text << std::setw( SHIFTW2 ) + << std::fixed << std::setprecision( PRECISION ) << elapsed_time + << std::endl; +} + +template < typename F, typename T > +void print_results( const std::string & text, unsigned long long width, + F elapsed_time, T result ) +{ + std::cout << std::setw( SHIFTW ) << text << std::setw( SHIFTW2 ) + << std::fixed << std::setprecision( PRECISION ) << elapsed_time + << " " << std::setw( width ) << result << std::endl; +} + +template < typename F > +void print_results( const std::string & text, unsigned long long width, + F elapsed_time, const std::string & result ) +{ + std::cout << std::setw( SHIFTW ) << text << std::setw( SHIFTW2 ) + << std::fixed << std::setprecision( PRECISION ) << elapsed_time + << " " << std::setw( width ) << result << std::endl; +} + +// This test slams allocation and deallocation in a worse than real-world usage +// scenario to see how bad the thread-safety really is by having a loop where +// all threads allocate and a subsequent loop where all threads deallocate. +// All of the allocation requests are for equal-sized chunks that are the base +// chunk size of the memory pool. It also tests initialization of the memory +// pool and breaking large chunks into smaller chunks to fulfill allocation +// requests. It verifies that MemoryPool(), allocate(), and deallocate() work +// correctly. 
+template < class Device > +bool test_mempool( size_t chunk_size, size_t total_size ) +{ + typedef typename Device::execution_space execution_space; + typedef typename Device::memory_space memory_space; + typedef Device device_type; + typedef Kokkos::View< pointer_obj *, device_type > pointer_view; + typedef Kokkos::Experimental::MemoryPool< device_type > pool_memory_space; + + uint64_t result; + size_t num_chunks = total_size / chunk_size; + bool return_val = true; + + pointer_view pointers( "pointers", num_chunks ); + +#ifdef TESTMEMORYPOOL_PRINT + std::cout << "*** test_mempool() ***" << std::endl + << std::setw( SHIFTW ) << "chunk_size: " << std::setw( 12 ) + << chunk_size << std::endl + << std::setw( SHIFTW ) << "total_size: " << std::setw( 12 ) + << total_size << std::endl + << std::setw( SHIFTW ) << "num_chunks: " << std::setw( 12 ) + << num_chunks << std::endl; + + double elapsed_time = 0; + Kokkos::Timer timer; +#endif + + pool_memory_space mempool( memory_space(), total_size * 1.2, 20 ); + +#ifdef TESTMEMORYPOOL_PRINT + execution_space::fence(); + elapsed_time = timer.seconds(); + print_results( "initialize mempool: ", elapsed_time ); +#ifdef TESTMEMORYPOOL_PRINT_STATUS + mempool.print_status(); +#endif + timer.reset(); +#endif + + { + allocate_memory< pointer_view, pool_memory_space > + am( pointers, num_chunks, chunk_size, mempool ); + } + +#ifdef TESTMEMORYPOOL_PRINT + execution_space::fence(); + elapsed_time = timer.seconds(); + print_results( "allocate chunks: ", elapsed_time ); +#ifdef TESTMEMORYPOOL_PRINT_STATUS + mempool.print_status(); +#endif + timer.reset(); +#endif + + { + count_invalid_memory< pointer_view > sm( pointers, num_chunks, result ); + } + +#ifdef TESTMEMORYPOOL_PRINT + execution_space::fence(); + elapsed_time = timer.seconds(); + print_results( "invalid chunks: ", 16, elapsed_time, result ); + timer.reset(); +#endif + + { + fill_memory< pointer_view > fm( pointers, num_chunks ); + } + +#ifdef TESTMEMORYPOOL_PRINT + 
execution_space::fence(); + elapsed_time = timer.seconds(); + print_results( "fill chunks: ", elapsed_time ); + timer.reset(); +#endif + + { + sum_memory< pointer_view > sm( pointers, num_chunks, result ); + } + + execution_space::fence(); + +#ifdef TESTMEMORYPOOL_PRINT + elapsed_time = timer.seconds(); + print_results( "sum chunks: ", 16, elapsed_time, result ); +#endif + + if ( result != ( num_chunks * ( num_chunks - 1 ) ) / 2 ) { + std::cerr << "Invalid sum value in memory." << std::endl; + return_val = false; + } + +#ifdef TESTMEMORYPOOL_PRINT + timer.reset(); +#endif + + { + deallocate_memory< pointer_view, pool_memory_space > + dm( pointers, num_chunks, chunk_size, mempool ); + } + +#ifdef TESTMEMORYPOOL_PRINT + execution_space::fence(); + elapsed_time = timer.seconds(); + print_results( "deallocate chunks: ", elapsed_time ); +#ifdef TESTMEMORYPOOL_PRINT_STATUS + mempool.print_status(); +#endif + timer.reset(); +#endif + + { + allocate_memory< pointer_view, pool_memory_space > + am( pointers, num_chunks, chunk_size, mempool ); + } + +#ifdef TESTMEMORYPOOL_PRINT + execution_space::fence(); + elapsed_time = timer.seconds(); + print_results( "allocate chunks: ", elapsed_time ); +#ifdef TESTMEMORYPOOL_PRINT_STATUS + mempool.print_status(); +#endif + timer.reset(); +#endif + + { + count_invalid_memory< pointer_view > sm( pointers, num_chunks, result ); + } + +#ifdef TESTMEMORYPOOL_PRINT + execution_space::fence(); + elapsed_time = timer.seconds(); + print_results( "invalid chunks: ", 16, elapsed_time, result ); + timer.reset(); +#endif + + { + fill_memory< pointer_view > fm( pointers, num_chunks ); + } + +#ifdef TESTMEMORYPOOL_PRINT + execution_space::fence(); + elapsed_time = timer.seconds(); + print_results( "fill chunks: ", elapsed_time ); + timer.reset(); +#endif + + { + sum_memory< pointer_view > sm( pointers, num_chunks, result ); + } + + execution_space::fence(); + +#ifdef TESTMEMORYPOOL_PRINT + elapsed_time = timer.seconds(); + print_results( "sum chunks: 
", 16, elapsed_time, result ); +#endif + + if ( result != ( num_chunks * ( num_chunks - 1 ) ) / 2 ) { + std::cerr << "Invalid sum value in memory." << std::endl; + return_val = false; + } + +#ifdef TESTMEMORYPOOL_PRINT + timer.reset(); +#endif + + { + deallocate_memory< pointer_view, pool_memory_space > + dm( pointers, num_chunks, chunk_size, mempool ); + } + +#ifdef TESTMEMORYPOOL_PRINT + execution_space::fence(); + elapsed_time = timer.seconds(); + print_results( "deallocate chunks: ", elapsed_time ); +#ifdef TESTMEMORYPOOL_PRINT_STATUS + mempool.print_status(); +#endif +#endif + + return return_val; +} + +template < typename T > +T smallest_power2_ge( T val ) +{ + // Find the most significant nonzero bit. + int first_nonzero_bit = Kokkos::Impl::bit_scan_reverse( val ); + + // If val is an integral power of 2, ceil( log2(val) ) is equal to the + // most significant nonzero bit. Otherwise, you need to add 1. + int lg2_size = first_nonzero_bit + + !Kokkos::Impl::is_integral_power_of_two( val ); + + return T(1) << T(lg2_size); +} + +// This test makes allocation requests for multiple sizes and interleaves +// allocation and deallocation. +// +// There are 3 phases. The first phase does only allocations to build up a +// working state for the allocator. The second phase interleaves allocations +// and deletions. The third phase does only deallocations to undo all the +// allocations from the first phase. By building first to a working state, +// allocations and deallocations can happen in any order for the second phase. +// Each phase performs on multiple chunk sizes. 
+template < class Device > +void test_mempool2( unsigned base_chunk_size, size_t num_chunk_sizes, + size_t phase1_size, size_t phase2_size ) +{ +#ifdef TESTMEMORYPOOL_PRINT + typedef typename Device::execution_space execution_space; +#endif + typedef typename Device::memory_space memory_space; + typedef Device device_type; + typedef Kokkos::View< unsigned *, device_type > work_view; + typedef Kokkos::View< size_t, device_type > scalar_view; + typedef Kokkos::View< pointer_obj2 *, device_type > pointer_view; + typedef Kokkos::Experimental::MemoryPool< device_type > pool_memory_space; + + enum { + MIN_CHUNK_SIZE = 64, + MIN_BASE_CHUNK_SIZE = MIN_CHUNK_SIZE / 2 + 1 + }; + + // Make sure the base chunk size is at least MIN_BASE_CHUNK_SIZE bytes, so + // all the different chunk sizes translate to different block sizes for the + // allocator. + if ( base_chunk_size < MIN_BASE_CHUNK_SIZE ) { + base_chunk_size = MIN_BASE_CHUNK_SIZE; + } + + // Get the smallest power of 2 >= the base chunk size. The size must be + // >= MIN_CHUNK_SIZE, though. + unsigned ceil_base_chunk_size = smallest_power2_ge( base_chunk_size ); + if ( ceil_base_chunk_size < MIN_CHUNK_SIZE ) { + ceil_base_chunk_size = MIN_CHUNK_SIZE; + } + + // Make sure the phase 1 size is multiples of num_chunk_sizes. + phase1_size = ( ( phase1_size + num_chunk_sizes - 1 ) / num_chunk_sizes ) * + num_chunk_sizes; + + // Make sure the phase 2 size is multiples of (2 * num_chunk_sizes). + phase2_size = + ( ( phase2_size + 2 * num_chunk_sizes - 1 ) / ( 2 * num_chunk_sizes ) ) * + 2 * num_chunk_sizes; + + // The phase2 size must be <= twice the phase1 size so that deallocations + // can't happen before allocations. + if ( phase2_size > 2 * phase1_size ) phase2_size = 2 * phase1_size; + + size_t phase3_size = phase1_size; + size_t half_phase2_size = phase2_size / 2; + + // Each entry in the work views has the following format. The least + // significant bit indicates allocation (0) vs. deallocation (1). 
For + // allocation, the other bits indicate the desired allocation size. + + // Initialize the phase 1 work view with an equal number of allocations for + // each chunk size. + work_view phase1_work( "Phase 1 Work", phase1_size ); + typename work_view::HostMirror host_phase1_work = + create_mirror_view(phase1_work); + + size_t inner_size = phase1_size / num_chunk_sizes; + unsigned chunk_size = base_chunk_size; + + for ( size_t i = 0; i < num_chunk_sizes; ++i ) { + for ( size_t j = 0; j < inner_size; ++j ) { + host_phase1_work[i * inner_size + j] = chunk_size << 1; + } + + chunk_size *= 2; + } + + std::random_shuffle( host_phase1_work.ptr_on_device(), + host_phase1_work.ptr_on_device() + phase1_size ); + + deep_copy( phase1_work, host_phase1_work ); + + // Initialize the phase 2 work view with half allocations and half + // deallocations with an equal number of allocations for each chunk size. + work_view phase2_work( "Phase 2 Work", phase2_size ); + typename work_view::HostMirror host_phase2_work = + create_mirror_view(phase2_work); + + inner_size = half_phase2_size / num_chunk_sizes; + chunk_size = base_chunk_size; + + for ( size_t i = 0; i < num_chunk_sizes; ++i ) { + for ( size_t j = 0; j < inner_size; ++j ) { + host_phase2_work[i * inner_size + j] = chunk_size << 1; + } + + chunk_size *= 2; + } + + for ( size_t i = half_phase2_size; i < phase2_size; ++i ) { + host_phase2_work[i] = 1; + } + + std::random_shuffle( host_phase2_work.ptr_on_device(), + host_phase2_work.ptr_on_device() + phase2_size ); + + deep_copy( phase2_work, host_phase2_work ); + + // Initialize the phase 3 work view with all deallocations. 
+ work_view phase3_work( "Phase 3 Work", phase3_size ); + typename work_view::HostMirror host_phase3_work = + create_mirror_view(phase3_work); + + inner_size = phase3_size / num_chunk_sizes; + + for ( size_t i = 0; i < phase3_size; ++i ) host_phase3_work[i] = 1; + + deep_copy( phase3_work, host_phase3_work ); + + // Calculate the amount of memory needed for the allocator. We need to know + // the number of superblocks required for each chunk size and use that to + // calculate the amount of memory for each chunk size. + size_t lg_sb_size = 18; + size_t sb_size = 1 << lg_sb_size; + size_t total_size = 0; + size_t allocs_per_size = phase1_size / num_chunk_sizes + + half_phase2_size / num_chunk_sizes; + + chunk_size = ceil_base_chunk_size; + for ( size_t i = 0; i < num_chunk_sizes; ++i ) { + size_t my_size = allocs_per_size * chunk_size; + total_size += ( my_size + sb_size - 1 ) / sb_size * sb_size; + chunk_size *= 2; + } + + // Declare the queue to hold the records for allocated memory. An allocation + // adds a record to the back of the queue, and a deallocation removes a + // record from the front of the queue. 
+ size_t num_allocations = phase1_size + half_phase2_size; + scalar_view ptrs_front( "Pointers front" ); + scalar_view ptrs_back( "Pointers back" ); + + pointer_view pointers( "pointers", num_allocations ); + +#ifdef TESTMEMORYPOOL_PRINT + printf( "\n*** test_mempool2() ***\n" ); + printf( " num_chunk_sizes: %12zu\n", num_chunk_sizes ); + printf( " base_chunk_size: %12u\n", base_chunk_size ); + printf( " ceil_base_chunk_size: %12u\n", ceil_base_chunk_size ); + printf( " phase1_size: %12zu\n", phase1_size ); + printf( " phase2_size: %12zu\n", phase2_size ); + printf( " phase3_size: %12zu\n", phase3_size ); + printf( " allocs_per_size: %12zu\n", allocs_per_size ); + printf( " num_allocations: %12zu\n", num_allocations ); + printf( " total_size: %12zu\n", total_size ); + fflush( stdout ); + + double elapsed_time = 0; + Kokkos::Timer timer; +#endif + + pool_memory_space mempool( memory_space(), total_size * 1.2, lg_sb_size ); + +#ifdef TESTMEMORYPOOL_PRINT + execution_space::fence(); + elapsed_time = timer.seconds(); + print_results( "initialize mempool: ", elapsed_time ); + +#ifdef TESTMEMORYPOOL_PRINT_STATUS + mempool.print_status(); +#endif + + timer.reset(); +#endif + + { + allocate_deallocate_memory< work_view, pointer_view, scalar_view, + pool_memory_space > + adm( phase1_work, phase1_size, pointers, ptrs_front, ptrs_back, mempool ); + } + +#ifdef TESTMEMORYPOOL_PRINT + execution_space::fence(); + elapsed_time = timer.seconds(); + print_results( "phase1: ", elapsed_time ); + +#ifdef TESTMEMORYPOOL_PRINT_STATUS + mempool.print_status(); +#endif + + timer.reset(); +#endif + + { + allocate_deallocate_memory< work_view, pointer_view, scalar_view, + pool_memory_space > + adm( phase2_work, phase2_size, pointers, ptrs_front, ptrs_back, mempool ); + } + +#ifdef TESTMEMORYPOOL_PRINT + execution_space::fence(); + elapsed_time = timer.seconds(); + print_results( "phase2: ", elapsed_time ); + +#ifdef TESTMEMORYPOOL_PRINT_STATUS + mempool.print_status(); +#endif + + 
timer.reset(); +#endif + + { + allocate_deallocate_memory< work_view, pointer_view, scalar_view, + pool_memory_space > + adm( phase3_work, phase3_size, pointers, ptrs_front, ptrs_back, mempool ); + } + +#ifdef TESTMEMORYPOOL_PRINT + execution_space::fence(); + elapsed_time = timer.seconds(); + print_results( "phase3: ", elapsed_time ); +#ifdef TESTMEMORYPOOL_PRINT_STATUS + mempool.print_status(); +#endif +#endif +} + +// Tests for correct behavior when the allocator is out of memory. +template < class Device > +void test_memory_exhaustion() +{ +#ifdef TESTMEMORYPOOL_PRINT + typedef typename Device::execution_space execution_space; +#endif + typedef typename Device::memory_space memory_space; + typedef Device device_type; + typedef Kokkos::View< pointer_obj *, device_type > pointer_view; + typedef Kokkos::Experimental::MemoryPool< device_type > pool_memory_space; + + // The allocator will have a single superblock, and allocations will all be + // of the same chunk size. The allocation loop will attempt to allocate + // twice the number of chunks as are available in the allocator. The + // deallocation loop will only free the successfully allocated chunks. 
+ + size_t chunk_size = 128; + size_t num_chunks = 128; + size_t half_num_chunks = num_chunks / 2; + size_t superblock_size = chunk_size * half_num_chunks; + size_t lg_superblock_size = + Kokkos::Impl::integral_power_of_two( superblock_size ); + + pointer_view pointers( "pointers", num_chunks ); + +#ifdef TESTMEMORYPOOL_PRINT + std::cout << "\n*** test_memory_exhaustion() ***" << std::endl; + + double elapsed_time = 0; + Kokkos::Timer timer; +#endif + + pool_memory_space mempool( memory_space(), superblock_size, + lg_superblock_size ); + +#ifdef TESTMEMORYPOOL_PRINT + execution_space::fence(); + elapsed_time = timer.seconds(); + print_results( "initialize mempool: ", elapsed_time ); +#ifdef TESTMEMORYPOOL_PRINT_STATUS + mempool.print_status(); +#endif + timer.reset(); +#endif + + { + allocate_memory< pointer_view, pool_memory_space > + am( pointers, num_chunks, chunk_size, mempool ); + } + +#ifdef TESTMEMORYPOOL_PRINT + execution_space::fence(); + elapsed_time = timer.seconds(); + print_results( "allocate chunks: ", elapsed_time ); +#ifdef TESTMEMORYPOOL_PRINT_STATUS + mempool.print_status(); +#endif + timer.reset(); +#endif + + { + // In parallel, the allocations that succeeded were not put contiguously + // into the pointers View. The whole View can still be looped over and + // have deallocate called because deallocate will just do nothing for NULL + // pointers. 
+ deallocate_memory< pointer_view, pool_memory_space > + dm( pointers, num_chunks, chunk_size, mempool ); + } + +#ifdef TESTMEMORYPOOL_PRINT + execution_space::fence(); + elapsed_time = timer.seconds(); + print_results( "deallocate chunks: ", elapsed_time ); +#ifdef TESTMEMORYPOOL_PRINT_STATUS + mempool.print_status(); +#endif +#endif +} + +} + +#ifdef TESTMEMORYPOOL_PRINT +#undef TESTMEMORYPOOL_PRINT +#endif + +#ifdef TESTMEMORYPOOL_PRINT_STATUS +#undef TESTMEMORYPOOL_PRINT_STATUS +#endif + +#ifdef STRIDE +#undef STRIDE +#endif + +#endif diff --git a/lib/kokkos/core/unit_test/TestMemorySpaceTracking.hpp b/lib/kokkos/core/unit_test/TestMemorySpaceTracking.hpp new file mode 100644 index 0000000000000000000000000000000000000000..575f2f2c254ecae81132c8e5f714e4fe6e71c14f --- /dev/null +++ b/lib/kokkos/core/unit_test/TestMemorySpaceTracking.hpp @@ -0,0 +1,100 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <iostream> +#include <Kokkos_Core.hpp> + +/*--------------------------------------------------------------------------*/ + +namespace { + +template<class Arg1> +class TestMemorySpace { +public: + + typedef typename Arg1::memory_space MemorySpace; + TestMemorySpace() { run_test(); } + + void run_test() + { + +#if ! 
KOKKOS_USING_EXP_VIEW + + Kokkos::View<int* ,Arg1> invalid; + ASSERT_EQ(0u, invalid.tracker().ref_count() ); + + { + Kokkos::View<int* ,Arg1> a("A",10); + + ASSERT_EQ(1u, a.tracker().ref_count() ); + + { + Kokkos::View<int* ,Arg1> b = a; + ASSERT_EQ(2u, b.tracker().ref_count() ); + + Kokkos::View<int* ,Arg1> D("D",10); + ASSERT_EQ(1u, D.tracker().ref_count() ); + + { + Kokkos::View<int* ,Arg1> E("E",10); + ASSERT_EQ(1u, E.tracker().ref_count() ); + } + + ASSERT_EQ(2u, b.tracker().ref_count() ); + } + ASSERT_EQ(1u, a.tracker().ref_count() ); + } + +#endif + + } +}; + +} + +/*--------------------------------------------------------------------------*/ + + + diff --git a/lib/kokkos/core/unit_test/TestOpenMP.cpp b/lib/kokkos/core/unit_test/TestOpenMP.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6e8fc4517917bfcaaeecba6fbc2ac59f6090350d --- /dev/null +++ b/lib/kokkos/core/unit_test/TestOpenMP.cpp @@ -0,0 +1,262 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Macros.hpp> +#ifdef KOKKOS_LAMBDA +#undef KOKKOS_LAMBDA +#endif +#define KOKKOS_LAMBDA [=] + +#include <Kokkos_Core.hpp> + +//---------------------------------------------------------------------------- + +#include <TestViewImpl.hpp> +#include <TestAtomic.hpp> +#include <TestAtomicOperations.hpp> + +#include <TestViewAPI.hpp> +#include <TestViewSubview.hpp> +#include <TestViewOfClass.hpp> + +#include <TestSharedAlloc.hpp> +#include <TestViewMapping.hpp> + +#include <TestRange.hpp> +#include <TestTeam.hpp> +#include <TestReduce.hpp> +#include <TestScan.hpp> +#include <TestAggregate.hpp> +#include <TestAggregateReduction.hpp> +#include <TestCompilerMacros.hpp> +#include <TestMemoryPool.hpp> + + +#include <TestCXX11.hpp> +#include <TestCXX11Deduction.hpp> +#include <TestTeamVector.hpp> +#include <TestMemorySpaceTracking.hpp> +#include <TestTemplateMetaFunctions.hpp> + +#include <TestPolicyConstruction.hpp> + +#include <TestMDRange.hpp> + +namespace Test { + +class openmp : public 
::testing::Test { +protected: + static void SetUpTestCase() + { + const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); + const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); + const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); + + const unsigned threads_count = std::max( 1u , numa_count ) * + std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 ); + + Kokkos::OpenMP::initialize( threads_count ); + Kokkos::OpenMP::print_configuration( std::cout , true ); + srand(10231); + } + + static void TearDownTestCase() + { + Kokkos::OpenMP::finalize(); + + omp_set_num_threads(1); + + ASSERT_EQ( 1 , omp_get_max_threads() ); + } +}; + + +TEST_F( openmp , md_range ) { + TestMDRange_2D< Kokkos::OpenMP >::test_for2(100,100); + + TestMDRange_3D< Kokkos::OpenMP >::test_for3(100,100,100); +} + +TEST_F( openmp , impl_shared_alloc ) { + test_shared_alloc< Kokkos::HostSpace , Kokkos::OpenMP >(); +} + +TEST_F( openmp, policy_construction) { + TestRangePolicyConstruction< Kokkos::OpenMP >(); + TestTeamPolicyConstruction< Kokkos::OpenMP >(); +} + +TEST_F( openmp , impl_view_mapping ) { + test_view_mapping< Kokkos::OpenMP >(); + test_view_mapping_subview< Kokkos::OpenMP >(); + test_view_mapping_operator< Kokkos::OpenMP >(); + TestViewMappingAtomic< Kokkos::OpenMP >::run(); +} + +TEST_F( openmp, view_impl) { + test_view_impl< Kokkos::OpenMP >(); +} + +TEST_F( openmp, view_api) { + TestViewAPI< double , Kokkos::OpenMP >(); +} + +TEST_F( openmp , view_nested_view ) +{ + ::Test::view_nested_view< Kokkos::OpenMP >(); +} + +TEST_F( openmp , atomics ) +{ + const int loop_count = 1e4 ; + + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::OpenMP>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::OpenMP>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::OpenMP>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::OpenMP>(loop_count,1) ) ); + ASSERT_TRUE( ( 
TestAtomic::Loop<unsigned int,Kokkos::OpenMP>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::OpenMP>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::OpenMP>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::OpenMP>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::OpenMP>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::OpenMP>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::OpenMP>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::OpenMP>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::OpenMP>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::OpenMP>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::OpenMP>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::OpenMP>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::OpenMP>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::OpenMP>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::OpenMP>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::OpenMP>(100,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::OpenMP>(100,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::OpenMP>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::OpenMP>(100,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::OpenMP>(100,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::OpenMP>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::OpenMP>(100,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::OpenMP>(100,3) ) ); +} + +TEST_F( openmp , atomic_operations ) +{ + const int start = 1; //Avoid zero for division + const int end = 11; + for (int i = 
start; i < end; ++i) + { + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 9 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 9 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 9 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 9 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 9 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::OpenMP>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::OpenMP>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::OpenMP>(start, end-i, 3 ) ) ); + 
ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::OpenMP>(start, end-i, 4 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::OpenMP>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::OpenMP>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::OpenMP>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::OpenMP>(start, end-i, 4 ) ) ); + } + +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/TestOpenMP_a.cpp b/lib/kokkos/core/unit_test/TestOpenMP_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..64eac66804b1ef6a053930d6db47abb566ccda66 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestOpenMP_a.cpp @@ -0,0 +1,150 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Macros.hpp> +#ifdef KOKKOS_LAMBDA +#undef KOKKOS_LAMBDA +#endif +#define KOKKOS_LAMBDA [=] + +#include <Kokkos_Core.hpp> + +//---------------------------------------------------------------------------- + +#include <TestViewImpl.hpp> +#include <TestAtomic.hpp> + +#include <TestViewAPI.hpp> +#include <TestViewSubview.hpp> +#include <TestViewOfClass.hpp> + +#include <TestSharedAlloc.hpp> +#include <TestViewMapping.hpp> + +#include <TestRange.hpp> +#include <TestTeam.hpp> +#include <TestReduce.hpp> +#include <TestScan.hpp> +#include <TestAggregate.hpp> +#include <TestAggregateReduction.hpp> +#include <TestCompilerMacros.hpp> +#include <TestMemoryPool.hpp> + + +#include <TestCXX11.hpp> +#include <TestCXX11Deduction.hpp> +#include <TestTeamVector.hpp> +#include <TestMemorySpaceTracking.hpp> +#include <TestTemplateMetaFunctions.hpp> + +#include <TestPolicyConstruction.hpp> + + +namespace Test { + +class openmp : public ::testing::Test { +protected: + static void SetUpTestCase(); + static void 
TearDownTestCase(); +}; + +TEST_F( openmp, view_subview_auto_1d_left ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::OpenMP >(); +} + +TEST_F( openmp, view_subview_auto_1d_right ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::OpenMP >(); +} + +TEST_F( openmp, view_subview_auto_1d_stride ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::OpenMP >(); +} + +TEST_F( openmp, view_subview_assign_strided ) { + TestViewSubview::test_1d_strided_assignment< Kokkos::OpenMP >(); +} + +TEST_F( openmp, view_subview_left_0 ) { + TestViewSubview::test_left_0< Kokkos::OpenMP >(); +} + +TEST_F( openmp, view_subview_left_1 ) { + TestViewSubview::test_left_1< Kokkos::OpenMP >(); +} + +TEST_F( openmp, view_subview_left_2 ) { + TestViewSubview::test_left_2< Kokkos::OpenMP >(); +} + +TEST_F( openmp, view_subview_left_3 ) { + TestViewSubview::test_left_3< Kokkos::OpenMP >(); +} + +TEST_F( openmp, view_subview_right_0 ) { + TestViewSubview::test_right_0< Kokkos::OpenMP >(); +} + +TEST_F( openmp, view_subview_right_1 ) { + TestViewSubview::test_right_1< Kokkos::OpenMP >(); +} + +TEST_F( openmp, view_subview_right_3 ) { + TestViewSubview::test_right_3< Kokkos::OpenMP >(); +} + +TEST_F( openmp, view_subview_1d_assign ) { + TestViewSubview::test_1d_assign< Kokkos::OpenMP >(); +} + +TEST_F( openmp, view_subview_2d_from_3d ) { + TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP >(); +} + +TEST_F( openmp, view_subview_2d_from_5d ) { + TestViewSubview::test_2d_subview_5d< Kokkos::OpenMP >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/TestOpenMP_b.cpp b/lib/kokkos/core/unit_test/TestOpenMP_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6cc2476014c8d8e07ef6bc6a60b38c3660d3d7c4 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestOpenMP_b.cpp @@ -0,0 +1,185 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Macros.hpp> +#ifdef KOKKOS_LAMBDA +#undef KOKKOS_LAMBDA +#endif +#define KOKKOS_LAMBDA [=] + +#include <Kokkos_Core.hpp> + +//---------------------------------------------------------------------------- + +#include <TestViewImpl.hpp> +#include <TestAtomic.hpp> + +#include <TestViewAPI.hpp> +#include <TestViewSubview.hpp> +#include <TestViewOfClass.hpp> + +#include <TestSharedAlloc.hpp> +#include <TestViewMapping.hpp> + +#include <TestRange.hpp> +#include <TestTeam.hpp> +#include <TestReduce.hpp> +#include <TestScan.hpp> +#include <TestAggregate.hpp> +#include <TestAggregateReduction.hpp> +#include <TestCompilerMacros.hpp> +#include <TestMemoryPool.hpp> + + +#include <TestCXX11.hpp> +#include <TestCXX11Deduction.hpp> +#include <TestTeamVector.hpp> +#include <TestMemorySpaceTracking.hpp> +#include <TestTemplateMetaFunctions.hpp> + +#include <TestPolicyConstruction.hpp> + + +namespace Test { + +class openmp : public ::testing::Test { +protected: + static void SetUpTestCase(); + static void TearDownTestCase(); +}; + +TEST_F( openmp , range_tag ) +{ + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000); +} + +TEST_F( openmp , team_tag ) +{ + TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(2); + TestTeamPolicy< Kokkos::OpenMP , 
Kokkos::Schedule<Kokkos::Static> >::test_reduce(2); + TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(2); + TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(2); + TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); + TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); + TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000); + TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000); +} + +TEST_F( openmp, long_reduce) { + TestReduce< long , Kokkos::OpenMP >( 1000000 ); +} + +TEST_F( openmp, double_reduce) { + TestReduce< double , Kokkos::OpenMP >( 1000000 ); +} + +TEST_F( openmp, long_reduce_dynamic ) { + TestReduceDynamic< long , Kokkos::OpenMP >( 1000000 ); +} + +TEST_F( openmp, double_reduce_dynamic ) { + TestReduceDynamic< double , Kokkos::OpenMP >( 1000000 ); +} + +TEST_F( openmp, long_reduce_dynamic_view ) { + TestReduceDynamicView< long , Kokkos::OpenMP >( 1000000 ); +} + +TEST_F( openmp , reducers ) +{ + TestReducers<int, Kokkos::OpenMP>::execute_integer(); + TestReducers<size_t, Kokkos::OpenMP>::execute_integer(); + TestReducers<double, Kokkos::OpenMP>::execute_float(); + TestReducers<Kokkos::complex<double>, Kokkos::OpenMP>::execute_basic(); +} + +TEST_F( openmp, team_long_reduce) { + TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +TEST_F( openmp, team_double_reduce) { + TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< double , 
Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +TEST_F( openmp, team_shared_request) { + TestSharedTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >(); + TestSharedTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +TEST_F( openmp, team_scratch_request) { + TestScratchTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >(); + TestScratchTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) +TEST_F( openmp, team_lambda_shared_request) { + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >(); +} +#endif + +TEST_F( openmp, shmem_size) { + TestShmemSize< Kokkos::OpenMP >(); +} + +TEST_F( openmp, multi_level_scratch) { + TestMultiLevelScratchTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >(); + TestMultiLevelScratchTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/TestOpenMP_c.cpp b/lib/kokkos/core/unit_test/TestOpenMP_c.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f0cdabe913b8a4125fc5a1541823328d749759bf --- /dev/null +++ b/lib/kokkos/core/unit_test/TestOpenMP_c.cpp @@ -0,0 +1,262 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <gtest/gtest.h>
+
+// KOKKOS_LAMBDA is redefined to a plain capture-by-value lambda introducer
+// before Kokkos_Core.hpp and the test headers are pulled in.
+#include <Kokkos_Macros.hpp>
+#ifdef KOKKOS_LAMBDA
+#undef KOKKOS_LAMBDA
+#endif
+#define KOKKOS_LAMBDA [=]
+
+#include <Kokkos_Core.hpp>
+
+//----------------------------------------------------------------------------
+
+#include <TestViewImpl.hpp>
+#include <TestAtomic.hpp>
+
+#include <TestViewAPI.hpp>
+#include <TestViewSubview.hpp>
+#include <TestViewOfClass.hpp>
+
+#include <TestSharedAlloc.hpp>
+#include <TestViewMapping.hpp>
+
+#include <TestRange.hpp>
+#include <TestTeam.hpp>
+#include <TestReduce.hpp>
+#include <TestScan.hpp>
+#include <TestAggregate.hpp>
+#include <TestAggregateReduction.hpp>
+#include <TestCompilerMacros.hpp>
+#include <TestMemoryPool.hpp>
+#include <TestTaskPolicy.hpp>
+
+
+#include <TestCXX11.hpp>
+#include <TestCXX11Deduction.hpp>
+#include <TestTeamVector.hpp>
+#include <TestMemorySpaceTracking.hpp>
+#include <TestTemplateMetaFunctions.hpp>
+
+#include <TestPolicyConstruction.hpp>
+
+
+namespace Test {
+
+// gtest fixture used by every OpenMP test below.  The suite-level hooks are
+// only declared in this part of the file.
+class openmp : public ::testing::Test {
+protected:
+  static void SetUpTestCase();
+  static void TearDownTestCase();
+};
+
+// Fills a rank-4 LayoutLeft int view (extents N0 x N1 x N2 x N3) with
+// consecutive values, deep-copies it into a LayoutRight double view with the
+// same extents, and checks every element after the copy.
+TEST_F( openmp , view_remap )
+{
+  enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 };
+
+  typedef Kokkos::View< double*[N1][N2][N3] ,
+                        Kokkos::LayoutRight ,
+                        Kokkos::OpenMP > output_type ;
+
+  typedef Kokkos::View< int**[N2][N3] ,
+                        Kokkos::LayoutLeft ,
+                        Kokkos::OpenMP > input_type ;
+
+  typedef Kokkos::View< int*[N0][N2][N3] ,
+                        Kokkos::LayoutLeft ,
+                        Kokkos::OpenMP > diff_type ;
+
+  output_type output( "output" , N0 );
+  input_type  input ( "input" , N0 , N1 );
+  // NOTE(review): `diff` is only referenced by the commented-out deep_copy below.
+  diff_type   diff  ( "diff" , N0 );
+
+  // Write 1, 2, 3, ... with i0 as the fastest-varying index.
+  int value = 0 ;
+  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
+  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
+  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
+  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
+    input(i0,i1,i2,i3) = ++value ;
+  }}}}
+
+  // Kokkos::deep_copy( diff , input ); // throw with incompatible shape
+  Kokkos::deep_copy( output , input );
+
+  // Walk the indices in the same order and expect the same sequence back.
+  value = 0 ;
+  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
+  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
+  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
+  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
+    ++value ;
+    ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) );
+  }}}}
+}
+
+//----------------------------------------------------------------------------
+
+
+// Invokes the two view-aggregate test drivers for the OpenMP space.
+TEST_F( openmp , view_aggregate )
+{
+  TestViewAggregate< Kokkos::OpenMP >();
+  TestViewAggregateReduction< Kokkos::OpenMP >();
+}
+
+//----------------------------------------------------------------------------
+
+// Invokes TestScan three ways, then fences the OpenMP execution space.
+TEST_F( openmp , scan )
+{
+  TestScan< Kokkos::OpenMP >::test_range( 1 , 1000 );
+  TestScan< Kokkos::OpenMP >( 1000000 );
+  TestScan< Kokkos::OpenMP >( 10000000 );
+  Kokkos::OpenMP::fence();
+}
+
+
+// TestScanTeam with arguments 10 and 10000, static then dynamic schedule.
+TEST_F( openmp , team_scan )
+{
+  TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 10 );
+  TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 10 );
+  TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 10000 );
+  TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 );
+}
+
+//----------------------------------------------------------------------------
+
+// Passes when TestCompilerMacros::Test returns true.
+TEST_F( openmp , compiler_macros )
+{
+  ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::OpenMP >() ) );
+}
+
+//----------------------------------------------------------------------------
+
+TEST_F( openmp , memory_space )
+{
+  TestMemorySpace< Kokkos::OpenMP >();
+}
+
+// Only the first memory-pool helper's return value is asserted; the other
+// two are called for their own internal checks.
+TEST_F( openmp , memory_pool )
+{
+  bool val = TestMemoryPool::test_mempool< Kokkos::OpenMP >( 128, 128000000 );
+  ASSERT_TRUE( val );
+
+  TestMemoryPool::test_mempool2< Kokkos::OpenMP >( 64, 4, 1000000, 2000000 );
+
+  TestMemoryPool::test_memory_exhaustion< Kokkos::OpenMP >();
+}
+
+//----------------------------------------------------------------------------
+
+TEST_F( openmp , template_meta_functions )
+{
+  TestTemplateMetaFunctions<int, Kokkos::OpenMP >();
+}
+
+//----------------------------------------------------------------------------
+
+// Compiled only when OpenMP is the configured default device type; the run
+// time check additionally requires DefaultExecutionSpace to be OpenMP.
+#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
+TEST_F( openmp , cxx11 )
+{
+  if ( Kokkos::Impl::is_same< Kokkos::DefaultExecutionSpace , Kokkos::OpenMP >::value ) {
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(1) ) );
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(2) ) );
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(3) ) );
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(4) ) );
+  }
+}
+#endif
+
+TEST_F( openmp , reduction_deduction )
+{
+  TestCXX11::test_reduction_deduction< Kokkos::OpenMP >();
+}
+
+// TestTeamVector::Test is called with case ids 0 through 10; each must
+// return true.
+TEST_F( openmp , team_vector )
+{
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(0) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(1) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(2) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(3) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(4) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(5) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(6) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(7) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(8) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(9) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(10) ) );
+}
+
+//----------------------------------------------------------------------------
+
+// Task-policy tests are compiled only when KOKKOS_ENABLE_TASKPOLICY is defined.
+#if defined( KOKKOS_ENABLE_TASKPOLICY )
+
+// Runs TestFib for i = 0..24; the second argument grows as (i+1)*1000000.
+TEST_F( openmp , task_fib )
+{
+  for ( int i = 0 ; i < 25 ; ++i ) {
+    TestTaskPolicy::TestFib< Kokkos::OpenMP >::run(i, (i+1)*1000000 );
+  }
+}
+
+// Runs TestTaskDependence for i = 0..24.
+TEST_F( openmp , task_depend )
+{
+  for ( int i = 0 ; i < 25 ; ++i ) {
+    TestTaskPolicy::TestTaskDependence< Kokkos::OpenMP >::run(i);
+  }
+}
+
+// Only TestTaskTeam is active; the TestTaskTeamValue call is commented out.
+TEST_F( openmp , task_team )
+{
+  TestTaskPolicy::TestTaskTeam< Kokkos::OpenMP >::run(1000);
+  //TestTaskPolicy::TestTaskTeamValue< Kokkos::OpenMP >::run(1000); //TODO put back after testing
+}
+
+ +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ + + +} // namespace Test + + + + + + diff --git a/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp b/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp new file mode 100644 index 0000000000000000000000000000000000000000..049138eb07cd402140f1d509a3590eb8e3eb6104 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp @@ -0,0 +1,493 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> +#include <stdexcept> +#include <sstream> +#include <iostream> + +struct SomeTag{}; + +template< class ExecutionSpace > +class TestRangePolicyConstruction { +public: + TestRangePolicyConstruction() { + test_compile_time_parameters(); + } +private: + void test_compile_time_parameters() { + { + typedef Kokkos::RangePolicy<> policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,typename execution_space::size_type >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Static> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + } + { + typedef Kokkos::RangePolicy<ExecutionSpace> policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); + 
ASSERT_TRUE((std::is_same<index_type ,typename execution_space::size_type >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Static> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + } + { + typedef Kokkos::RangePolicy<ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,typename execution_space::size_type >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + } + { + typedef Kokkos::RangePolicy<ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + } + { + typedef Kokkos::RangePolicy<Kokkos::IndexType<long>, ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type 
,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + } + { + typedef Kokkos::RangePolicy<ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + } + { + typedef Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>,ExecutionSpace,Kokkos::IndexType<long>,SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + } + { + typedef Kokkos::RangePolicy<SomeTag,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,ExecutionSpace > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + } + { + 
typedef Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,typename execution_space::size_type >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + } + { + typedef Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + } + { + typedef Kokkos::RangePolicy<Kokkos::IndexType<long>, Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + } + { + typedef Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,SomeTag > policy_t; + typedef typename 
policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + } + { + typedef Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + } + { + typedef Kokkos::RangePolicy<SomeTag,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + } + } +}; + +template< class ExecutionSpace > +class TestTeamPolicyConstruction { +public: + TestTeamPolicyConstruction() { + test_compile_time_parameters(); + test_run_time_parameters(); + } +private: + void test_compile_time_parameters() { + { 
+ typedef Kokkos::TeamPolicy<> policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,typename execution_space::size_type >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Static> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + } + { + typedef Kokkos::TeamPolicy<ExecutionSpace> policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,typename execution_space::size_type >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Static> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + } + { + typedef Kokkos::TeamPolicy<ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,typename execution_space::size_type >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + } + { + typedef Kokkos::TeamPolicy<ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename 
policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + } + { + typedef Kokkos::TeamPolicy<Kokkos::IndexType<long>, ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + } + { + typedef Kokkos::TeamPolicy<ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + } + { + typedef Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic>,ExecutionSpace,Kokkos::IndexType<long>,SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag 
work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + } + { + typedef Kokkos::TeamPolicy<SomeTag,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,ExecutionSpace > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + } + { + typedef Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,typename execution_space::size_type >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + } + { + typedef Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type 
,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + } + { + typedef Kokkos::TeamPolicy<Kokkos::IndexType<long>, Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + } + { + typedef Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + } + { + typedef Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + 
ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + } + { + typedef Kokkos::TeamPolicy<SomeTag,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); + ASSERT_TRUE((std::is_same<index_type ,long >::value)); + ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); + ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + } + } + + + template<class policy_t> + void test_run_time_parameters_type() { + int league_size = 131; + int team_size = 4<policy_t::execution_space::concurrency()?4:policy_t::execution_space::concurrency(); + int chunk_size = 4; + int per_team_scratch = 1024; + int per_thread_scratch = 16; + int scratch_size = per_team_scratch + per_thread_scratch*team_size; + policy_t p1(league_size,team_size); + ASSERT_EQ (p1.league_size() , league_size); + ASSERT_EQ (p1.team_size() , team_size); + ASSERT_TRUE(p1.chunk_size() > 0); + ASSERT_EQ (p1.scratch_size(0), 0); + + policy_t p2 = p1.set_chunk_size(chunk_size); + ASSERT_EQ (p1.league_size() , league_size); + ASSERT_EQ (p1.team_size() , team_size); + ASSERT_TRUE(p1.chunk_size() > 0); + ASSERT_EQ (p1.scratch_size(0), 0); + + ASSERT_EQ (p2.league_size() , league_size); + ASSERT_EQ (p2.team_size() , team_size); + ASSERT_EQ (p2.chunk_size() , chunk_size); + ASSERT_EQ (p2.scratch_size(0), 0); + + policy_t p3 = p2.set_scratch_size(0,Kokkos::PerTeam(per_team_scratch)); + ASSERT_EQ (p2.league_size() , league_size); + ASSERT_EQ (p2.team_size() , team_size); + ASSERT_EQ (p2.chunk_size() , chunk_size); + ASSERT_EQ (p2.scratch_size(0), 0); + ASSERT_EQ (p3.league_size() , league_size); + ASSERT_EQ (p3.team_size() , team_size); + ASSERT_EQ (p3.chunk_size() , 
chunk_size); + ASSERT_EQ (p3.scratch_size(0), per_team_scratch); + + policy_t p4 = p2.set_scratch_size(0,Kokkos::PerThread(per_thread_scratch)); + ASSERT_EQ (p2.league_size() , league_size); + ASSERT_EQ (p2.team_size() , team_size); + ASSERT_EQ (p2.chunk_size() , chunk_size); + ASSERT_EQ (p2.scratch_size(0), 0); + ASSERT_EQ (p4.league_size() , league_size); + ASSERT_EQ (p4.team_size() , team_size); + ASSERT_EQ (p4.chunk_size() , chunk_size); + ASSERT_EQ (p4.scratch_size(0), per_thread_scratch*team_size); + + policy_t p5 = p2.set_scratch_size(0,Kokkos::PerThread(per_thread_scratch),Kokkos::PerTeam(per_team_scratch)); + ASSERT_EQ (p2.league_size() , league_size); + ASSERT_EQ (p2.team_size() , team_size); + ASSERT_EQ (p2.chunk_size() , chunk_size); + ASSERT_EQ (p2.scratch_size(0), 0); + ASSERT_EQ (p5.league_size() , league_size); + ASSERT_EQ (p5.team_size() , team_size); + ASSERT_EQ (p5.chunk_size() , chunk_size); + ASSERT_EQ (p5.scratch_size(0), scratch_size); + + policy_t p6 = p2.set_scratch_size(0,Kokkos::PerTeam(per_team_scratch),Kokkos::PerThread(per_thread_scratch)); + ASSERT_EQ (p2.league_size() , league_size); + ASSERT_EQ (p2.team_size() , team_size); + ASSERT_EQ (p2.chunk_size() , chunk_size); + ASSERT_EQ (p2.scratch_size(0), 0); + ASSERT_EQ (p6.league_size() , league_size); + ASSERT_EQ (p6.team_size() , team_size); + ASSERT_EQ (p6.chunk_size() , chunk_size); + ASSERT_EQ (p6.scratch_size(0), scratch_size); + + policy_t p7 = p3.set_scratch_size(0,Kokkos::PerTeam(per_team_scratch),Kokkos::PerThread(per_thread_scratch)); + ASSERT_EQ (p3.league_size() , league_size); + ASSERT_EQ (p3.team_size() , team_size); + ASSERT_EQ (p3.chunk_size() , chunk_size); + ASSERT_EQ (p3.scratch_size(0), per_team_scratch); + ASSERT_EQ (p7.league_size() , league_size); + ASSERT_EQ (p7.team_size() , team_size); + ASSERT_EQ (p7.chunk_size() , chunk_size); + ASSERT_EQ (p7.scratch_size(0), scratch_size); +} + void test_run_time_parameters() { + 
test_run_time_parameters_type<Kokkos::TeamPolicy<ExecutionSpace> >(); + test_run_time_parameters_type<Kokkos::TeamPolicy<ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long> > >(); + test_run_time_parameters_type<Kokkos::TeamPolicy<Kokkos::IndexType<long>, ExecutionSpace, Kokkos::Schedule<Kokkos::Dynamic> > >(); + test_run_time_parameters_type<Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,ExecutionSpace,SomeTag > >(); + } +}; diff --git a/lib/kokkos/core/unit_test/TestQthread.cpp b/lib/kokkos/core/unit_test/TestQthread.cpp new file mode 100644 index 0000000000000000000000000000000000000000..431b844c9f4e60030f546fba320088f5eecf89c5 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestQthread.cpp @@ -0,0 +1,290 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> +#include <Kokkos_Qthread.hpp> + +#include <Qthread/Kokkos_Qthread_TaskPolicy.hpp> + +//---------------------------------------------------------------------------- + +#include <TestViewImpl.hpp> +#include <TestAtomic.hpp> + +#include <TestViewAPI.hpp> +#include <TestViewOfClass.hpp> + +#include <TestTeam.hpp> +#include <TestRange.hpp> +#include <TestReduce.hpp> +#include <TestScan.hpp> +#include <TestAggregate.hpp> +#include <TestCompilerMacros.hpp> +#include <TestTaskPolicy.hpp> +// #include <TestTeamVector.hpp> + +namespace Test { + +class qthread : public ::testing::Test { +protected: + static void SetUpTestCase() + { + const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); + const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); + const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); + + int threads_count = std::max( 1u , numa_count ) + * std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 ); + 
Kokkos::Qthread::initialize( threads_count ); + Kokkos::Qthread::print_configuration( std::cout , true ); + } + + static void TearDownTestCase() + { + Kokkos::Qthread::finalize(); + } +}; + +TEST_F( qthread , compiler_macros ) +{ + ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Qthread >() ) ); +} + +TEST_F( qthread, view_impl) { + test_view_impl< Kokkos::Qthread >(); +} + +TEST_F( qthread, view_api) { + TestViewAPI< double , Kokkos::Qthread >(); +} + +TEST_F( qthread , view_nested_view ) +{ + ::Test::view_nested_view< Kokkos::Qthread >(); +} + +TEST_F( qthread , range_tag ) +{ + TestRange< Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); + TestRange< Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); + TestRange< Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000); +} + +TEST_F( qthread , team_tag ) +{ + TestTeamPolicy< Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); + TestTeamPolicy< Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); +} + +TEST_F( qthread, long_reduce) { + TestReduce< long , Kokkos::Qthread >( 1000000 ); +} + +TEST_F( qthread, double_reduce) { + TestReduce< double , Kokkos::Qthread >( 1000000 ); +} + +TEST_F( qthread, long_reduce_dynamic ) { + TestReduceDynamic< long , Kokkos::Qthread >( 1000000 ); +} + +TEST_F( qthread, double_reduce_dynamic ) { + TestReduceDynamic< double , Kokkos::Qthread >( 1000000 ); +} + +TEST_F( qthread, long_reduce_dynamic_view ) { + TestReduceDynamicView< long , Kokkos::Qthread >( 1000000 ); +} + +TEST_F( qthread, team_long_reduce) { + TestReduceTeam< long , Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >( 1000000 ); +} + +TEST_F( qthread, team_double_reduce) { + TestReduceTeam< double , Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >( 1000000 ); +} + + +TEST_F( qthread , atomics ) +{ + const int loop_count = 1e4 ; + + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Qthread>(loop_count,1) ) ); + 
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Qthread>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Qthread>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Qthread>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Qthread>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Qthread>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Qthread>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Qthread>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Qthread>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Qthread>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Qthread>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Qthread>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Qthread>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Qthread>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Qthread>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Qthread>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Qthread>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Qthread>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Qthread>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Qthread>(100,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Qthread>(100,3) ) ); + +#if defined( KOKKOS_ENABLE_ASM ) + ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Qthread>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Qthread>(100,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Qthread>(100,3) ) ); +#endif + +} + +TEST_F( qthread , view_remap ) +{ + enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; + + 
typedef Kokkos::View< double*[N1][N2][N3] , + Kokkos::LayoutRight , + Kokkos::Qthread > output_type ; + + typedef Kokkos::View< int**[N2][N3] , + Kokkos::LayoutLeft , + Kokkos::Qthread > input_type ; + + typedef Kokkos::View< int*[N0][N2][N3] , + Kokkos::LayoutLeft , + Kokkos::Qthread > diff_type ; + + output_type output( "output" , N0 ); + input_type input ( "input" , N0 , N1 ); + diff_type diff ( "diff" , N0 ); + + int value = 0 ; + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { + input(i0,i1,i2,i3) = ++value ; + }}}} + + // Kokkos::deep_copy( diff , input ); // throw with incompatible shape + Kokkos::deep_copy( output , input ); + + value = 0 ; + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { + ++value ; + ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); + }}}} +} + +//---------------------------------------------------------------------------- + +TEST_F( qthread , view_aggregate ) +{ + TestViewAggregate< Kokkos::Qthread >(); +} + +//---------------------------------------------------------------------------- + +TEST_F( qthread , scan ) +{ + TestScan< Kokkos::Qthread >::test_range( 1 , 1000 ); + TestScan< Kokkos::Qthread >( 1000000 ); + TestScan< Kokkos::Qthread >( 10000000 ); + Kokkos::Qthread::fence(); +} + +TEST_F( qthread, team_shared ) { + TestSharedTeam< Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >(); +} + +TEST_F( qthread, shmem_size) { + TestShmemSize< Kokkos::Qthread >(); +} + +TEST_F( qthread , team_scan ) +{ + TestScanTeam< Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >( 10 ); + TestScanTeam< Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >( 10000 ); +} + +#if 0 /* disable */ +TEST_F( qthread , team_vector ) +{ + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(0) ) ); + ASSERT_TRUE( 
( TestTeamVector::Test< Kokkos::Qthread >(1) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(2) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(3) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(4) ) ); +} +#endif + +//---------------------------------------------------------------------------- + +TEST_F( qthread , task_policy ) +{ + TestTaskPolicy::test_task_dep< Kokkos::Qthread >( 10 ); + for ( long i = 0 ; i < 25 ; ++i ) TestTaskPolicy::test_fib< Kokkos::Qthread >(i); + for ( long i = 0 ; i < 35 ; ++i ) TestTaskPolicy::test_fib2< Kokkos::Qthread >(i); +} + +TEST_F( qthread , task_team ) +{ + TestTaskPolicy::test_task_team< Kokkos::Qthread >(1000); +} + +//---------------------------------------------------------------------------- + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/TestRange.hpp b/lib/kokkos/core/unit_test/TestRange.hpp new file mode 100644 index 0000000000000000000000000000000000000000..be8b4f90a32d96ad12ff4bf3baafd4ab8dec11ca --- /dev/null +++ b/lib/kokkos/core/unit_test/TestRange.hpp @@ -0,0 +1,242 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <stdio.h> + +#include <Kokkos_Core.hpp> + +/*--------------------------------------------------------------------------*/ + +namespace Test { +namespace { + +template< class ExecSpace, class ScheduleType > +struct TestRange { + + typedef int value_type ; ///< typedef required for the parallel_reduce + + typedef Kokkos::View<int*,ExecSpace> view_type ; + + view_type m_flags ; + + struct VerifyInitTag {}; + struct ResetTag {}; + struct VerifyResetTag {}; + + TestRange( const size_t N ) + : m_flags( Kokkos::ViewAllocateWithoutInitializing("flags"), N ) + {} + + static void test_for( const size_t N ) + { + TestRange functor(N); + + typename view_type::HostMirror host_flags = Kokkos::create_mirror_view( functor.m_flags ); + + Kokkos::parallel_for( Kokkos::RangePolicy<ExecSpace,ScheduleType>(0,N) , functor ); + Kokkos::parallel_for( Kokkos::RangePolicy<ExecSpace,ScheduleType,VerifyInitTag>(0,N) , functor ); + + Kokkos::deep_copy( host_flags , functor.m_flags ); + + size_t error_count = 0 ; + for ( size_t i = 0 ; i < N ; ++i ) { + if ( int(i) != host_flags(i) ) ++error_count ; + } + ASSERT_EQ( error_count , size_t(0) ); + + Kokkos::parallel_for( Kokkos::RangePolicy<ExecSpace,ScheduleType,ResetTag>(0,N) , functor ); + Kokkos::parallel_for( std::string("TestKernelFor") , Kokkos::RangePolicy<ExecSpace,ScheduleType,VerifyResetTag>(0,N) , functor ); + + Kokkos::deep_copy( host_flags , functor.m_flags ); + + error_count = 0 ; + for ( size_t i = 0 ; i < N ; ++i ) { + if ( int(2*i) != host_flags(i) ) ++error_count ; + } + ASSERT_EQ( error_count , size_t(0) ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const int i ) const + { m_flags(i) = i ; } + + KOKKOS_INLINE_FUNCTION + void operator()( const VerifyInitTag & , const int i ) const + { if ( i != m_flags(i) ) { printf("TestRange::test_for error at %d != %d\n",i,m_flags(i)); } } + + 
KOKKOS_INLINE_FUNCTION + void operator()( const ResetTag & , const int i ) const + { m_flags(i) = 2 * m_flags(i); } + + KOKKOS_INLINE_FUNCTION + void operator()( const VerifyResetTag & , const int i ) const + { if ( 2 * i != m_flags(i) ) { printf("TestRange::test_for error at %d != %d\n",i,m_flags(i)); } } + + //---------------------------------------- + + struct OffsetTag {}; + + static void test_reduce( const size_t N ) + { + TestRange functor(N); + int total = 0 ; + + Kokkos::parallel_for( Kokkos::RangePolicy<ExecSpace,ScheduleType>(0,N) , functor ); + + Kokkos::parallel_reduce( "TestKernelReduce" , Kokkos::RangePolicy<ExecSpace,ScheduleType>(0,N) , functor , total ); + // sum( 0 .. N-1 ) + ASSERT_EQ( size_t((N-1)*(N)/2) , size_t(total) ); + + Kokkos::parallel_reduce( Kokkos::RangePolicy<ExecSpace,ScheduleType,OffsetTag>(0,N) , functor , total ); + // sum( 1 .. N ) + ASSERT_EQ( size_t((N)*(N+1)/2) , size_t(total) ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const int i , value_type & update ) const + { update += m_flags(i); } + + KOKKOS_INLINE_FUNCTION + void operator()( const OffsetTag & , const int i , value_type & update ) const + { update += 1 + m_flags(i); } + + //---------------------------------------- + + static void test_scan( const size_t N ) + { + TestRange functor(N); + + Kokkos::parallel_for( Kokkos::RangePolicy<ExecSpace,ScheduleType>(0,N) , functor ); + + Kokkos::parallel_scan( "TestKernelScan" , Kokkos::RangePolicy<ExecSpace,ScheduleType,OffsetTag>(0,N) , functor ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const OffsetTag & , const int i , value_type & update , bool final ) const + { + update += m_flags(i); + + if ( final ) { + if ( update != (i*(i+1))/2 ) { + printf("TestRange::test_scan error %d : %d != %d\n",i,(i*(i+1))/2,m_flags(i)); + } + } + } + + static void test_dynamic_policy( const size_t N ) { + + + typedef Kokkos::RangePolicy<ExecSpace,Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + + { + 
Kokkos::View<size_t*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Atomic> > count("Count",ExecSpace::concurrency()); + Kokkos::View<int*,ExecSpace> a("A",N); + + Kokkos::parallel_for( policy_t(0,N), + KOKKOS_LAMBDA (const typename policy_t::member_type& i) { + for(int k=0; k<(i<N/2?1:10000); k++ ) + a(i)++; + count(ExecSpace::hardware_thread_id())++; + }); + + int error = 0; + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N), KOKKOS_LAMBDA(const typename policy_t::member_type& i, int& lsum) { + lsum += ( a(i)!= (i<N/2?1:10000) ); + },error); + ASSERT_EQ(error,0); + + if( ( ExecSpace::concurrency()>(int)1) && (N>static_cast<const size_t>(4*ExecSpace::concurrency())) ) { + size_t min = N; + size_t max = 0; + for(int t=0; t<ExecSpace::concurrency(); t++) { + if(count(t)<min) min = count(t); + if(count(t)>max) max = count(t); + } + ASSERT_TRUE(min<max); + //if(ExecSpace::concurrency()>2) + // ASSERT_TRUE(2*min<max); + } + + } + + { + Kokkos::View<size_t*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Atomic> > count("Count",ExecSpace::concurrency()); + Kokkos::View<int*,ExecSpace> a("A",N); + + int sum = 0; + Kokkos::parallel_reduce( policy_t(0,N), + KOKKOS_LAMBDA (const typename policy_t::member_type& i, int& lsum) { + for(int k=0; k<(i<N/2?1:10000); k++ ) + a(i)++; + count(ExecSpace::hardware_thread_id())++; + lsum++; + },sum); + ASSERT_EQ(sum,N); + + int error = 0; + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N), KOKKOS_LAMBDA(const typename policy_t::member_type& i, int& lsum) { + lsum += ( a(i)!= (i<N/2?1:10000) ); + },error); + ASSERT_EQ(error,0); + + if( ( ExecSpace::concurrency()>(int)1) && (N>static_cast<const size_t>(4*ExecSpace::concurrency())) ) { + size_t min = N; + size_t max = 0; + for(int t=0; t<ExecSpace::concurrency(); t++) { + if(count(t)<min) min = count(t); + if(count(t)>max) max = count(t); + } + ASSERT_TRUE(min<max); + //if(ExecSpace::concurrency()>2) + // ASSERT_TRUE(2*min<max); + } + } + + } +}; + +} /* namespace */ +} /* namespace 
Test */ + +/*--------------------------------------------------------------------------*/ + diff --git a/lib/kokkos/core/unit_test/TestReduce.hpp b/lib/kokkos/core/unit_test/TestReduce.hpp new file mode 100644 index 0000000000000000000000000000000000000000..53fc393bcc29e6133e4d71ffab87815b935ec9f9 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestReduce.hpp @@ -0,0 +1,1872 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <stdexcept> +#include <sstream> +#include <iostream> +#include <limits> + +#include <Kokkos_Core.hpp> + +/*--------------------------------------------------------------------------*/ + +namespace Test { + +template< typename ScalarType , class DeviceType > +class ReduceFunctor +{ +public: + typedef DeviceType execution_space ; + typedef typename execution_space::size_type size_type ; + + struct value_type { + ScalarType value[3] ; + }; + + const size_type nwork ; + + ReduceFunctor( const size_type & arg_nwork ) : nwork( arg_nwork ) {} + + ReduceFunctor( const ReduceFunctor & rhs ) + : nwork( rhs.nwork ) {} + +/* + KOKKOS_INLINE_FUNCTION + void init( value_type & dst ) const + { + dst.value[0] = 0 ; + dst.value[1] = 0 ; + dst.value[2] = 0 ; + } +*/ + + KOKKOS_INLINE_FUNCTION + void join( volatile value_type & dst , + const volatile value_type & src ) const + { + dst.value[0] += src.value[0] ; + dst.value[1] += src.value[1] ; + dst.value[2] += src.value[2] ; + } + + KOKKOS_INLINE_FUNCTION + void operator()( size_type iwork , value_type & dst ) const + { + dst.value[0] += 1 ; + dst.value[1] += iwork + 1 ; + dst.value[2] += nwork - iwork ; + } +}; + +template< class DeviceType > +class ReduceFunctorFinal : public ReduceFunctor< long , DeviceType > { +public: + + 
typedef typename ReduceFunctor< long , DeviceType >::value_type value_type ; + + ReduceFunctorFinal( const size_t n ) + : ReduceFunctor<long,DeviceType>(n) + {} + + KOKKOS_INLINE_FUNCTION + void final( value_type & dst ) const + { + dst.value[0] = - dst.value[0] ; + dst.value[1] = - dst.value[1] ; + dst.value[2] = - dst.value[2] ; + } +}; + +template< typename ScalarType , class DeviceType > +class RuntimeReduceFunctor +{ +public: + // Required for functor: + typedef DeviceType execution_space ; + typedef ScalarType value_type[] ; + const unsigned value_count ; + + + // Unit test details: + + typedef typename execution_space::size_type size_type ; + + const size_type nwork ; + + RuntimeReduceFunctor( const size_type arg_nwork , + const size_type arg_count ) + : value_count( arg_count ) + , nwork( arg_nwork ) {} + + KOKKOS_INLINE_FUNCTION + void init( ScalarType dst[] ) const + { + for ( unsigned i = 0 ; i < value_count ; ++i ) dst[i] = 0 ; + } + + KOKKOS_INLINE_FUNCTION + void join( volatile ScalarType dst[] , + const volatile ScalarType src[] ) const + { + for ( unsigned i = 0 ; i < value_count ; ++i ) dst[i] += src[i] ; + } + + KOKKOS_INLINE_FUNCTION + void operator()( size_type iwork , ScalarType dst[] ) const + { + const size_type tmp[3] = { 1 , iwork + 1 , nwork - iwork }; + + for ( size_type i = 0 ; i < value_count ; ++i ) { + dst[i] += tmp[ i % 3 ]; + } + } +}; + +template< typename ScalarType , class DeviceType > +class RuntimeReduceMinMax +{ +public: + // Required for functor: + typedef DeviceType execution_space ; + typedef ScalarType value_type[] ; + const unsigned value_count ; + + // Unit test details: + + typedef typename execution_space::size_type size_type ; + + const size_type nwork ; + const ScalarType amin ; + const ScalarType amax ; + + RuntimeReduceMinMax( const size_type arg_nwork , + const size_type arg_count ) + : value_count( arg_count ) + , nwork( arg_nwork ) + , amin( std::numeric_limits<ScalarType>::min() ) + , amax( 
std::numeric_limits<ScalarType>::max() ) + {} + + KOKKOS_INLINE_FUNCTION + void init( ScalarType dst[] ) const + { + for ( unsigned i = 0 ; i < value_count ; ++i ) { + dst[i] = i % 2 ? amax : amin ; + } + } + + KOKKOS_INLINE_FUNCTION + void join( volatile ScalarType dst[] , + const volatile ScalarType src[] ) const + { + for ( unsigned i = 0 ; i < value_count ; ++i ) { + dst[i] = i % 2 ? ( dst[i] < src[i] ? dst[i] : src[i] ) // min + : ( dst[i] > src[i] ? dst[i] : src[i] ); // max + } + } + + KOKKOS_INLINE_FUNCTION + void operator()( size_type iwork , ScalarType dst[] ) const + { + const ScalarType tmp[2] = { ScalarType(iwork + 1) + , ScalarType(nwork - iwork) }; + + for ( size_type i = 0 ; i < value_count ; ++i ) { + dst[i] = i % 2 ? ( dst[i] < tmp[i%2] ? dst[i] : tmp[i%2] ) + : ( dst[i] > tmp[i%2] ? dst[i] : tmp[i%2] ); + } + } +}; + +template< class DeviceType > +class RuntimeReduceFunctorFinal : public RuntimeReduceFunctor< long , DeviceType > { +public: + + typedef RuntimeReduceFunctor< long , DeviceType > base_type ; + typedef typename base_type::value_type value_type ; + typedef long scalar_type ; + + RuntimeReduceFunctorFinal( const size_t theNwork , const size_t count ) : base_type(theNwork,count) {} + + KOKKOS_INLINE_FUNCTION + void final( value_type dst ) const + { + for ( unsigned i = 0 ; i < base_type::value_count ; ++i ) { + dst[i] = - dst[i] ; + } + } +}; +} // namespace Test + +namespace { + +template< typename ScalarType , class DeviceType > +class TestReduce +{ +public: + typedef DeviceType execution_space ; + typedef typename execution_space::size_type size_type ; + + //------------------------------------ + + TestReduce( const size_type & nwork ) + { + run_test(nwork); + run_test_final(nwork); + } + + void run_test( const size_type & nwork ) + { + typedef Test::ReduceFunctor< ScalarType , execution_space > functor_type ; + typedef typename functor_type::value_type value_type ; + + enum { Count = 3 }; + enum { Repeat = 100 }; + + value_type 
result[ Repeat ]; + + const unsigned long nw = nwork ; + const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) + : (nw/2) * ( nw + 1 ); + + for ( unsigned i = 0 ; i < Repeat ; ++i ) { + Kokkos::parallel_reduce( nwork , functor_type(nwork) , result[i] ); + } + + for ( unsigned i = 0 ; i < Repeat ; ++i ) { + for ( unsigned j = 0 ; j < Count ; ++j ) { + const unsigned long correct = 0 == j % 3 ? nw : nsum ; + ASSERT_EQ( (ScalarType) correct , result[i].value[j] ); + } + } + } + + void run_test_final( const size_type & nwork ) + { + typedef Test::ReduceFunctorFinal< execution_space > functor_type ; + typedef typename functor_type::value_type value_type ; + + enum { Count = 3 }; + enum { Repeat = 100 }; + + value_type result[ Repeat ]; + + const unsigned long nw = nwork ; + const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) + : (nw/2) * ( nw + 1 ); + + for ( unsigned i = 0 ; i < Repeat ; ++i ) { + if(i%2==0) + Kokkos::parallel_reduce( nwork , functor_type(nwork) , result[i] ); + else + Kokkos::parallel_reduce( "Reduce", nwork , functor_type(nwork) , result[i] ); + } + + for ( unsigned i = 0 ; i < Repeat ; ++i ) { + for ( unsigned j = 0 ; j < Count ; ++j ) { + const unsigned long correct = 0 == j % 3 ? nw : nsum ; + ASSERT_EQ( (ScalarType) correct , - result[i].value[j] ); + } + } + } +}; + +template< typename ScalarType , class DeviceType > +class TestReduceDynamic +{ +public: + typedef DeviceType execution_space ; + typedef typename execution_space::size_type size_type ; + + //------------------------------------ + + TestReduceDynamic( const size_type nwork ) + { + run_test_dynamic(nwork); + run_test_dynamic_minmax(nwork); + run_test_dynamic_final(nwork); + } + + void run_test_dynamic( const size_type nwork ) + { + typedef Test::RuntimeReduceFunctor< ScalarType , execution_space > functor_type ; + + enum { Count = 3 }; + enum { Repeat = 100 }; + + ScalarType result[ Repeat ][ Count ] ; + + const unsigned long nw = nwork ; + const unsigned long nsum = nw % 2 ? 
nw * (( nw + 1 )/2 ) + : (nw/2) * ( nw + 1 ); + + for ( unsigned i = 0 ; i < Repeat ; ++i ) { + if(i%2==0) + Kokkos::parallel_reduce( nwork , functor_type(nwork,Count) , result[i] ); + else + Kokkos::parallel_reduce( "Reduce", nwork , functor_type(nwork,Count) , result[i] ); + } + + for ( unsigned i = 0 ; i < Repeat ; ++i ) { + for ( unsigned j = 0 ; j < Count ; ++j ) { + const unsigned long correct = 0 == j % 3 ? nw : nsum ; + ASSERT_EQ( (ScalarType) correct , result[i][j] ); + } + } + } + + void run_test_dynamic_minmax( const size_type nwork ) + { + typedef Test::RuntimeReduceMinMax< ScalarType , execution_space > functor_type ; + + enum { Count = 2 }; + enum { Repeat = 100 }; + + ScalarType result[ Repeat ][ Count ] ; + + for ( unsigned i = 0 ; i < Repeat ; ++i ) { + if(i%2==0) + Kokkos::parallel_reduce( nwork , functor_type(nwork,Count) , result[i] ); + else + Kokkos::parallel_reduce( "Reduce", nwork , functor_type(nwork,Count) , result[i] ); + } + + for ( unsigned i = 0 ; i < Repeat ; ++i ) { + for ( unsigned j = 0 ; j < Count ; ++j ) { + const unsigned long correct = j % 2 ? 1 : nwork ; + ASSERT_EQ( (ScalarType) correct , result[i][j] ); + } + } + } + + void run_test_dynamic_final( const size_type nwork ) + { + typedef Test::RuntimeReduceFunctorFinal< execution_space > functor_type ; + + enum { Count = 3 }; + enum { Repeat = 100 }; + + typename functor_type::scalar_type result[ Repeat ][ Count ] ; + + const unsigned long nw = nwork ; + const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) + : (nw/2) * ( nw + 1 ); + + for ( unsigned i = 0 ; i < Repeat ; ++i ) { + if(i%2==0) + Kokkos::parallel_reduce( nwork , functor_type(nwork,Count) , result[i] ); + else + Kokkos::parallel_reduce( "TestKernelReduce" , nwork , functor_type(nwork,Count) , result[i] ); + + } + + for ( unsigned i = 0 ; i < Repeat ; ++i ) { + for ( unsigned j = 0 ; j < Count ; ++j ) { + const unsigned long correct = 0 == j % 3 ? 
nw : nsum ; + ASSERT_EQ( (ScalarType) correct , - result[i][j] ); + } + } + } +}; + +template< typename ScalarType , class DeviceType > +class TestReduceDynamicView +{ +public: + typedef DeviceType execution_space ; + typedef typename execution_space::size_type size_type ; + + //------------------------------------ + + TestReduceDynamicView( const size_type nwork ) + { + run_test_dynamic_view(nwork); + } + + void run_test_dynamic_view( const size_type nwork ) + { + typedef Test::RuntimeReduceFunctor< ScalarType , execution_space > functor_type ; + + typedef Kokkos::View< ScalarType* , DeviceType > result_type ; + typedef typename result_type::HostMirror result_host_type ; + + const unsigned CountLimit = 23 ; + + const unsigned long nw = nwork ; + const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) + : (nw/2) * ( nw + 1 ); + + for ( unsigned count = 0 ; count < CountLimit ; ++count ) { + + result_type result("result",count); + result_host_type host_result = Kokkos::create_mirror( result ); + + // Test result to host pointer: + + std::string str("TestKernelReduce"); + if(count%2==0) + Kokkos::parallel_reduce( nw , functor_type(nw,count) , host_result.ptr_on_device() ); + else + Kokkos::parallel_reduce( str , nw , functor_type(nw,count) , host_result.ptr_on_device() ); + + for ( unsigned j = 0 ; j < count ; ++j ) { + const unsigned long correct = 0 == j % 3 ? nw : nsum ; + ASSERT_EQ( host_result(j), (ScalarType) correct ); + host_result(j) = 0 ; + } + } + } +}; +} + +// Computes y^T*A*x +// (modified from kokkos-tutorials/GTC2016/Exercises/ThreeLevelPar ) + +#if ( ! 
defined( KOKKOS_HAVE_CUDA ) ) || defined( KOKKOS_CUDA_USE_LAMBDA ) + +template< typename ScalarType , class DeviceType > +class TestTripleNestedReduce +{ +public: + typedef DeviceType execution_space ; + typedef typename execution_space::size_type size_type ; + + //------------------------------------ + + TestTripleNestedReduce( const size_type & nrows , const size_type & ncols + , const size_type & team_size , const size_type & vector_length ) + { + run_test( nrows , ncols , team_size, vector_length ); + } + + void run_test( const size_type & nrows , const size_type & ncols + , const size_type & team_size, const size_type & vector_length ) + { + //typedef Kokkos::LayoutLeft Layout; + typedef Kokkos::LayoutRight Layout; + + typedef Kokkos::View<ScalarType* , DeviceType> ViewVector; + typedef Kokkos::View<ScalarType** , Layout , DeviceType> ViewMatrix; + ViewVector y( "y" , nrows ); + ViewVector x( "x" , ncols ); + ViewMatrix A( "A" , nrows , ncols ); + + typedef Kokkos::RangePolicy<DeviceType> range_policy; + + // Initialize y vector + Kokkos::parallel_for( range_policy( 0 , nrows ) , KOKKOS_LAMBDA( const int i ) { y( i ) = 1; } ); + + // Initialize x vector + Kokkos::parallel_for( range_policy( 0 , ncols ) , KOKKOS_LAMBDA( const int i ) { x( i ) = 1; } ); + + typedef Kokkos::TeamPolicy<DeviceType> team_policy; + typedef typename Kokkos::TeamPolicy<DeviceType>::member_type member_type; + + // Initialize A matrix, note 2D indexing computation + Kokkos::parallel_for( team_policy( nrows , Kokkos::AUTO ) , KOKKOS_LAMBDA( const member_type& teamMember ) { + const int j = teamMember.league_rank(); + Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember , ncols ) , [&] ( const int i ) { + A( j , i ) = 1; + } ); + } ); + + // Three level parallelism kernel to force caching of vector x + ScalarType result = 0.0; + int chunk_size = 128; + Kokkos::parallel_reduce( team_policy( nrows/chunk_size , team_size , vector_length ) , KOKKOS_LAMBDA ( const member_type& teamMember , 
double &update ) { + const int row_start = teamMember.league_rank() * chunk_size; + const int row_end = row_start + chunk_size; + Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember , row_start , row_end ) , [&] ( const int i ) { + ScalarType sum_i = 0.0; + Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( teamMember , ncols ) , [&] ( const int j , ScalarType &innerUpdate ) { + innerUpdate += A( i , j ) * x( j ); + } , sum_i ); + Kokkos::single( Kokkos::PerThread( teamMember ) , [&] () { + update += y( i ) * sum_i; + } ); + } ); + } , result ); + + const ScalarType solution= ( ScalarType ) nrows * ( ScalarType ) ncols; + ASSERT_EQ( solution , result ); + } +}; + +#else /* #if ( ! defined( KOKKOS_HAVE_CUDA ) ) || defined( KOKKOS_CUDA_USE_LAMBDA ) */ + +template< typename ScalarType , class DeviceType > +class TestTripleNestedReduce +{ +public: + typedef DeviceType execution_space ; + typedef typename execution_space::size_type size_type ; + + TestTripleNestedReduce( const size_type & , const size_type + , const size_type & , const size_type ) + { } +}; + +#endif + +//-------------------------------------------------------------------------- + +namespace Test { +namespace ReduceCombinatorical { + +template<class Scalar,class Space = Kokkos::HostSpace> +struct AddPlus { +public: + //Required + typedef AddPlus reducer_type; + typedef Scalar value_type; + + typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; + +private: + result_view_type result; + +public: + + AddPlus(value_type& result_):result(&result_) {} + + //Required + KOKKOS_INLINE_FUNCTION + void join(value_type& dest, const value_type& src) const { + dest += src + 1; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& dest, const volatile value_type& src) const { + dest += src + 1; + } + + //Optional + KOKKOS_INLINE_FUNCTION + void init( value_type& val) const { + val = value_type(); + } + + result_view_type result_view() const { + return 
result; + } +}; + +template<int ISTEAM> +struct FunctorScalar; + +template<> +struct FunctorScalar<0>{ + FunctorScalar(Kokkos::View<double> r):result(r) {} + Kokkos::View<double> result; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i,double& update) const { + update+=i; + } +}; + +template<> +struct FunctorScalar<1>{ + FunctorScalar(Kokkos::View<double> r):result(r) {} + Kokkos::View<double> result; + + typedef Kokkos::TeamPolicy<>::member_type team_type; + KOKKOS_INLINE_FUNCTION + void operator() (const team_type& team,double& update) const { + update+=1.0/team.team_size()*team.league_rank(); + } +}; + +template<int ISTEAM> +struct FunctorScalarInit; + +template<> +struct FunctorScalarInit<0> { + FunctorScalarInit(Kokkos::View<double> r):result(r) {} + + Kokkos::View<double> result; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, double& update) const { + update += i; + } + + KOKKOS_INLINE_FUNCTION + void init(double& update) const { + update = 0.0; + } +}; + +template<> +struct FunctorScalarInit<1> { + FunctorScalarInit(Kokkos::View<double> r):result(r) {} + + Kokkos::View<double> result; + + typedef Kokkos::TeamPolicy<>::member_type team_type; + KOKKOS_INLINE_FUNCTION + void operator() (const team_type& team,double& update) const { + update+=1.0/team.team_size()*team.league_rank(); + } + + KOKKOS_INLINE_FUNCTION + void init(double& update) const { + update = 0.0; + } +}; + +template<int ISTEAM> +struct FunctorScalarFinal; + + +template<> +struct FunctorScalarFinal<0> { + FunctorScalarFinal(Kokkos::View<double> r):result(r) {} + + Kokkos::View<double> result; + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, double& update) const { + update += i; + } + + KOKKOS_INLINE_FUNCTION + void final(double& update) const { + result() = update; + } +}; + +template<> +struct FunctorScalarFinal<1> { + FunctorScalarFinal(Kokkos::View<double> r):result(r) {} + + Kokkos::View<double> result; + + typedef Kokkos::TeamPolicy<>::member_type team_type; 
+ + KOKKOS_INLINE_FUNCTION + void operator() (const team_type& team, double& update) const { + update+=1.0/team.team_size()*team.league_rank(); + } + KOKKOS_INLINE_FUNCTION + void final(double& update) const { + result() = update; + } +}; + +template<int ISTEAM> +struct FunctorScalarJoin; + +template<> +struct FunctorScalarJoin<0> { + FunctorScalarJoin(Kokkos::View<double> r):result(r) {} + + Kokkos::View<double> result; + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, double& update) const { + update += i; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile double& dst, const volatile double& update) const { + dst += update; + } +}; + +template<> +struct FunctorScalarJoin<1> { + FunctorScalarJoin(Kokkos::View<double> r):result(r) {} + + Kokkos::View<double> result; + + typedef Kokkos::TeamPolicy<>::member_type team_type; + KOKKOS_INLINE_FUNCTION + void operator() (const team_type& team,double& update) const { + update+=1.0/team.team_size()*team.league_rank(); + } + + KOKKOS_INLINE_FUNCTION + void join(volatile double& dst, const volatile double& update) const { + dst += update; + } +}; + +template<int ISTEAM> +struct FunctorScalarJoinFinal; + +template<> +struct FunctorScalarJoinFinal<0> { + FunctorScalarJoinFinal(Kokkos::View<double> r):result(r) {} + + Kokkos::View<double> result; + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, double& update) const { + update += i; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile double& dst, const volatile double& update) const { + dst += update; + } + + KOKKOS_INLINE_FUNCTION + void final(double& update) const { + result() = update; + } +}; + +template<> +struct FunctorScalarJoinFinal<1> { + FunctorScalarJoinFinal(Kokkos::View<double> r):result(r) {} + + Kokkos::View<double> result; + + typedef Kokkos::TeamPolicy<>::member_type team_type; + KOKKOS_INLINE_FUNCTION + void operator() (const team_type& team,double& update) const { + update+=1.0/team.team_size()*team.league_rank(); + } + + 
KOKKOS_INLINE_FUNCTION + void join(volatile double& dst, const volatile double& update) const { + dst += update; + } + + KOKKOS_INLINE_FUNCTION + void final(double& update) const { + result() = update; + } +}; + +template<int ISTEAM> +struct FunctorScalarJoinInit; + +template<> +struct FunctorScalarJoinInit<0> { + FunctorScalarJoinInit(Kokkos::View<double> r):result(r) {} + + Kokkos::View<double> result; + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, double& update) const { + update += i; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile double& dst, const volatile double& update) const { + dst += update; + } + + KOKKOS_INLINE_FUNCTION + void init(double& update) const { + update = 0.0; + } +}; + +template<> +struct FunctorScalarJoinInit<1> { + FunctorScalarJoinInit(Kokkos::View<double> r):result(r) {} + + Kokkos::View<double> result; + + typedef Kokkos::TeamPolicy<>::member_type team_type; + KOKKOS_INLINE_FUNCTION + void operator() (const team_type& team,double& update) const { + update+=1.0/team.team_size()*team.league_rank(); + } + + KOKKOS_INLINE_FUNCTION + void join(volatile double& dst, const volatile double& update) const { + dst += update; + } + + KOKKOS_INLINE_FUNCTION + void init(double& update) const { + update = 0.0; + } +}; + +template<int ISTEAM> +struct FunctorScalarJoinFinalInit; + +template<> +struct FunctorScalarJoinFinalInit<0> { + FunctorScalarJoinFinalInit(Kokkos::View<double> r):result(r) {} + + Kokkos::View<double> result; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, double& update) const { + update += i; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile double& dst, const volatile double& update) const { + dst += update; + } + + KOKKOS_INLINE_FUNCTION + void final(double& update) const { + result() = update; + } + + KOKKOS_INLINE_FUNCTION + void init(double& update) const { + update = 0.0; + } +}; + +template<> +struct FunctorScalarJoinFinalInit<1> { + FunctorScalarJoinFinalInit(Kokkos::View<double> r):result(r) 
{} + + Kokkos::View<double> result; + + typedef Kokkos::TeamPolicy<>::member_type team_type; + KOKKOS_INLINE_FUNCTION + void operator() (const team_type& team,double& update) const { + update+=1.0/team.team_size()*team.league_rank(); + } + + KOKKOS_INLINE_FUNCTION + void join(volatile double& dst, const volatile double& update) const { + dst += update; + } + + KOKKOS_INLINE_FUNCTION + void final(double& update) const { + result() = update; + } + + KOKKOS_INLINE_FUNCTION + void init(double& update) const { + update = 0.0; + } +}; +struct Functor1 { + KOKKOS_INLINE_FUNCTION + void operator() (const int& i,double& update) const { + update+=i; + } +}; + +struct Functor2 { + typedef double value_type[]; + const unsigned value_count; + + Functor2(unsigned n):value_count(n){} + + KOKKOS_INLINE_FUNCTION + void operator() (const unsigned& i,double update[]) const { + for(unsigned j=0;j<value_count;j++) + update[j]+=i; + } + + KOKKOS_INLINE_FUNCTION + void init( double dst[] ) const + { + for ( unsigned i = 0 ; i < value_count ; ++i ) dst[i] = 0 ; + } + + KOKKOS_INLINE_FUNCTION + void join( volatile double dst[] , + const volatile double src[] ) const + { + for ( unsigned i = 0 ; i < value_count ; ++i ) dst[i] += src[i] ; + } +}; + +} +} + +namespace Test { + +template<class ExecSpace = Kokkos::DefaultExecutionSpace> +struct TestReduceCombinatoricalInstantiation { + template<class ... Args> + static void CallParallelReduce(Args... args) { + Kokkos::parallel_reduce(args...); + } + + template<class ... Args> + static void AddReturnArgument(Args... 
args) { + Kokkos::View<double,Kokkos::HostSpace> result_view("ResultView"); + double expected_result = 1000.0*999.0/2.0; + + double value = 0; + Kokkos::parallel_reduce(args...,value); + ASSERT_EQ(expected_result,value); + + result_view() = 0; + CallParallelReduce(args...,result_view); + ASSERT_EQ(expected_result,result_view()); + + value = 0; + CallParallelReduce(args...,Kokkos::View<double,Kokkos::HostSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>(&value)); + ASSERT_EQ(expected_result,value); + + result_view() = 0; + const Kokkos::View<double,Kokkos::HostSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> result_view_const_um = result_view; + CallParallelReduce(args...,result_view_const_um); + ASSERT_EQ(expected_result,result_view_const_um()); + + value = 0; + CallParallelReduce(args...,Test::ReduceCombinatorical::AddPlus<double>(value)); + if((Kokkos::DefaultExecutionSpace::concurrency() > 1) && (ExecSpace::concurrency()>1)) + ASSERT_TRUE(expected_result<value); + else if((Kokkos::DefaultExecutionSpace::concurrency() > 1) || (ExecSpace::concurrency()>1)) + ASSERT_TRUE(expected_result<=value); + else + ASSERT_EQ(expected_result,value); + + value = 0; + Test::ReduceCombinatorical::AddPlus<double> add(value); + CallParallelReduce(args...,add); + if((Kokkos::DefaultExecutionSpace::concurrency() > 1) && (ExecSpace::concurrency()>1)) + ASSERT_TRUE(expected_result<value); + else if((Kokkos::DefaultExecutionSpace::concurrency() > 1) || (ExecSpace::concurrency()>1)) + ASSERT_TRUE(expected_result<=value); + else + ASSERT_EQ(expected_result,value); + } + + + template<class ... Args> + static void AddLambdaRange(void*,Args... args) { + AddReturnArgument(args..., KOKKOS_LAMBDA (const int&i , double& lsum) { + lsum += i; + }); + } + + template<class ... Args> + static void AddLambdaTeam(void*,Args... 
args) { + AddReturnArgument(args..., KOKKOS_LAMBDA (const Kokkos::TeamPolicy<>::member_type& team, double& update) { + update+=1.0/team.team_size()*team.league_rank(); + }); + } + + template<class ... Args> + static void AddLambdaRange(Kokkos::InvalidType,Args... args) { + } + + template<class ... Args> + static void AddLambdaTeam(Kokkos::InvalidType,Args... args) { + } + + template<int ISTEAM, class ... Args> + static void AddFunctor(Args... args) { + Kokkos::View<double> result_view("FunctorView"); + auto h_r = Kokkos::create_mirror_view(result_view); + Test::ReduceCombinatorical::FunctorScalar<ISTEAM> functor(result_view); + double expected_result = 1000.0*999.0/2.0; + + AddReturnArgument(args..., functor); + AddReturnArgument(args..., Test::ReduceCombinatorical::FunctorScalar<ISTEAM>(result_view)); + AddReturnArgument(args..., Test::ReduceCombinatorical::FunctorScalarInit<ISTEAM>(result_view)); + AddReturnArgument(args..., Test::ReduceCombinatorical::FunctorScalarJoin<ISTEAM>(result_view)); + AddReturnArgument(args..., Test::ReduceCombinatorical::FunctorScalarJoinInit<ISTEAM>(result_view)); + + h_r() = 0; + Kokkos::deep_copy(result_view,h_r); + CallParallelReduce(args..., Test::ReduceCombinatorical::FunctorScalarFinal<ISTEAM>(result_view)); + Kokkos::deep_copy(h_r,result_view); + ASSERT_EQ(expected_result,h_r()); + + h_r() = 0; + Kokkos::deep_copy(result_view,h_r); + CallParallelReduce(args..., Test::ReduceCombinatorical::FunctorScalarJoinFinal<ISTEAM>(result_view)); + Kokkos::deep_copy(h_r,result_view); + ASSERT_EQ(expected_result,h_r()); + + h_r() = 0; + Kokkos::deep_copy(result_view,h_r); + CallParallelReduce(args..., Test::ReduceCombinatorical::FunctorScalarJoinFinalInit<ISTEAM>(result_view)); + Kokkos::deep_copy(h_r,result_view); + ASSERT_EQ(expected_result,h_r()); + } + + template<class ... Args> + static void AddFunctorLambdaRange(Args... 
args) { + AddFunctor<0,Args...>(args...); + #ifdef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA + AddLambdaRange(typename std::conditional<std::is_same<ExecSpace,Kokkos::DefaultExecutionSpace>::value,void*,Kokkos::InvalidType>::type(), args...); + #endif + } + + template<class ... Args> + static void AddFunctorLambdaTeam(Args... args) { + AddFunctor<1,Args...>(args...); + #ifdef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA + AddLambdaTeam(typename std::conditional<std::is_same<ExecSpace,Kokkos::DefaultExecutionSpace>::value,void*,Kokkos::InvalidType>::type(), args...); + #endif + } + + template<class ... Args> + static void AddPolicy(Args... args) { + int N = 1000; + Kokkos::RangePolicy<ExecSpace> policy(0,N); + + AddFunctorLambdaRange(args...,1000); + AddFunctorLambdaRange(args...,N); + AddFunctorLambdaRange(args...,policy); + AddFunctorLambdaRange(args...,Kokkos::RangePolicy<ExecSpace>(0,N)); + AddFunctorLambdaRange(args...,Kokkos::RangePolicy<ExecSpace,Kokkos::Schedule<Kokkos::Dynamic> >(0,N)); + AddFunctorLambdaRange(args...,Kokkos::RangePolicy<ExecSpace,Kokkos::Schedule<Kokkos::Static> >(0,N).set_chunk_size(10)); + AddFunctorLambdaRange(args...,Kokkos::RangePolicy<ExecSpace,Kokkos::Schedule<Kokkos::Dynamic> >(0,N).set_chunk_size(10)); + + AddFunctorLambdaTeam(args...,Kokkos::TeamPolicy<ExecSpace>(N,Kokkos::AUTO)); + AddFunctorLambdaTeam(args...,Kokkos::TeamPolicy<ExecSpace,Kokkos::Schedule<Kokkos::Dynamic> >(N,Kokkos::AUTO)); + AddFunctorLambdaTeam(args...,Kokkos::TeamPolicy<ExecSpace,Kokkos::Schedule<Kokkos::Static> >(N,Kokkos::AUTO).set_chunk_size(10)); + AddFunctorLambdaTeam(args...,Kokkos::TeamPolicy<ExecSpace,Kokkos::Schedule<Kokkos::Dynamic> >(N,Kokkos::AUTO).set_chunk_size(10)); + } + + + static void AddLabel() { + std::string s("Std::String"); + AddPolicy(); + AddPolicy("Char Constant"); + AddPolicy(s.c_str()); + AddPolicy(s); + } + + static void execute() { + AddLabel(); + } +}; + +template<class Scalar, class ExecSpace = Kokkos::DefaultExecutionSpace> +struct TestReducers 
{ + + struct SumFunctor { + Kokkos::View<const Scalar*,ExecSpace> values; + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, Scalar& value) const { + value += values(i); + } + }; + + struct ProdFunctor { + Kokkos::View<const Scalar*,ExecSpace> values; + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, Scalar& value) const { + value *= values(i); + } + }; + + struct MinFunctor { + Kokkos::View<const Scalar*,ExecSpace> values; + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, Scalar& value) const { + if(values(i) < value) + value = values(i); + } + }; + + struct MaxFunctor { + Kokkos::View<const Scalar*,ExecSpace> values; + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, Scalar& value) const { + if(values(i) > value) + value = values(i); + } + }; + + struct MinLocFunctor { + Kokkos::View<const Scalar*,ExecSpace> values; + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, + typename Kokkos::Experimental::MinLoc<Scalar,int>::value_type& value) const { + if(values(i) < value.val) { + value.val = values(i); + value.loc = i; + } + } + }; + + struct MaxLocFunctor { + Kokkos::View<const Scalar*,ExecSpace> values; + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, + typename Kokkos::Experimental::MaxLoc<Scalar,int>::value_type& value) const { + if(values(i) > value.val) { + value.val = values(i); + value.loc = i; + } + } + }; + + struct MinMaxLocFunctor { + Kokkos::View<const Scalar*,ExecSpace> values; + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, + typename Kokkos::Experimental::MinMaxLoc<Scalar,int>::value_type& value) const { + if(values(i) > value.max_val) { + value.max_val = values(i); + value.max_loc = i; + } + if(values(i) < value.min_val) { + value.min_val = values(i); + value.min_loc = i; + } + } + }; + + struct BAndFunctor { + Kokkos::View<const Scalar*,ExecSpace> values; + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, Scalar& value) const { + value = value & values(i); + } + }; + + struct BOrFunctor 
{ + Kokkos::View<const Scalar*,ExecSpace> values; + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, Scalar& value) const { + value = value | values(i); + } + }; + + struct BXorFunctor { + Kokkos::View<const Scalar*,ExecSpace> values; + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, Scalar& value) const { + value = value ^ values(i); + } + }; + + struct LAndFunctor { + Kokkos::View<const Scalar*,ExecSpace> values; + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, Scalar& value) const { + value = value && values(i); + } + }; + + struct LOrFunctor { + Kokkos::View<const Scalar*,ExecSpace> values; + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, Scalar& value) const { + value = value || values(i); + } + }; + + struct LXorFunctor { + Kokkos::View<const Scalar*,ExecSpace> values; + KOKKOS_INLINE_FUNCTION + void operator() (const int& i, Scalar& value) const { + value = value ? (!values(i)) : values(i); + } + }; + + static void test_sum(int N) { + Kokkos::View<Scalar*,ExecSpace> values("Values",N); + auto h_values = Kokkos::create_mirror_view(values); + Scalar reference_sum = 0; + for(int i=0; i<N; i++) { + h_values(i) = (Scalar)(rand()%100); + reference_sum += h_values(i); + } + Kokkos::deep_copy(values,h_values); + + SumFunctor f; + f.values = values; + Scalar init = 0; + + { + Scalar sum_scalar = init; + Kokkos::Experimental::Sum<Scalar> reducer_scalar(sum_scalar); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); + ASSERT_EQ(sum_scalar,reference_sum); + Scalar sum_scalar_view = reducer_scalar.result_view()(); + ASSERT_EQ(sum_scalar_view,reference_sum); + } + { + Scalar sum_scalar_init = init; + Kokkos::Experimental::Sum<Scalar> reducer_scalar_init(sum_scalar_init,init); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init); + ASSERT_EQ(sum_scalar_init,reference_sum); + Scalar sum_scalar_init_view = reducer_scalar_init.result_view()(); + 
ASSERT_EQ(sum_scalar_init_view,reference_sum); + } + { + Kokkos::View<Scalar,Kokkos::HostSpace> sum_view("View"); + sum_view() = init; + Kokkos::Experimental::Sum<Scalar> reducer_view(sum_view); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Scalar sum_view_scalar = sum_view(); + ASSERT_EQ(sum_view_scalar,reference_sum); + Scalar sum_view_view = reducer_view.result_view()(); + ASSERT_EQ(sum_view_view,reference_sum); + } + { + Kokkos::View<Scalar,Kokkos::HostSpace> sum_view_init("View"); + sum_view_init() = init; + Kokkos::Experimental::Sum<Scalar> reducer_view_init(sum_view_init,init); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init); + Scalar sum_view_init_scalar = sum_view_init(); + ASSERT_EQ(sum_view_init_scalar,reference_sum); + Scalar sum_view_init_view = reducer_view_init.result_view()(); + ASSERT_EQ(sum_view_init_view,reference_sum); + } + } + + static void test_prod(int N) { + Kokkos::View<Scalar*,ExecSpace> values("Values",N); + auto h_values = Kokkos::create_mirror_view(values); + Scalar reference_prod = 1; + for(int i=0; i<N; i++) { + h_values(i) = (Scalar)(rand()%4+1); + reference_prod *= h_values(i); + } + Kokkos::deep_copy(values,h_values); + + ProdFunctor f; + f.values = values; + Scalar init = 1; + + if(std::is_arithmetic<Scalar>::value) + { + Scalar prod_scalar = init; + Kokkos::Experimental::Prod<Scalar> reducer_scalar(prod_scalar); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); + ASSERT_EQ(prod_scalar,reference_prod); + Scalar prod_scalar_view = reducer_scalar.result_view()(); + ASSERT_EQ(prod_scalar_view,reference_prod); + } + { + Scalar prod_scalar_init = init; + Kokkos::Experimental::Prod<Scalar> reducer_scalar_init(prod_scalar_init,init); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init); + ASSERT_EQ(prod_scalar_init,reference_prod); + Scalar prod_scalar_init_view = reducer_scalar_init.result_view()(); + 
ASSERT_EQ(prod_scalar_init_view,reference_prod); + } + + if(std::is_arithmetic<Scalar>::value) + { + Kokkos::View<Scalar,Kokkos::HostSpace> prod_view("View"); + prod_view() = init; + Kokkos::Experimental::Prod<Scalar> reducer_view(prod_view); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Scalar prod_view_scalar = prod_view(); + ASSERT_EQ(prod_view_scalar,reference_prod); + Scalar prod_view_view = reducer_view.result_view()(); + ASSERT_EQ(prod_view_view,reference_prod); + } + { + Kokkos::View<Scalar,Kokkos::HostSpace> prod_view_init("View"); + prod_view_init() = init; + Kokkos::Experimental::Prod<Scalar> reducer_view_init(prod_view_init,init); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init); + Scalar prod_view_init_scalar = prod_view_init(); + ASSERT_EQ(prod_view_init_scalar,reference_prod); + Scalar prod_view_init_view = reducer_view_init.result_view()(); + ASSERT_EQ(prod_view_init_view,reference_prod); + } + } + + static void test_min(int N) { + Kokkos::View<Scalar*,ExecSpace> values("Values",N); + auto h_values = Kokkos::create_mirror_view(values); + Scalar reference_min = std::numeric_limits<Scalar>::max(); + for(int i=0; i<N; i++) { + h_values(i) = (Scalar)(rand()%100000); + if(h_values(i)<reference_min) + reference_min = h_values(i); + } + Kokkos::deep_copy(values,h_values); + + MinFunctor f; + f.values = values; + Scalar init = std::numeric_limits<Scalar>::max(); + + { + Scalar min_scalar = init; + Kokkos::Experimental::Min<Scalar> reducer_scalar(min_scalar); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); + ASSERT_EQ(min_scalar,reference_min); + Scalar min_scalar_view = reducer_scalar.result_view()(); + ASSERT_EQ(min_scalar_view,reference_min); + } + { + Scalar min_scalar_init = init; + Kokkos::Experimental::Min<Scalar> reducer_scalar_init(min_scalar_init,init); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init); + 
ASSERT_EQ(min_scalar_init,reference_min); + Scalar min_scalar_init_view = reducer_scalar_init.result_view()(); + ASSERT_EQ(min_scalar_init_view,reference_min); + } + { + Kokkos::View<Scalar,Kokkos::HostSpace> min_view("View"); + min_view() = init; + Kokkos::Experimental::Min<Scalar> reducer_view(min_view); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Scalar min_view_scalar = min_view(); + ASSERT_EQ(min_view_scalar,reference_min); + Scalar min_view_view = reducer_view.result_view()(); + ASSERT_EQ(min_view_view,reference_min); + } + { + Kokkos::View<Scalar,Kokkos::HostSpace> min_view_init("View"); + min_view_init() = init; + Kokkos::Experimental::Min<Scalar> reducer_view_init(min_view_init,init); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init); + Scalar min_view_init_scalar = min_view_init(); + ASSERT_EQ(min_view_init_scalar,reference_min); + Scalar min_view_init_view = reducer_view_init.result_view()(); + ASSERT_EQ(min_view_init_view,reference_min); + } + } + + static void test_max(int N) { + Kokkos::View<Scalar*,ExecSpace> values("Values",N); + auto h_values = Kokkos::create_mirror_view(values); + Scalar reference_max = std::numeric_limits<Scalar>::min(); + for(int i=0; i<N; i++) { + h_values(i) = (Scalar)(rand()%100000+1); + if(h_values(i)>reference_max) + reference_max = h_values(i); + } + Kokkos::deep_copy(values,h_values); + + MaxFunctor f; + f.values = values; + Scalar init = std::numeric_limits<Scalar>::min(); + + { + Scalar max_scalar = init; + Kokkos::Experimental::Max<Scalar> reducer_scalar(max_scalar); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); + ASSERT_EQ(max_scalar,reference_max); + Scalar max_scalar_view = reducer_scalar.result_view()(); + ASSERT_EQ(max_scalar_view,reference_max); + } + { + Scalar max_scalar_init = init; + Kokkos::Experimental::Max<Scalar> reducer_scalar_init(max_scalar_init,init); + 
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init); + ASSERT_EQ(max_scalar_init,reference_max); + Scalar max_scalar_init_view = reducer_scalar_init.result_view()(); + ASSERT_EQ(max_scalar_init_view,reference_max); + } + { + Kokkos::View<Scalar,Kokkos::HostSpace> max_view("View"); + max_view() = init; + Kokkos::Experimental::Max<Scalar> reducer_view(max_view); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Scalar max_view_scalar = max_view(); + ASSERT_EQ(max_view_scalar,reference_max); + Scalar max_view_view = reducer_view.result_view()(); + ASSERT_EQ(max_view_view,reference_max); + } + { + Kokkos::View<Scalar,Kokkos::HostSpace> max_view_init("View"); + max_view_init() = init; + Kokkos::Experimental::Max<Scalar> reducer_view_init(max_view_init,init); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init); + Scalar max_view_init_scalar = max_view_init(); + ASSERT_EQ(max_view_init_scalar,reference_max); + Scalar max_view_init_view = reducer_view_init.result_view()(); + ASSERT_EQ(max_view_init_view,reference_max); + } + } + + static void test_minloc(int N) { + Kokkos::View<Scalar*,ExecSpace> values("Values",N); + auto h_values = Kokkos::create_mirror_view(values); + Scalar reference_min = std::numeric_limits<Scalar>::max(); + int reference_loc = -1; + for(int i=0; i<N; i++) { + h_values(i) = (Scalar)(rand()%100000); + if(h_values(i)<reference_min) { + reference_min = h_values(i); + reference_loc = i; + } + } + Kokkos::deep_copy(values,h_values); + + MinLocFunctor f; + typedef typename Kokkos::Experimental::MinLoc<Scalar,int>::value_type value_type; + f.values = values; + Scalar init = std::numeric_limits<Scalar>::max(); + + + { + value_type min_scalar; + Kokkos::Experimental::MinLoc<Scalar,int> reducer_scalar(min_scalar); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); + ASSERT_EQ(min_scalar.val,reference_min); + 
ASSERT_EQ(min_scalar.loc,reference_loc); + value_type min_scalar_view = reducer_scalar.result_view()(); + ASSERT_EQ(min_scalar_view.val,reference_min); + ASSERT_EQ(min_scalar_view.loc,reference_loc); + } + { + value_type min_scalar_init; + Kokkos::Experimental::MinLoc<Scalar,int> reducer_scalar_init(min_scalar_init,init); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init); + ASSERT_EQ(min_scalar_init.val,reference_min); + ASSERT_EQ(min_scalar_init.loc,reference_loc); + value_type min_scalar_init_view = reducer_scalar_init.result_view()(); + ASSERT_EQ(min_scalar_init_view.val,reference_min); + ASSERT_EQ(min_scalar_init_view.loc,reference_loc); + } + { + Kokkos::View<value_type,Kokkos::HostSpace> min_view("View"); + Kokkos::Experimental::MinLoc<Scalar,int> reducer_view(min_view); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + value_type min_view_scalar = min_view(); + ASSERT_EQ(min_view_scalar.val,reference_min); + ASSERT_EQ(min_view_scalar.loc,reference_loc); + value_type min_view_view = reducer_view.result_view()(); + ASSERT_EQ(min_view_view.val,reference_min); + ASSERT_EQ(min_view_view.loc,reference_loc); + } + { + Kokkos::View<value_type,Kokkos::HostSpace> min_view_init("View"); + Kokkos::Experimental::MinLoc<Scalar,int> reducer_view_init(min_view_init,init); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init); + value_type min_view_init_scalar = min_view_init(); + ASSERT_EQ(min_view_init_scalar.val,reference_min); + ASSERT_EQ(min_view_init_scalar.loc,reference_loc); + value_type min_view_init_view = reducer_view_init.result_view()(); + ASSERT_EQ(min_view_init_view.val,reference_min); + ASSERT_EQ(min_view_init_view.loc,reference_loc); + } + } + + static void test_maxloc(int N) { + Kokkos::View<Scalar*,ExecSpace> values("Values",N); + auto h_values = Kokkos::create_mirror_view(values); + Scalar reference_max = std::numeric_limits<Scalar>::min(); + int reference_loc = 
-1; + for(int i=0; i<N; i++) { + h_values(i) = (Scalar)(rand()%100000); + if(h_values(i)>reference_max) { + reference_max = h_values(i); + reference_loc = i; + } + } + Kokkos::deep_copy(values,h_values); + + MaxLocFunctor f; + typedef typename Kokkos::Experimental::MaxLoc<Scalar,int>::value_type value_type; + f.values = values; + Scalar init = std::numeric_limits<Scalar>::min(); + + + { + value_type max_scalar; + Kokkos::Experimental::MaxLoc<Scalar,int> reducer_scalar(max_scalar); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); + ASSERT_EQ(max_scalar.val,reference_max); + ASSERT_EQ(max_scalar.loc,reference_loc); + value_type max_scalar_view = reducer_scalar.result_view()(); + ASSERT_EQ(max_scalar_view.val,reference_max); + ASSERT_EQ(max_scalar_view.loc,reference_loc); + } + { + value_type max_scalar_init; + Kokkos::Experimental::MaxLoc<Scalar,int> reducer_scalar_init(max_scalar_init,init); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init); + ASSERT_EQ(max_scalar_init.val,reference_max); + ASSERT_EQ(max_scalar_init.loc,reference_loc); + value_type max_scalar_init_view = reducer_scalar_init.result_view()(); + ASSERT_EQ(max_scalar_init_view.val,reference_max); + ASSERT_EQ(max_scalar_init_view.loc,reference_loc); + } + { + Kokkos::View<value_type,Kokkos::HostSpace> max_view("View"); + Kokkos::Experimental::MaxLoc<Scalar,int> reducer_view(max_view); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + value_type max_view_scalar = max_view(); + ASSERT_EQ(max_view_scalar.val,reference_max); + ASSERT_EQ(max_view_scalar.loc,reference_loc); + value_type max_view_view = reducer_view.result_view()(); + ASSERT_EQ(max_view_view.val,reference_max); + ASSERT_EQ(max_view_view.loc,reference_loc); + } + { + Kokkos::View<value_type,Kokkos::HostSpace> max_view_init("View"); + Kokkos::Experimental::MaxLoc<Scalar,int> reducer_view_init(max_view_init,init); + 
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init); + value_type max_view_init_scalar = max_view_init(); + ASSERT_EQ(max_view_init_scalar.val,reference_max); + ASSERT_EQ(max_view_init_scalar.loc,reference_loc); + value_type max_view_init_view = reducer_view_init.result_view()(); + ASSERT_EQ(max_view_init_view.val,reference_max); + ASSERT_EQ(max_view_init_view.loc,reference_loc); + } + } + + static void test_minmaxloc(int N) { + Kokkos::View<Scalar*,ExecSpace> values("Values",N); + auto h_values = Kokkos::create_mirror_view(values); + Scalar reference_max = std::numeric_limits<Scalar>::min(); + Scalar reference_min = std::numeric_limits<Scalar>::max(); + int reference_minloc = -1; + int reference_maxloc = -1; + for(int i=0; i<N; i++) { + h_values(i) = (Scalar)(rand()%100000); + if(h_values(i)>reference_max) { + reference_max = h_values(i); + reference_maxloc = i; + } + if(h_values(i)<reference_min) { + reference_min = h_values(i); + reference_minloc = i; + } + } + Kokkos::deep_copy(values,h_values); + + MinMaxLocFunctor f; + typedef typename Kokkos::Experimental::MinMaxLoc<Scalar,int>::value_type value_type; + f.values = values; + Scalar init_min = std::numeric_limits<Scalar>::max(); + Scalar init_max = std::numeric_limits<Scalar>::min(); + + + { + value_type minmax_scalar; + Kokkos::Experimental::MinMaxLoc<Scalar,int> reducer_scalar(minmax_scalar); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); + ASSERT_EQ(minmax_scalar.min_val,reference_min); + ASSERT_EQ(minmax_scalar.min_loc,reference_minloc); + ASSERT_EQ(minmax_scalar.max_val,reference_max); + ASSERT_EQ(minmax_scalar.max_loc,reference_maxloc); + value_type minmax_scalar_view = reducer_scalar.result_view()(); + ASSERT_EQ(minmax_scalar_view.min_val,reference_min); + ASSERT_EQ(minmax_scalar_view.min_loc,reference_minloc); + ASSERT_EQ(minmax_scalar_view.max_val,reference_max); + ASSERT_EQ(minmax_scalar_view.max_loc,reference_maxloc); + } + { + 
value_type minmax_scalar_init; + Kokkos::Experimental::MinMaxLoc<Scalar,int> reducer_scalar_init(minmax_scalar_init,init_min,init_max); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init); + ASSERT_EQ(minmax_scalar_init.min_val,reference_min); + ASSERT_EQ(minmax_scalar_init.min_loc,reference_minloc); + ASSERT_EQ(minmax_scalar_init.max_val,reference_max); + ASSERT_EQ(minmax_scalar_init.max_loc,reference_maxloc); + value_type minmax_scalar_init_view = reducer_scalar_init.result_view()(); + ASSERT_EQ(minmax_scalar_init_view.min_val,reference_min); + ASSERT_EQ(minmax_scalar_init_view.min_loc,reference_minloc); + ASSERT_EQ(minmax_scalar_init_view.max_val,reference_max); + ASSERT_EQ(minmax_scalar_init_view.max_loc,reference_maxloc); + } + { + Kokkos::View<value_type,Kokkos::HostSpace> minmax_view("View"); + Kokkos::Experimental::MinMaxLoc<Scalar,int> reducer_view(minmax_view); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + value_type minmax_view_scalar = minmax_view(); + ASSERT_EQ(minmax_view_scalar.min_val,reference_min); + ASSERT_EQ(minmax_view_scalar.min_loc,reference_minloc); + ASSERT_EQ(minmax_view_scalar.max_val,reference_max); + ASSERT_EQ(minmax_view_scalar.max_loc,reference_maxloc); + value_type minmax_view_view = reducer_view.result_view()(); + ASSERT_EQ(minmax_view_view.min_val,reference_min); + ASSERT_EQ(minmax_view_view.min_loc,reference_minloc); + ASSERT_EQ(minmax_view_view.max_val,reference_max); + ASSERT_EQ(minmax_view_view.max_loc,reference_maxloc); + } + { + Kokkos::View<value_type,Kokkos::HostSpace> minmax_view_init("View"); + Kokkos::Experimental::MinMaxLoc<Scalar,int> reducer_view_init(minmax_view_init,init_min,init_max); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init); + value_type minmax_view_init_scalar = minmax_view_init(); + ASSERT_EQ(minmax_view_init_scalar.min_val,reference_min); + ASSERT_EQ(minmax_view_init_scalar.min_loc,reference_minloc); + 
ASSERT_EQ(minmax_view_init_scalar.max_val,reference_max); + ASSERT_EQ(minmax_view_init_scalar.max_loc,reference_maxloc); + value_type minmax_view_init_view = reducer_view_init.result_view()(); + ASSERT_EQ(minmax_view_init_view.min_val,reference_min); + ASSERT_EQ(minmax_view_init_view.min_loc,reference_minloc); + ASSERT_EQ(minmax_view_init_view.max_val,reference_max); + ASSERT_EQ(minmax_view_init_view.max_loc,reference_maxloc); + } + } + + static void test_BAnd(int N) { + Kokkos::View<Scalar*,ExecSpace> values("Values",N); + auto h_values = Kokkos::create_mirror_view(values); + Scalar reference_band = Scalar() | (~Scalar()); + for(int i=0; i<N; i++) { + h_values(i) = (Scalar)(rand()%100000+1); + reference_band = reference_band & h_values(i); + } + Kokkos::deep_copy(values,h_values); + + BAndFunctor f; + f.values = values; + Scalar init = Scalar() | (~Scalar()); + + { + Scalar band_scalar = init; + Kokkos::Experimental::BAnd<Scalar> reducer_scalar(band_scalar); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); + ASSERT_EQ(band_scalar,reference_band); + Scalar band_scalar_view = reducer_scalar.result_view()(); + ASSERT_EQ(band_scalar_view,reference_band); + } + + { + Kokkos::View<Scalar,Kokkos::HostSpace> band_view("View"); + band_view() = init; + Kokkos::Experimental::BAnd<Scalar> reducer_view(band_view); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Scalar band_view_scalar = band_view(); + ASSERT_EQ(band_view_scalar,reference_band); + Scalar band_view_view = reducer_view.result_view()(); + ASSERT_EQ(band_view_view,reference_band); + } + } + + static void test_BOr(int N) { + Kokkos::View<Scalar*,ExecSpace> values("Values",N); + auto h_values = Kokkos::create_mirror_view(values); + Scalar reference_bor = Scalar() & (~Scalar()); + for(int i=0; i<N; i++) { + h_values(i) = (Scalar)((rand()%100000+1)*2); + reference_bor = reference_bor | h_values(i); + } + Kokkos::deep_copy(values,h_values); + + BOrFunctor 
f; + f.values = values; + Scalar init = Scalar() & (~Scalar()); + + { + Scalar bor_scalar = init; + Kokkos::Experimental::BOr<Scalar> reducer_scalar(bor_scalar); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); + ASSERT_EQ(bor_scalar,reference_bor); + Scalar bor_scalar_view = reducer_scalar.result_view()(); + ASSERT_EQ(bor_scalar_view,reference_bor); + } + + { + Kokkos::View<Scalar,Kokkos::HostSpace> bor_view("View"); + bor_view() = init; + Kokkos::Experimental::BOr<Scalar> reducer_view(bor_view); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Scalar bor_view_scalar = bor_view(); + ASSERT_EQ(bor_view_scalar,reference_bor); + Scalar bor_view_view = reducer_view.result_view()(); + ASSERT_EQ(bor_view_view,reference_bor); + } + } + + static void test_BXor(int N) { + Kokkos::View<Scalar*,ExecSpace> values("Values",N); + auto h_values = Kokkos::create_mirror_view(values); + Scalar reference_bxor = Scalar() & (~Scalar()); + for(int i=0; i<N; i++) { + h_values(i) = (Scalar)((rand()%100000+1)*2); + reference_bxor = reference_bxor ^ h_values(i); + } + Kokkos::deep_copy(values,h_values); + + BXorFunctor f; + f.values = values; + Scalar init = Scalar() & (~Scalar()); + + { + Scalar bxor_scalar = init; + Kokkos::Experimental::BXor<Scalar> reducer_scalar(bxor_scalar); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); + ASSERT_EQ(bxor_scalar,reference_bxor); + Scalar bxor_scalar_view = reducer_scalar.result_view()(); + ASSERT_EQ(bxor_scalar_view,reference_bxor); + } + + { + Kokkos::View<Scalar,Kokkos::HostSpace> bxor_view("View"); + bxor_view() = init; + Kokkos::Experimental::BXor<Scalar> reducer_view(bxor_view); + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Scalar bxor_view_scalar = bxor_view(); + ASSERT_EQ(bxor_view_scalar,reference_bxor); + Scalar bxor_view_view = reducer_view.result_view()(); + ASSERT_EQ(bxor_view_view,reference_bxor); + } + } + 
+  // LAnd reducer check: AND together N random 0/1 values on the host, then
+  // require parallel_reduce to agree through both a scalar and a host view.
+  static void test_LAnd(int N) {
+    Kokkos::View<Scalar*,ExecSpace> values("Values",N);
+    auto host_values = Kokkos::create_mirror_view(values);
+    Scalar expected = 1;
+    for (int idx = 0; idx < N; ++idx) {
+      host_values(idx) = static_cast<Scalar>(rand() % 2);
+      expected = expected && host_values(idx);
+    }
+    Kokkos::deep_copy(values, host_values);
+
+    LAndFunctor functor;
+    functor.values = values;
+    const Scalar identity = 1;
+    {  // reducer bound to a plain scalar
+      Scalar result = identity;
+      Kokkos::Experimental::LAnd<Scalar> reducer(result);
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0, N), functor, reducer);
+      ASSERT_EQ(result, expected);
+      Scalar from_reducer = reducer.result_view()();
+      ASSERT_EQ(from_reducer, expected);
+    }
+    {  // reducer bound to a rank-0 HostSpace view
+      Kokkos::View<Scalar,Kokkos::HostSpace> result("View");
+      result() = identity;
+      Kokkos::Experimental::LAnd<Scalar> reducer(result);
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0, N), functor, reducer);
+      Scalar from_view = result();
+      ASSERT_EQ(from_view, expected);
+      Scalar from_reducer = reducer.result_view()();
+      ASSERT_EQ(from_reducer, expected);
+    }
+  }
+
+  // LOr reducer check: OR together N random 0/1 values on the host, then
+  // require parallel_reduce to agree through both a scalar and a host view.
+  static void test_LOr(int N) {
+    Kokkos::View<Scalar*,ExecSpace> values("Values",N);
+    auto host_values = Kokkos::create_mirror_view(values);
+    Scalar expected = 0;
+    for (int idx = 0; idx < N; ++idx) {
+      host_values(idx) = static_cast<Scalar>(rand() % 2);
+      expected = expected || host_values(idx);
+    }
+    Kokkos::deep_copy(values, host_values);
+
+    LOrFunctor functor;
+    functor.values = values;
+    const Scalar identity = 0;
+    {  // reducer bound to a plain scalar
+      Scalar result = identity;
+      Kokkos::Experimental::LOr<Scalar> reducer(result);
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0, N), functor, reducer);
+      ASSERT_EQ(result, expected);
+      Scalar from_reducer = reducer.result_view()();
+      ASSERT_EQ(from_reducer, expected);
+    }
+    {  // reducer bound to a rank-0 HostSpace view
+      Kokkos::View<Scalar,Kokkos::HostSpace> result("View");
+      result() = identity;
+      Kokkos::Experimental::LOr<Scalar> reducer(result);
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0, N), functor, reducer);
+      Scalar from_view = result();
+      ASSERT_EQ(from_view, expected);
+      Scalar from_reducer = reducer.result_view()();
+      ASSERT_EQ(from_reducer, expected);
+    }
+  }
+
+  // LXor reducer check: the host reference is negated-or-copied per random 0/1 value.
+  static void test_LXor(int N) {
+    Kokkos::View<Scalar*,ExecSpace> values("Values",N);
+    auto host_values = Kokkos::create_mirror_view(values);
+    Scalar expected = 0;
+    for (int idx = 0; idx < N; ++idx) {
+      host_values(idx) = static_cast<Scalar>(rand() % 2);
+      if (expected) { expected = !host_values(idx); }
+      else          { expected = host_values(idx); }
+    }
+    Kokkos::deep_copy(values, host_values);
+
+    LXorFunctor functor;
+    functor.values = values;
+    const Scalar identity = 0;
+    {  // reducer bound to a plain scalar
+      Scalar result = identity;
+      Kokkos::Experimental::LXor<Scalar> reducer(result);
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0, N), functor, reducer);
+      ASSERT_EQ(result, expected);
+      Scalar from_reducer = reducer.result_view()();
+      ASSERT_EQ(from_reducer, expected);
+    }
+    {  // reducer bound to a rank-0 HostSpace view
+      Kokkos::View<Scalar,Kokkos::HostSpace> result("View");
+      result() = identity;
+      Kokkos::Experimental::LXor<Scalar> reducer(result);
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0, N), functor, reducer);
+      Scalar from_view = result();
+      ASSERT_EQ(from_view, expected);
+      Scalar from_reducer = reducer.result_view()();
+      ASSERT_EQ(from_reducer, expected);
+    }
+  }
+
+  // Suite for floating-point scalars: sum and product (via execute_basic),
+  // then the min/max family. Keep this order: tests consume the rand() stream.
+  static void execute_float() {
+    execute_basic();
+    test_min(10003);
+    test_minloc(10003);
+    test_max(10007);
+    test_maxloc(10007);
+    test_minmaxloc(10007);
+  }
+
+  // Suite for integral scalars: everything in execute_float, followed by the
+  // bitwise and logical reducer tests, each called with 35.
+  // Keep this order: the tests consume the global rand() stream.
+  static void execute_integer() {
+    execute_float();
+    test_BAnd(35);
+    test_BOr(35);
+    test_BXor(35);
+    test_LAnd(35);
+    test_LOr(35);
+    test_LXor(35);
+  }
+
+  // Minimal suite: only the sum and product tests.
+  // It is also the common prefix of execute_float and execute_integer.
+  static void execute_basic() {
+    test_sum(10001);
+    test_prod(35);
+  }
+};
+}
+
+/*--------------------------------------------------------------------------*/ + diff --git a/lib/kokkos/core/unit_test/TestScan.hpp b/lib/kokkos/core/unit_test/TestScan.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3eeea57043ece1142be96ed15dcbab3aa8a9285f --- /dev/null +++ b/lib/kokkos/core/unit_test/TestScan.hpp @@ -0,0 +1,103 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+/*--------------------------------------------------------------------------*/
+
+#include <stdio.h>
+
+namespace Test {
+
+// Self-checking scan functor: work item i contributes i+1, so the running total
+// on the final pass must equal 1 + 2 + ... + (i+1). A mismatch is only printed.
+template< class Device , class WorkSpec = size_t >
+struct TestScan {
+
+  typedef Device    execution_space ;
+  typedef long int  value_type ;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()( const int iwork , value_type & update , const bool final_pass ) const
+  {
+    const value_type n = iwork + 1 ;
+    // Artificial load imbalance: every 1000th item adds 1000 of its units one by one.
+    value_type slow_units = 0 ;
+    if ( n >= 1000 && n % 1000 == 0 ) { slow_units = 1000 ; }
+    for ( value_type done = 0 ; done < slow_units ; ++done ) { update += 1 ; }
+    update += n - slow_units ;
+
+    if ( ! final_pass ) { return ; }
+
+    // n*(n+1)/2, halving whichever factor is even so the division is exact.
+    value_type answer ;
+    if ( n % 2 != 0 ) { answer = n * ( ( n + 1 ) / 2 ) ; }
+    else              { answer = ( n / 2 ) * ( n + 1 ) ; }
+
+    if ( update != answer ) {
+      printf("TestScan(%d,%ld) != %ld\n",iwork,update,answer);
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void init( value_type & update ) const
+  { update = 0 ; }
+
+  KOKKOS_INLINE_FUNCTION
+  void join( volatile value_type & update ,
+             volatile const value_type & input ) const
+  { update = update + input ; }
+
+  // Constructing an instance runs the scan; the overloads differ only in the policy.
+  TestScan( const WorkSpec & N )
+  { parallel_scan( N , *this ); }
+  TestScan( const WorkSpec & Start , const WorkSpec & N )
+  { parallel_scan( Kokkos::RangePolicy<execution_space>( Start , N ) , *this ); }
+
+  static void test_range( const WorkSpec & begin , const WorkSpec & end )
+  {
+    WorkSpec count = begin ;
+    while ( count < end ) { (void) TestScan( count ); ++count ; }
+  }
+};
+
+}
+
diff --git a/lib/kokkos/core/unit_test/TestSerial.cpp b/lib/kokkos/core/unit_test/TestSerial.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d85614e66e67af2ccae9979d7f3869cbf5165c1d
--- /dev/null
+++ b/lib/kokkos/core/unit_test/TestSerial.cpp
@@ -0,0 +1,571 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3.
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <gtest/gtest.h>
+// Replace any KOKKOS_LAMBDA from Kokkos_Macros.hpp with a plain by-copy capture before Kokkos_Core.hpp is included.
+#include <Kokkos_Macros.hpp>
+#ifdef KOKKOS_LAMBDA
+#undef KOKKOS_LAMBDA
+#endif
+#define KOKKOS_LAMBDA [=]
+
+#include <Kokkos_Core.hpp>
+
+#include <impl/Kokkos_ViewTileLeft.hpp>
+#include <TestTile.hpp>
+
+#include <impl/Kokkos_Serial_TaskPolicy.hpp>
+
+//----------------------------------------------------------------------------
+
+#include <TestSharedAlloc.hpp>
+#include <TestViewMapping.hpp>
+
+#include <TestViewImpl.hpp>
+
+#include <TestViewAPI.hpp>
+#include <TestViewOfClass.hpp>
+#include <TestViewSubview.hpp>
+#include <TestAtomic.hpp>
+#include <TestAtomicOperations.hpp>
+#include <TestRange.hpp>
+#include <TestTeam.hpp>
+#include <TestReduce.hpp>
+#include <TestScan.hpp>
+#include <TestAggregate.hpp>
+#include <TestAggregateReduction.hpp>
+#include <TestCompilerMacros.hpp>
+#include <TestTaskPolicy.hpp>
+#include <TestMemoryPool.hpp>
+
+
+#include <TestCXX11.hpp>
+#include <TestCXX11Deduction.hpp>
+#include <TestTeamVector.hpp>
+#include <TestMemorySpaceTracking.hpp>
+#include <TestTemplateMetaFunctions.hpp>
+
+#include <TestPolicyConstruction.hpp>
+
+#include <TestMDRange.hpp>
+
+namespace Test {
+// gtest fixture: the HostSpace execution space is initialized in SetUpTestCase and finalized in TearDownTestCase.
+class serial : public ::testing::Test {
+protected:
+  static void SetUpTestCase()
+  {
+    Kokkos::HostSpace::execution_space::initialize();
+  }
+  static void TearDownTestCase()
+  {
+    Kokkos::HostSpace::execution_space::finalize();
+  }
+};
+
+TEST_F( serial , md_range ) {
+  TestMDRange_2D< Kokkos::Serial >::test_for2(100,100);
+
+  TestMDRange_3D< Kokkos::Serial >::test_for3(100,100,100);
+}
+
+TEST_F( serial , impl_shared_alloc ) {
+  test_shared_alloc< Kokkos::HostSpace , Kokkos::Serial >();
+}
+
+TEST_F( serial, policy_construction) {
+  TestRangePolicyConstruction< Kokkos::Serial >();
+  TestTeamPolicyConstruction< Kokkos::Serial >();
+}
+
+TEST_F( serial , impl_view_mapping ) {
+  test_view_mapping< Kokkos::Serial >();
+  test_view_mapping_subview< Kokkos::Serial >();
+  test_view_mapping_operator< Kokkos::Serial >();
+  TestViewMappingAtomic< Kokkos::Serial >::run();
+}
+
+TEST_F( serial, view_impl) {
+  test_view_impl< Kokkos::Serial >();
+}
+
+TEST_F( serial, view_api) {
+  TestViewAPI< double , Kokkos::Serial >();
+}
+
+TEST_F( serial , view_nested_view )
+{
+  ::Test::view_nested_view< Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_auto_1d_left ) {
+  TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_auto_1d_right ) {
+  TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_auto_1d_stride ) {
+  TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_assign_strided ) {
+  TestViewSubview::test_1d_strided_assignment< Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_left_0 ) {
+  TestViewSubview::test_left_0< Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_left_1 ) {
+  TestViewSubview::test_left_1< Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_left_2 ) {
+  TestViewSubview::test_left_2< Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_left_3 ) {
+  TestViewSubview::test_left_3< Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_right_0 ) {
+  TestViewSubview::test_right_0< Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_right_1 ) {
+  TestViewSubview::test_right_1< Kokkos::Serial >();
+}
+
+TEST_F( serial, view_subview_right_3 ) {
+  TestViewSubview::test_right_3< Kokkos::Serial >();
+}
+
+TEST_F( serial , range_tag )  // for/reduce/scan with a Static schedule (1000) and a Dynamic schedule (1001)
+{
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(1000);
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000);
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001);
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001);
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001);
+  TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000);
+}
+
+TEST_F( serial , team_tag )
+{
+  TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(1000);
+  TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
+  TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000);
+  TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000);
+}
+
+TEST_F( serial, long_reduce) {
+  TestReduce< long , Kokkos::Serial >( 1000000 );
+}
+
+TEST_F( serial, double_reduce) {
+  TestReduce< double , Kokkos::Serial >( 1000000 );
+}
+
+TEST_F( serial , reducers )  // integer suite for int and size_t, float suite for double, basic suite for complex<double>
+{
+  TestReducers<int, Kokkos::Serial>::execute_integer();
+  TestReducers<size_t, Kokkos::Serial>::execute_integer();
+  TestReducers<double, Kokkos::Serial>::execute_float();
+  TestReducers<Kokkos::complex<double>, Kokkos::Serial>::execute_basic();
+}
+
+TEST_F( serial, long_reduce_dynamic ) {
+  TestReduceDynamic< long , Kokkos::Serial >( 1000000 );
+}
+
+TEST_F( serial, double_reduce_dynamic ) {
+  TestReduceDynamic< double , Kokkos::Serial >( 1000000 );
+}
+
+TEST_F( serial, long_reduce_dynamic_view ) {
+  TestReduceDynamicView< long , Kokkos::Serial >( 1000000 );
+}
+
+TEST_F( serial , scan )  // test_range(1,1000), then single scans constructed with 10 and 10000
+{
+  TestScan< Kokkos::Serial >::test_range( 1 , 1000 );
+  TestScan< Kokkos::Serial >( 10 );
+  TestScan< Kokkos::Serial >( 10000 );
+}
+
+TEST_F( serial , team_long_reduce) {
+  TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 3 );
+  TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
+  TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 100000 );
+  TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
+}
+
+TEST_F( serial , team_double_reduce) {
+  TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 3 );
+  TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 3 );
+  TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 100000 );
+  TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 );
+}
+
+TEST_F( serial , team_shared_request) {
+  TestSharedTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >();
+  TestSharedTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >();
+}
+
+#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
+TEST_F( serial , team_lambda_shared_request) {
+  TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >();
+  TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >();
+}
+#endif
+
+TEST_F( serial, shmem_size) {
+  TestShmemSize< Kokkos::Serial >();
+}
+
+TEST_F( serial , team_scan )
+{
+  TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 10 );
+  TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 10 );
+  TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 10000 );
+  TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 );
+}
+
+
+TEST_F( serial , view_remap )
+{
+  enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 };
+
+  typedef Kokkos::View< double*[N1][N2][N3] ,
+                        Kokkos::LayoutRight ,
+                        Kokkos::Serial > output_type ;
+
+  typedef Kokkos::View< int**[N2][N3] ,
+                        Kokkos::LayoutLeft ,
+                        Kokkos::Serial > input_type ;
+
+  typedef Kokkos::View< int*[N0][N2][N3] ,
+                        Kokkos::LayoutLeft ,
+                        Kokkos::Serial > diff_type ;
+
+  output_type output( "output" , N0 );
+  input_type input ( "input" , N0 , N1 );
+  diff_type diff ( "diff" , N0 );
+  // Number the input 1,2,3,... with i0 varying fastest.
+  int value = 0 ;
+  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
+  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
+  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
+  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
+    input(i0,i1,i2,i3) = ++value ;
+  }}}}
+
+  // Kokkos::deep_copy( diff , input ); // throw with incompatible shape
+  Kokkos::deep_copy( output , input );
+  // Same traversal: each output element must hold the number written at the same indices of input.
+  value = 0 ;
+  for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
+  for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
+  for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
+  for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
+    ++value ;
+    ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) );
+  }}}}
+}
+
+//----------------------------------------------------------------------------
+
+TEST_F( serial , view_aggregate )
+{
+  TestViewAggregate< Kokkos::Serial >();
+  TestViewAggregateReduction< Kokkos::Serial >();
+}
+
+//----------------------------------------------------------------------------
+
+TEST_F( serial , atomics )
+{
+  const int loop_count = 1e6 ; // used for the integer and double loops; float, complex and SuperScalar<4> use 100
+
+  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,3) ) );
+
+  ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,1) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,2) ) );
+  ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,3) ) );
+}
+
+TEST_F( serial , atomic_operations )
+{
+  const int start = 1; //Avoid zero for division
+  const int end = 11;
+  for (int i = start; i < end; ++i) // the second argument, end-i, runs from 10 down to 1
+  {
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 4 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 5 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 6 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 7 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 8 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 9 ) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 4 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 5 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 6 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 7 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 8 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 9 ) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 4 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 5 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 6 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 7 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 8 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 9 ) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 4 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 5 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 6 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 7 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 8 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 9 ) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 4 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 5 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 6 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 7 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 8 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 9 ) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 4 ) ) );
+
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 1 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 2 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 3 ) ) );
+    ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 4 ) ) );
+  }
+
+}
+//----------------------------------------------------------------------------
+
+TEST_F( serial, tile_layout ) // NOTE(review): template args look like tile dimensions and call args like view extents - confirm in TestTile.hpp
+{
+  TestTile::test< Kokkos::Serial , 1 , 1 >( 1 , 1 );
+  TestTile::test< Kokkos::Serial , 1 , 1 >( 2 , 3 );
+  TestTile::test< Kokkos::Serial , 1 , 1 >( 9 , 10 );
+
+  TestTile::test< Kokkos::Serial , 2 , 2 >( 1 , 1 );
+  TestTile::test< Kokkos::Serial , 2 , 2 >( 2 , 3 );
+  TestTile::test< Kokkos::Serial , 2 , 2 >( 4 , 4 );
+  TestTile::test< Kokkos::Serial , 2 , 2 >( 9 , 9 );
+
+  TestTile::test< Kokkos::Serial , 2 , 4 >( 9 , 9 );
+  TestTile::test< Kokkos::Serial , 4 , 2 >( 9 , 9 );
+
+  TestTile::test< Kokkos::Serial , 4 , 4 >( 1 , 1 );
+  TestTile::test< Kokkos::Serial , 4 , 4 >( 4 , 4 );
+  TestTile::test< Kokkos::Serial , 4 , 4 >( 9 , 9 );
+  TestTile::test< Kokkos::Serial , 4 , 4 >( 9 , 11 );
+
+  TestTile::test< Kokkos::Serial , 8 , 8 >( 1 , 1 );
+  TestTile::test< Kokkos::Serial , 8 , 8 >( 4 , 4 );
+  TestTile::test< Kokkos::Serial , 8 , 8 >( 9 , 9 );
+  TestTile::test< Kokkos::Serial , 8 , 8 >( 9 , 11 );
+}
+
+//----------------------------------------------------------------------------
+
+TEST_F( serial , compiler_macros )
+{
+  ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Serial >() ) );
+}
+
+//----------------------------------------------------------------------------
+
+TEST_F( serial , memory_space )
+{
+  TestMemorySpace< Kokkos::Serial >();
+}
+
+TEST_F( serial , memory_pool )
+{
+  bool val = TestMemoryPool::test_mempool< Kokkos::Serial >( 128, 128000000 );
+  ASSERT_TRUE( val );
+
+  TestMemoryPool::test_mempool2< Kokkos::Serial >( 64, 4, 1000000, 2000000 );
+
+  TestMemoryPool::test_memory_exhaustion< Kokkos::Serial >();
+}
+
+//----------------------------------------------------------------------------
+
+#if defined( KOKKOS_ENABLE_TASKPOLICY ) // the task tests below are compiled only when this macro is defined
+
+TEST_F( serial , task_fib )
+{
+  for ( int i = 0 ; i < 25 ; ++i ) {
+    TestTaskPolicy::TestFib< Kokkos::Serial >::run(i);
+  }
+}
+
+TEST_F( serial , task_depend )
+{
+  for ( int i = 0 ; i < 25 ; ++i ) {
+    TestTaskPolicy::TestTaskDependence< Kokkos::Serial >::run(i);
+  }
+}
+
+TEST_F( serial , task_team )
+{
+  TestTaskPolicy::TestTaskTeam< Kokkos::Serial >::run(1000);
+  //TestTaskPolicy::TestTaskTeamValue< Kokkos::Serial >::run(1000); //put back after testing
+}
+
+TEST_F( serial , old_task_policy ) +{ + TestTaskPolicy::test_task_dep< Kokkos::Serial >( 10 ); + // TestTaskPolicy::test_norm2< Kokkos::Serial >( 1000 ); + // for ( long i = 0 ; i < 30 ; ++i ) TestTaskPolicy::test_fib< Kokkos::Serial >(i); + // for ( long i = 0 ; i < 40 ; ++i ) TestTaskPolicy::test_fib2< Kokkos::Serial >(i); + for ( long i = 0 ; i < 20 ; ++i ) TestTaskPolicy::test_fib< Kokkos::Serial >(i); + for ( long i = 0 ; i < 25 ; ++i ) TestTaskPolicy::test_fib2< Kokkos::Serial >(i); +} + +TEST_F( serial , old_task_team ) +{ + TestTaskPolicy::test_task_team< Kokkos::Serial >(1000); +} + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ + +//---------------------------------------------------------------------------- + +TEST_F( serial , template_meta_functions ) +{ + TestTemplateMetaFunctions<int, Kokkos::Serial >(); +} + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL ) +TEST_F( serial , cxx11 ) +{ + if ( Kokkos::Impl::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Serial >::value ) { + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(1) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(2) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(3) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(4) ) ); + } +} +#endif + +TEST_F( serial , reduction_deduction ) +{ + TestCXX11::test_reduction_deduction< Kokkos::Serial >(); +} + +TEST_F( serial , team_vector ) +{ + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(0) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(1) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(2) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(3) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(4) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(5) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(6) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial 
>(7) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(8) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(9) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(10) ) ); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/TestSharedAlloc.hpp b/lib/kokkos/core/unit_test/TestSharedAlloc.hpp new file mode 100644 index 0000000000000000000000000000000000000000..61166888142e7f666b303dc1c837daa34c07a00c --- /dev/null +++ b/lib/kokkos/core/unit_test/TestSharedAlloc.hpp @@ -0,0 +1,215 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> /* NOTE(review): no include guard or #pragma once is visible in this header; including it twice in one translation unit would redefine SharedAllocDestroy - confirm it is only included once per test source. */ + +#include <stdexcept> +#include <sstream> +#include <iostream> + +#include <Kokkos_Core.hpp> + +/*--------------------------------------------------------------------------*/ + +namespace Test { + +struct SharedAllocDestroy { /* Destroy functor stored in allocation records by the tests below; destroy_shared_allocation() atomically adds 1 to *count. */ + + volatile int * count ; /* left uninitialized by the defaulted constructor */ + + SharedAllocDestroy() = default ; + SharedAllocDestroy( int * arg ) : count( arg ) {} + + void destroy_shared_allocation() + { + Kokkos::atomic_fetch_add( count , 1 ); + } + +}; + +template< class MemorySpace , class ExecutionSpace > +void test_shared_alloc() /* Exercises use counting of SharedAllocationRecord and SharedAllocationTracker for MemorySpace; the body is compiled only when KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST is defined. */ +{ +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + + typedef const Kokkos::Experimental::Impl::SharedAllocationHeader Header ; + typedef Kokkos::Experimental::Impl::SharedAllocationTracker Tracker ; + typedef Kokkos::Experimental::Impl::SharedAllocationRecord< void , void > RecordBase ; + typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , void > RecordMemS ; + typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , SharedAllocDestroy > RecordFull ; + + static_assert( sizeof(Tracker) == sizeof(int*), "SharedAllocationTracker has wrong size!" 
); + + MemorySpace s ; + + const size_t N = 1200 ; /* number of allocation records created in each of the first two phases */ + const size_t size = 8 ; /* base allocation size; record i requests size * ( i + 1 ) */ + + RecordMemS * rarray[ N ]; + Header * harray[ N ]; + + RecordMemS ** const r = rarray ; /* raw pointers so the [=] lambdas below write into the caller's arrays */ + Header ** const h = harray ; + + Kokkos::RangePolicy< ExecutionSpace > range(0,N); + + //---------------------------------------- + { /* Phase 1: records without a destroy functor - allocate, raise use_count to ( i / 10 ) + 1, then decrement until decrement() returns null. */ + // Since always executed on host space, leave [=] + Kokkos::parallel_for( range , [=]( size_t i ){ + char name[64] ; + sprintf(name,"test_%.2d",int(i)); + + r[i] = RecordMemS::allocate( s , name , size * ( i + 1 ) ); + h[i] = Header::get_header( r[i]->data() ); + + ASSERT_EQ( r[i]->use_count() , 0 ); + + for ( size_t j = 0 ; j < ( i / 10 ) + 1 ; ++j ) RecordBase::increment( r[i] ); + + ASSERT_EQ( r[i]->use_count() , ( i / 10 ) + 1 ); + ASSERT_EQ( r[i] , RecordMemS::get_record( r[i]->data() ) ); + }); + + // Sanity check for the whole set of allocation records to which this record belongs. + RecordBase::is_sane( r[0] ); + // RecordMemS::print_records( std::cout , s , true ); + + Kokkos::parallel_for( range , [=]( size_t i ){ + while ( 0 != ( r[i] = static_cast< RecordMemS *>( RecordBase::decrement( r[i] ) ) ) ) { + if ( r[i]->use_count() == 1 ) RecordBase::is_sane( r[i] ); + } + }); + } + //---------------------------------------- + { /* Phase 2: the same sequence with RecordFull records, each given a copy of a SharedAllocDestroy that points at destroy_count. */ + int destroy_count = 0 ; + SharedAllocDestroy counter( & destroy_count ); + + Kokkos::parallel_for( range , [=]( size_t i ){ + char name[64] ; + sprintf(name,"test_%.2d",int(i)); + + RecordFull * rec = RecordFull::allocate( s , name , size * ( i + 1 ) ); + + rec->m_destroy = counter ; + + r[i] = rec ; + h[i] = Header::get_header( r[i]->data() ); + + ASSERT_EQ( r[i]->use_count() , 0 ); + + for ( size_t j = 0 ; j < ( i / 10 ) + 1 ; ++j ) RecordBase::increment( r[i] ); + + ASSERT_EQ( r[i]->use_count() , ( i / 10 ) + 1 ); + ASSERT_EQ( r[i] , RecordMemS::get_record( r[i]->data() ) ); + }); + + RecordBase::is_sane( r[0] ); + + Kokkos::parallel_for( range , [=]( size_t i ){ + while ( 0 != ( r[i] = static_cast< RecordMemS *>( 
RecordBase::decrement( r[i] ) ) ) ) { + if ( r[i]->use_count() == 1 ) RecordBase::is_sane( r[i] ); + } + }); + + ASSERT_EQ( destroy_count , int(N) ); /* expects one increment of destroy_count per record */ + } + + //---------------------------------------- + { /* Phase 3: a single record whose use count is managed through SharedAllocationTracker objects. */ + int destroy_count = 0 ; + + { + RecordFull * rec = RecordFull::allocate( s , "test" , size ); + + // ... Construction of the allocated { rec->data() , rec->size() } + + // Copy destruction function object into the allocation record + rec->m_destroy = SharedAllocDestroy( & destroy_count ); + + ASSERT_EQ( rec->use_count() , 0 ); + + // Start tracking, increments the use count from 0 to 1 + Tracker track ; + + track.assign_allocated_record_to_uninitialized( rec ); + + ASSERT_EQ( rec->use_count() , 1 ); + ASSERT_EQ( track.use_count() , 1 ); + + // Verify construction / destruction increment + for ( size_t i = 0 ; i < N ; ++i ) { + ASSERT_EQ( rec->use_count() , 1 ); + { + Tracker local_tracker ; + local_tracker.assign_allocated_record_to_uninitialized( rec ); + ASSERT_EQ( rec->use_count() , 2 ); + ASSERT_EQ( local_tracker.use_count() , 2 ); + } + ASSERT_EQ( rec->use_count() , 1 ); + ASSERT_EQ( track.use_count() , 1 ); + } + + Kokkos::parallel_for( range , [=]( size_t i ){ + Tracker local_tracker ; + local_tracker.assign_allocated_record_to_uninitialized( rec ); + ASSERT_GT( rec->use_count() , 1 ); /* only a lower bound is checked here, unlike the serial loop above */ + }); + + ASSERT_EQ( rec->use_count() , 1 ); + ASSERT_EQ( track.use_count() , 1 ); + + // Destruction of 'track' object deallocates the 'rec' and invokes the destroy function object. + } + + ASSERT_EQ( destroy_count , 1 ); + } + +#endif /* #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) */ + +} + + +} + diff --git a/lib/kokkos/core/unit_test/TestSynchronic.cpp b/lib/kokkos/core/unit_test/TestSynchronic.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9121dc15a17ecead1895ce1df660c1d25a2deda2 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestSynchronic.cpp @@ -0,0 +1,448 @@ +/* + +Copyright (c) 2014, NVIDIA Corporation +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#undef _WIN32_WINNT +//#define _WIN32_WINNT 0x0602 + +#if defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__APPLE__) + +// Skip for now + +#else + +#include <gtest/gtest.h> + +#ifdef USEOMP +#include <omp.h> +#endif + +#include <iostream> +#include <sstream> +#include <algorithm> +#include <string> +#include <vector> +#include <map> +#include <cstring> +#include <ctime> + +//#include <details/config> +//#undef __SYNCHRONIC_COMPATIBLE + +#include <impl/Kokkos_Synchronic.hpp> +#include <impl/Kokkos_Synchronic_n3998.hpp> + +#include "TestSynchronic.hpp" + +// Uncomment to allow test to dump output +//#define VERBOSE_TEST + +namespace Test { + +unsigned next_table[] = + { + 0, 1, 2, 3, //0-3 + 4, 4, 6, 6, //4-7 + 8, 8, 8, 8, //8-11 + 12, 12, 12, 12, //12-15 + 16, 16, 16, 16, //16-19 + 16, 16, 16, 16, //20-23 + 24, 24, 24, 24, //24-27 + 24, 24, 24, 24, //28-31 + 32, 32, 32, 32, //32-35 + 32, 32, 32, 32, //36-39 + 40, 40, 40, 40, //40-43 + 40, 40, 40, 40, //44-47 + 48, 48, 48, 48, //48-51 + 48, 48, 48, 48, //52-55 + 56, 56, 56, 56, //56-59 + 56, 56, 56, 56, //60-63 + }; + +//change this if you want to allow oversubscription of the system, by default only the range {1-(system size)} is tested +#define FOR_GAUNTLET(x) for(unsigned x = (std::min)(std::thread::hardware_concurrency()*8,unsigned(sizeof(next_table)/sizeof(unsigned))); x; x = next_table[x-1]) + +//set this to override the benchmark of barriers to use OMP barriers instead of n3998 std::barrier +//#define USEOMP + +#if defined(__SYNCHRONIC_COMPATIBLE) + #define PREFIX "futex-" +#else + #define PREFIX "backoff-" +#endif + +//this test uses a custom Mersenne twister to eliminate implementation variation +MersenneTwister mt; + +int dummya = 1, dummyb =1; + +int dummy1 = 1; +std::atomic<int> dummy2(1); +std::atomic<int> dummy3(1); + +double time_item(int const count = (int)1E8) { + + clock_t const start = clock(); + + for(int i = 0;i < count; ++i) + mt.integer(); + + clock_t const 
end = clock(); + double elapsed_seconds = (end - start) / double(CLOCKS_PER_SEC); + + return elapsed_seconds / count; +} +double time_nil(int const count = (int)1E08) { + + clock_t const start = clock(); + + dummy3 = count; + for(int i = 0;i < (int)1E6; ++i) { + if(dummy1) { + // Do some work while holding the lock + int workunits = dummy3;//(int) (mtc.poissonInterval((float)num_items_critical) + 0.5f); + for (int j = 1; j < workunits; j++) + dummy1 &= j; // Do one work unit + dummy2.fetch_add(dummy1,std::memory_order_relaxed); + } + } + + clock_t const end = clock(); + double elapsed_seconds = (end - start) / double(CLOCKS_PER_SEC); + + return elapsed_seconds / count; +} + + +template <class mutex_type> +void testmutex_inner(mutex_type& m, std::atomic<int>& t,std::atomic<int>& wc,std::atomic<int>& wnc, int const num_iterations, + int const num_items_critical, int const num_items_noncritical, MersenneTwister& mtc, MersenneTwister& mtnc, bool skip) { + + for(int k = 0; k < num_iterations; ++k) { + + if(num_items_noncritical) { + // Do some work without holding the lock + int workunits = num_items_noncritical;//(int) (mtnc.poissonInterval((float)num_items_noncritical) + 0.5f); + for (int i = 1; i < workunits; i++) + mtnc.integer(); // Do one work unit + wnc.fetch_add(workunits,std::memory_order_relaxed); + } + + t.fetch_add(1,std::memory_order_relaxed); + + if(!skip) { + std::unique_lock<mutex_type> l(m); + if(num_items_critical) { + // Do some work while holding the lock + int workunits = num_items_critical;//(int) (mtc.poissonInterval((float)num_items_critical) + 0.5f); + for (int i = 1; i < workunits; i++) + mtc.integer(); // Do one work unit + wc.fetch_add(workunits,std::memory_order_relaxed); + } + } + } +} +template <class mutex_type> +void testmutex_outer(std::map<std::string,std::vector<double>>& results, std::string const& name, double critical_fraction, double critical_duration) { + + std::ostringstream truename; + truename << name << " (f=" << 
critical_fraction << ",d=" << critical_duration << ")"; + + std::vector<double>& data = results[truename.str()]; + + double const workItemTime = time_item() , + nilTime = time_nil(); + + int const num_items_critical = (critical_duration <= 0 ? 0 : (std::max)( int(critical_duration / workItemTime + 0.5), int(100 * nilTime / workItemTime + 0.5))), + num_items_noncritical = (num_items_critical <= 0 ? 0 : int( ( 1 - critical_fraction ) * num_items_critical / critical_fraction + 0.5 )); + + FOR_GAUNTLET(num_threads) { + + //Kokkos::Impl::portable_sleep(std::chrono::microseconds(2000000)); + + int const num_iterations = (num_items_critical + num_items_noncritical != 0) ? +#ifdef __SYNCHRONIC_JUST_YIELD + int( 1 / ( 8 * workItemTime ) / (num_items_critical + num_items_noncritical) / num_threads + 0.5 ) : +#else + int( 1 / ( 8 * workItemTime ) / (num_items_critical + num_items_noncritical) / num_threads + 0.5 ) : +#endif +#ifdef WIN32 + int( 1 / workItemTime / (20 * num_threads * num_threads) ); +#else + int( 1 / workItemTime / (200 * num_threads * num_threads) ); +#endif + +#ifdef VERBOSE_TEST + std::cerr << "running " << truename.str() << " #" << num_threads << ", " << num_iterations << " * " << num_items_noncritical << "\n" << std::flush; +#endif + + + std::atomic<int> t[2], wc[2], wnc[2]; + + clock_t start[2], end[2]; + for(int pass = 0; pass < 2; ++pass) { + + t[pass] = 0; + wc[pass] = 0; + wnc[pass] = 0; + + srand(num_threads); + std::vector<MersenneTwister> randomsnc(num_threads), + randomsc(num_threads); + + mutex_type m; + + start[pass] = clock(); +#ifdef USEOMP + omp_set_num_threads(num_threads); + std::atomic<int> _j(0); + #pragma omp parallel + { + int const j = _j.fetch_add(1,std::memory_order_relaxed); + testmutex_inner(m, t[pass], wc[pass], wnc[pass], num_iterations, num_items_critical, num_items_noncritical, randomsc[j], randomsnc[j], pass==0); + num_threads = omp_get_num_threads(); + } +#else + std::vector<std::thread*> threads(num_threads); + for(unsigned 
j = 0; j < num_threads; ++j) + threads[j] = new std::thread([&,j](){ + testmutex_inner(m, t[pass], wc[pass], wnc[pass], num_iterations, num_items_critical, num_items_noncritical, randomsc[j], randomsnc[j], pass==0); + } + ); + for(unsigned j = 0; j < num_threads; ++j) { + threads[j]->join(); + delete threads[j]; + } +#endif + end[pass] = clock(); + } + if(t[0] != t[1]) throw std::string("mismatched iteration counts"); + if(wnc[0] != wnc[1]) throw std::string("mismatched work item counts"); + + double elapsed_seconds_0 = (end[0] - start[0]) / double(CLOCKS_PER_SEC), + elapsed_seconds_1 = (end[1] - start[1]) / double(CLOCKS_PER_SEC); + double time = (elapsed_seconds_1 - elapsed_seconds_0 - wc[1]*workItemTime) / num_iterations; + + data.push_back(time); +#ifdef VERBOSE_TEST + std::cerr << truename.str() << " : " << num_threads << "," << elapsed_seconds_1 / num_iterations << " - " << elapsed_seconds_0 / num_iterations << " - " << wc[1]*workItemTime/num_iterations << " = " << time << " \n"; +#endif + } +} + +template <class barrier_type> +void testbarrier_inner(barrier_type& b, int const num_threads, int const j, std::atomic<int>& t,std::atomic<int>& w, + int const num_iterations_odd, int const num_iterations_even, + int const num_items_noncritical, MersenneTwister& arg_mt, bool skip) { + + for(int k = 0; k < (std::max)(num_iterations_even,num_iterations_odd); ++k) { + + if(k >= (~j & 0x1 ? 
num_iterations_odd : num_iterations_even )) { + if(!skip) + b.arrive_and_drop(); + break; + } + + if(num_items_noncritical) { + // Do some work without holding the lock + int workunits = (int) (arg_mt.poissonInterval((float)num_items_noncritical) + 0.5f); + for (int i = 1; i < workunits; i++) + arg_mt.integer(); // Do one work unit + w.fetch_add(workunits,std::memory_order_relaxed); + } + + t.fetch_add(1,std::memory_order_relaxed); + + if(!skip) { + int const thiscount = (std::min)(k+1,num_iterations_odd)*((num_threads>>1)+(num_threads&1)) + (std::min)(k+1,num_iterations_even)*(num_threads>>1); + if(t.load(std::memory_order_relaxed) > thiscount) { + std::cerr << "FAILURE: some threads have run ahead of the barrier (" << t.load(std::memory_order_relaxed) << ">" << thiscount << ").\n"; + EXPECT_TRUE(false); + } +#ifdef USEOMP + #pragma omp barrier +#else + b.arrive_and_wait(); +#endif + if(t.load(std::memory_order_relaxed) < thiscount) { + std::cerr << "FAILURE: some threads have fallen behind the barrier (" << t.load(std::memory_order_relaxed) << "<" << thiscount << ").\n"; + EXPECT_TRUE(false); + } + } + } +} +template <class barrier_type> +void testbarrier_outer(std::map<std::string,std::vector<double>>& results, std::string const& name, double barrier_frequency, double phase_duration, bool randomIterations = false) { + + std::vector<double>& data = results[name]; + + double const workItemTime = time_item(); + int const num_items_noncritical = int( phase_duration / workItemTime + 0.5 ); + + FOR_GAUNTLET(num_threads) { + + int const num_iterations = int( barrier_frequency ); +#ifdef VERBOSE_TEST + std::cerr << "running " << name << " #" << num_threads << ", " << num_iterations << " * " << num_items_noncritical << "\r" << std::flush; +#endif + + srand(num_threads); + + MersenneTwister local_mt; + int const num_iterations_odd = randomIterations ? int(local_mt.poissonInterval((float)num_iterations)+0.5f) : num_iterations, + num_iterations_even = randomIterations ? 
int(local_mt.poissonInterval((float)num_iterations)+0.5f) : num_iterations; + + std::atomic<int> t[2], w[2]; + std::chrono::time_point<std::chrono::high_resolution_clock> start[2], end[2]; + for(int pass = 0; pass < 2; ++pass) { + + t[pass] = 0; + w[pass] = 0; + + srand(num_threads); + std::vector<MersenneTwister> randoms(num_threads); + + barrier_type b(num_threads); + + start[pass] = std::chrono::high_resolution_clock::now(); +#ifdef USEOMP + omp_set_num_threads(num_threads); + std::atomic<int> _j(0); + #pragma omp parallel + { + int const j = _j.fetch_add(1,std::memory_order_relaxed); + testbarrier_inner(b, num_threads, j, t[pass], w[pass], num_iterations_odd, num_iterations_even, num_items_noncritical, randoms[j], pass==0); + num_threads = omp_get_num_threads(); + } +#else + std::vector<std::thread*> threads(num_threads); + for(unsigned j = 0; j < num_threads; ++j) + threads[j] = new std::thread([&,j](){ + testbarrier_inner(b, num_threads, j, t[pass], w[pass], num_iterations_odd, num_iterations_even, num_items_noncritical, randoms[j], pass==0); + }); + for(unsigned j = 0; j < num_threads; ++j) { + threads[j]->join(); + delete threads[j]; + } +#endif + end[pass] = std::chrono::high_resolution_clock::now(); + } + + if(t[0] != t[1]) throw std::string("mismatched iteration counts"); + if(w[0] != w[1]) throw std::string("mismatched work item counts"); + + int const phases = (std::max)(num_iterations_odd, num_iterations_even); + + std::chrono::duration<double> elapsed_seconds_0 = end[0]-start[0], + elapsed_seconds_1 = end[1]-start[1]; + double const time = (elapsed_seconds_1.count() - elapsed_seconds_0.count()) / phases; + + data.push_back(time); +#ifdef VERBOSE_TEST + std::cerr << name << " : " << num_threads << "," << elapsed_seconds_1.count() / phases << " - " << elapsed_seconds_0.count() / phases << " = " << time << " \n"; +#endif + } +} + +template <class... 
T> +struct mutex_tester; +template <class F> +struct mutex_tester<F> { + static void run(std::map<std::string,std::vector<double>>& results, std::string const name[], double critical_fraction, double critical_duration) { + testmutex_outer<F>(results, *name, critical_fraction, critical_duration); + } +}; +template <class F, class... T> +struct mutex_tester<F,T...> { + static void run(std::map<std::string,std::vector<double>>& results, std::string const name[], double critical_fraction, double critical_duration) { + mutex_tester<F>::run(results, name, critical_fraction, critical_duration); + mutex_tester<T...>::run(results, ++name, critical_fraction, critical_duration); + } +}; + +TEST( synchronic, main ) +{ + //warm up + time_item(); + + //measure up +#ifdef VERBOSE_TEST + std::cerr << "measuring work item speed...\r"; + std::cerr << "work item speed is " << time_item() << " per item, nil is " << time_nil() << "\n"; +#endif + try { + + std::pair<double,double> testpoints[] = { {1, 0}, /*{1E-1, 10E-3}, {5E-1, 2E-6}, {3E-1, 50E-9},*/ }; + for(auto x : testpoints ) { + + std::map<std::string,std::vector<double>> results; + + //testbarrier_outer<std::barrier>(results, PREFIX"bar 1khz 100us", 1E3, x.second); + + std::string const names[] = { + PREFIX"tkt", PREFIX"mcs", PREFIX"ttas", PREFIX"std" +#ifdef WIN32 + ,PREFIX"srw" +#endif + }; + + //run --> + + mutex_tester< + ticket_mutex, mcs_mutex, ttas_mutex, std::mutex +#ifdef WIN32 + ,srw_mutex +#endif + >::run(results, names, x.first, x.second); + + //<-- run + +#ifdef VERBOSE_TEST + std::cout << "threads"; + for(auto & i : results) + std::cout << ",\"" << i.first << '\"'; + std::cout << std::endl; + int j = 0; + FOR_GAUNTLET(num_threads) { + std::cout << num_threads; + for(auto & i : results) + std::cout << ',' << i.second[j]; + std::cout << std::endl; + ++j; + } +#endif + } + } + catch(std::string & e) { + std::cerr << "EXCEPTION : " << e << std::endl; + EXPECT_TRUE( false ); + } +} + +} // namespace Test + +#endif diff 
--git a/lib/kokkos/core/unit_test/TestSynchronic.hpp b/lib/kokkos/core/unit_test/TestSynchronic.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d820129e8b571fa5eac2dc7f8d5016c47cd589f4 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestSynchronic.hpp @@ -0,0 +1,240 @@ +/* + +Copyright (c) 2014, NVIDIA Corporation +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef TEST_SYNCHRONIC_HPP +#define TEST_SYNCHRONIC_HPP + +#include <impl/Kokkos_Synchronic.hpp> +#include <mutex> + +namespace Test { + +template <bool truly> +struct dumb_mutex { /* Spin lock on an atomic<bool>: lock() retries an acquire CAS false->true and, between attempts, polls the flag, calling Kokkos::Impl::portable_yield() on each poll unless `truly` is true. */ + + dumb_mutex () : locked(0) { + } + + void lock() { + while(1) { + bool state = false; + if (locked.compare_exchange_weak(state,true,std::memory_order_acquire)) { + break; + } + while (locked.load(std::memory_order_relaxed)) { + if (!truly) { + Kokkos::Impl::portable_yield(); + } + } + } + } + + void unlock() { + locked.store(false,std::memory_order_release); + } + +private : + std::atomic<bool> locked; +}; + +#ifdef WIN32 +#include <winsock2.h> +#include <windows.h> +#include <synchapi.h> +struct srw_mutex { /* Exclusive-mode wrapper around a Win32 SRWLOCK; compiled only when WIN32 is defined. NOTE(review): the three includes above sit inside namespace Test - confirm that is intended. */ + + srw_mutex () { + InitializeSRWLock(&_lock); + } + + void lock() { + AcquireSRWLockExclusive(&_lock); + } + void unlock() { + ReleaseSRWLockExclusive(&_lock); + } + +private : + SRWLOCK _lock; +}; +#endif + +struct ttas_mutex { /* Lock on a Kokkos::Impl::synchronic<bool>: lock() tries a relaxed CAS false->true, calls expect_update(true) after each failed attempt, and issues an acquire fence once the flag is claimed. */ + + ttas_mutex() : locked(false) { + } + + ttas_mutex(const ttas_mutex&) = delete; + ttas_mutex& operator=(const ttas_mutex&) = delete; + + void lock() { + for(int i = 0;; ++i) { + bool state = false; + if(locked.compare_exchange_weak(state,true,std::memory_order_relaxed,Kokkos::Impl::notify_none)) + break; + locked.expect_update(true); + } + std::atomic_thread_fence(std::memory_order_acquire); + } + void unlock() { + locked.store(false,std::memory_order_release); + } + +private : + Kokkos::Impl::synchronic<bool> locked; +}; + +struct ticket_mutex { /* Ticket lock: lock() draws the next ticket from `queue` and waits until `active` equals it; unlock() advances `active` by one. */ + + ticket_mutex() : active(0), queue(0) { + } + + ticket_mutex(const ticket_mutex&) = delete; + ticket_mutex& operator=(const ticket_mutex&) = delete; + + void lock() { + int const me = queue.fetch_add(1, std::memory_order_relaxed); + while(me != active.load_when_equal(me, std::memory_order_acquire)) + ; + } + + void unlock() { + active.fetch_add(1,std::memory_order_release); + } +private : + Kokkos::Impl::synchronic<int> active; + std::atomic<int> queue; +}; + +struct mcs_mutex { /* Queue lock: the lock is held by constructing the nested unique_lock, which links itself behind the previous `head` and waits on its own `ready` flag. */ + + mcs_mutex() : 
head(nullptr) { + } + + mcs_mutex(const mcs_mutex&) = delete; + mcs_mutex& operator=(const mcs_mutex&) = delete; + + struct unique_lock { + + unique_lock(mcs_mutex & arg_m) : m(arg_m), next(nullptr), ready(false) { /* acquire: become the new head; if there was a previous head, publish ourselves as its `next` and wait until `ready` is set */ + + unique_lock * const h = m.head.exchange(this,std::memory_order_acquire); + if(__builtin_expect(h != nullptr,0)) { + h->next.store(this,std::memory_order_seq_cst,Kokkos::Impl::notify_one); + while(!ready.load_when_not_equal(false,std::memory_order_acquire)) + ; + } + } + + unique_lock(const unique_lock&) = delete; + unique_lock& operator=(const unique_lock&) = delete; + + ~unique_lock() { /* release: reset head to null if we are still the head; otherwise wait for the successor to appear in `next` and set its `ready` flag */ + unique_lock * h = this; + if(__builtin_expect(!m.head.compare_exchange_strong(h,nullptr,std::memory_order_release, std::memory_order_relaxed),0)) { + unique_lock * n = next.load(std::memory_order_relaxed); + while(!n) + n = next.load_when_not_equal(n,std::memory_order_relaxed); + n->ready.store(true,std::memory_order_release,Kokkos::Impl::notify_one); + } + } + + private: + mcs_mutex & m; + Kokkos::Impl::synchronic<unique_lock*> next; + Kokkos::Impl::synchronic<bool> ready; + }; + +private : + std::atomic<unique_lock*> head; +}; + +} + +namespace std { +template<> +struct unique_lock<Test::mcs_mutex> : Test::mcs_mutex::unique_lock { /* lets code written against std::unique_lock<mutex_type> work with mcs_mutex, which has no lock()/unlock() members */ + unique_lock(Test::mcs_mutex & arg_m) : Test::mcs_mutex::unique_lock(arg_m) { + } + unique_lock(const unique_lock&) = delete; + unique_lock& operator=(const unique_lock&) = delete; +}; + +} + +#include <math.h> /* declares logf, called by MersenneTwister::poissonInterval below; previously commented out */ +#include <stdlib.h> + +namespace Test { + +//------------------------------------- +// MersenneTwister +//------------------------------------- +#define MT_IA 397 +#define MT_LEN 624 + +class MersenneTwister /* Self-contained pseudo-random generator for the tests: the constructor fills m_buffer from rand() and then discards MT_LEN * 100 outputs. */ +{ + volatile unsigned long m_buffer[MT_LEN][64/sizeof(unsigned long)]; /* only column 0 is used; each row spans 64 bytes - presumably cache-line padding, TODO confirm */ + volatile int m_index; + +public: + MersenneTwister() { + for (int i = 0; i < MT_LEN; i++) + m_buffer[i][0] = rand(); + m_index = 0; + for (int i = 0; i < MT_LEN * 100; i++) + integer(); + } + unsigned long integer() { /* updates one state word, advances m_index with wrap-around at MT_LEN, and returns the tempered word */ + // Indices + int i = 
m_index; + int i2 = m_index + 1; if (i2 >= MT_LEN) i2 = 0; // wrap-around + int j = m_index + MT_IA; if (j >= MT_LEN) j -= MT_LEN; // wrap-around + + // Twist + unsigned long s = (m_buffer[i][0] & 0x80000000) | (m_buffer[i2][0] & 0x7fffffff); + unsigned long r = m_buffer[j][0] ^ (s >> 1) ^ ((s & 1) * 0x9908B0DF); + m_buffer[m_index][0] = r; + m_index = i2; + + // Swizzle + r ^= (r >> 11); + r ^= (r << 7) & 0x9d2c5680UL; + r ^= (r << 15) & 0xefc60000UL; + r ^= (r >> 18); + return r; + } + float poissonInterval(float ooLambda) { /* returns -logf(1 - u) * ooLambda with u = integer() * 2.3283e-10f */ + return -logf(1.0f - integer() * 2.3283e-10f) * ooLambda; + } +}; + +} // namespace Test + +#endif //TEST_SYNCHRONIC_HPP diff --git a/lib/kokkos/core/unit_test/TestTaskPolicy.hpp b/lib/kokkos/core/unit_test/TestTaskPolicy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..71790f6def82d50a12d37d88e0b0e7d17f28799f --- /dev/null +++ b/lib/kokkos/core/unit_test/TestTaskPolicy.hpp @@ -0,0 +1,1145 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#ifndef KOKKOS_UNITTEST_TASKPOLICY_HPP +#define KOKKOS_UNITTEST_TASKPOLICY_HPP + +#include <stdio.h> +#include <iostream> +#include <cmath> +#include <Kokkos_TaskPolicy.hpp> + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace TestTaskPolicy { + +namespace { + +long eval_fib( long n ) +{ + constexpr long mask = 0x03 ; + + long fib[4] = { 0 , 1 , 1 , 2 }; + + for ( long i = 2 ; i <= n ; ++i ) { + fib[ i & mask ] = fib[ ( i - 1 ) & mask ] + fib[ ( i - 2 ) & mask ]; + } + + return fib[ n & mask ]; +} + +} + +template< typename Space > +struct TestFib +{ + typedef Kokkos::TaskPolicy<Space> policy_type ; + typedef Kokkos::Future<long,Space> future_type ; + typedef long value_type ; + + policy_type policy ; + future_type fib_m1 ; + future_type fib_m2 ; + const value_type n ; + + KOKKOS_INLINE_FUNCTION + TestFib( const policy_type & arg_policy , const value_type arg_n ) + : 
policy(arg_policy) + , fib_m1() , fib_m2() + , n( arg_n ) + {} + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type & , value_type & result ) + { +#if 0 + printf( "\nTestFib(%ld) %d %d\n" + , n + , int( ! fib_m1.is_null() ) + , int( ! fib_m2.is_null() ) + ); +#endif + + if ( n < 2 ) { + result = n ; + } + else if ( ! fib_m2.is_null() && ! fib_m1.is_null() ) { + result = fib_m1.get() + fib_m2.get(); + } + else { + + // Spawn new children and respawn myself to sum their results: + // Spawn lower value at higher priority as it has a shorter + // path to completion. + + fib_m2 = policy.task_spawn( TestFib(policy,n-2) + , Kokkos::TaskSingle + , Kokkos::TaskHighPriority ); + + fib_m1 = policy.task_spawn( TestFib(policy,n-1) + , Kokkos::TaskSingle ); + + Kokkos::Future<Space> dep[] = { fib_m1 , fib_m2 }; + + Kokkos::Future<Space> fib_all = policy.when_all( 2 , dep ); + + if ( ! fib_m2.is_null() && ! fib_m1.is_null() && ! fib_all.is_null() ) { + // High priority to retire this branch + policy.respawn( this , Kokkos::TaskHighPriority , fib_all ); + } + else { +#if 0 + printf( "TestFib(%ld) insufficient memory alloc_capacity(%d) task_max(%d) task_accum(%ld)\n" + , n + , policy.allocation_capacity() + , policy.allocated_task_count_max() + , policy.allocated_task_count_accum() + ); +#endif + Kokkos::abort("TestFib insufficient memory"); + + } + } + } + + static void run( int i , size_t MemoryCapacity = 16000 ) + { + typedef typename policy_type::memory_space memory_space ; + + enum { Log2_SuperBlockSize = 12 }; + + policy_type root_policy( memory_space() , MemoryCapacity , Log2_SuperBlockSize ); + + future_type f = root_policy.host_spawn( TestFib(root_policy,i) , Kokkos::TaskSingle ); + Kokkos::wait( root_policy ); + ASSERT_EQ( eval_fib(i) , f.get() ); + +#if 0 + fprintf( stdout , "\nTestFib::run(%d) spawn_size(%d) when_all_size(%d) alloc_capacity(%d) task_max(%d) task_accum(%ld)\n" + , i + , int(root_policy.template spawn_allocation_size<TestFib>()) 
+ , int(root_policy.when_all_allocation_size(2)) + , root_policy.allocation_capacity() + , root_policy.allocated_task_count_max() + , root_policy.allocated_task_count_accum() + ); + fflush( stdout ); +#endif + } + +}; + +} // namespace TestTaskPolicy + +//---------------------------------------------------------------------------- + +namespace TestTaskPolicy { + +template< class Space > +struct TestTaskDependence { + + typedef Kokkos::TaskPolicy<Space> policy_type ; + typedef Kokkos::Future<Space> future_type ; + typedef Kokkos::View<long,Space> accum_type ; + typedef void value_type ; + + policy_type m_policy ; + accum_type m_accum ; + long m_count ; + + KOKKOS_INLINE_FUNCTION + TestTaskDependence( long n + , const policy_type & arg_policy + , const accum_type & arg_accum ) + : m_policy( arg_policy ) + , m_accum( arg_accum ) + , m_count( n ) + {} + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type & ) + { + enum { CHUNK = 8 }; + const int n = CHUNK < m_count ? CHUNK : m_count ; + + if ( 1 < m_count ) { + future_type f[ CHUNK ] ; + + const int inc = ( m_count + n - 1 ) / n ; + + for ( int i = 0 ; i < n ; ++i ) { + long begin = i * inc ; + long count = begin + inc < m_count ? 
inc : m_count - begin ; + f[i] = m_policy.task_spawn( TestTaskDependence(count,m_policy,m_accum) , Kokkos::TaskSingle ); + } + + m_count = 0 ; + + m_policy.respawn( this , m_policy.when_all( n , f ) ); + } + else if ( 1 == m_count ) { + Kokkos::atomic_increment( & m_accum() ); + } + } + + static void run( int n ) + { + typedef typename policy_type::memory_space memory_space ; + + // enum { MemoryCapacity = 4000 }; // Triggers infinite loop in memory pool + enum { MemoryCapacity = 16000 }; + enum { Log2_SuperBlockSize = 12 }; + policy_type policy( memory_space() , MemoryCapacity , Log2_SuperBlockSize ); + + accum_type accum("accum"); + + typename accum_type::HostMirror host_accum = + Kokkos::create_mirror_view( accum ); + + policy.host_spawn( TestTaskDependence(n,policy,accum) , Kokkos::TaskSingle ); + + Kokkos::wait( policy ); + + Kokkos::deep_copy( host_accum , accum ); + + ASSERT_EQ( host_accum() , n ); + } +}; + +} // namespace TestTaskPolicy + +//---------------------------------------------------------------------------- + +namespace TestTaskPolicy { + +template< class ExecSpace > +struct TestTaskTeam { + + //enum { SPAN = 8 }; + enum { SPAN = 33 }; + //enum { SPAN = 1 }; + + typedef void value_type ; + typedef Kokkos::TaskPolicy<ExecSpace> policy_type ; + typedef Kokkos::Future<ExecSpace> future_type ; + typedef Kokkos::View<long*,ExecSpace> view_type ; + + policy_type policy ; + future_type future ; + + view_type parfor_result ; + view_type parreduce_check ; + view_type parscan_result ; + view_type parscan_check ; + const long nvalue ; + + KOKKOS_INLINE_FUNCTION + TestTaskTeam( const policy_type & arg_policy + , const view_type & arg_parfor_result + , const view_type & arg_parreduce_check + , const view_type & arg_parscan_result + , const view_type & arg_parscan_check + , const long arg_nvalue ) + : policy(arg_policy) + , future() + , parfor_result( arg_parfor_result ) + , parreduce_check( arg_parreduce_check ) + , parscan_result( arg_parscan_result ) + , 
parscan_check( arg_parscan_check ) + , nvalue( arg_nvalue ) + {} + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type & member ) + { + const long end = nvalue + 1 ; + const long begin = 0 < end - SPAN ? end - SPAN : 0 ; + + if ( 0 < begin && future.is_null() ) { + if ( member.team_rank() == 0 ) { + future = policy.task_spawn + ( TestTaskTeam( policy , + parfor_result , + parreduce_check, + parscan_result, + parscan_check, + begin - 1 ) + , Kokkos::TaskTeam ); + + assert( ! future.is_null() ); + + policy.respawn( this , future ); + } + return ; + } + + Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i ) { parfor_result[i] = i ; } + ); + + // test parallel_reduce without join + + long tot = 0; + long expected = (begin+end-1)*(end-begin)*0.5; + + Kokkos::parallel_reduce( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i, long &res) { res += parfor_result[i]; } + , tot); + Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i ) { parreduce_check[i] = expected-tot ; } + ); + + // test parallel_reduce with join + + tot = 0; + Kokkos::parallel_reduce( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i, long &res) { res += parfor_result[i]; } + , [&]( long& val1, const long& val2) { val1 += val2; } + , tot); + Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i ) { parreduce_check[i] += expected-tot ; } + ); + +#if 0 + // test parallel_scan + + // Exclusive scan + Kokkos::parallel_scan<long>( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i, long &val , const bool final ) { + if ( final ) { parscan_result[i] = val; } + val += i; + } + ); + + if ( member.team_rank() == 0 ) { + for ( long i = begin ; i < end ; ++i ) { + parscan_check[i] = (i*(i-1)-begin*(begin-1))*0.5-parscan_result[i]; + } + } + + // Inclusive scan + Kokkos::parallel_scan<long>( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i, long &val , const bool final ) { + val += i; 
+ if ( final ) { parscan_result[i] = val; } + } + ); + + if ( member.team_rank() == 0 ) { + for ( long i = begin ; i < end ; ++i ) { + parscan_check[i] += (i*(i+1)-begin*(begin-1))*0.5-parscan_result[i]; + } + } +#endif + + } + + static void run( long n ) + { + // const unsigned memory_capacity = 10000 ; // causes memory pool infinite loop + // const unsigned memory_capacity = 100000 ; // fails with SPAN=1 for serial and OMP + const unsigned memory_capacity = 400000 ; + + policy_type root_policy( typename policy_type::memory_space() + , memory_capacity ); + + view_type root_parfor_result("parfor_result",n+1); + view_type root_parreduce_check("parreduce_check",n+1); + view_type root_parscan_result("parscan_result",n+1); + view_type root_parscan_check("parscan_check",n+1); + + typename view_type::HostMirror + host_parfor_result = Kokkos::create_mirror_view( root_parfor_result ); + typename view_type::HostMirror + host_parreduce_check = Kokkos::create_mirror_view( root_parreduce_check ); + typename view_type::HostMirror + host_parscan_result = Kokkos::create_mirror_view( root_parscan_result ); + typename view_type::HostMirror + host_parscan_check = Kokkos::create_mirror_view( root_parscan_check ); + + future_type f = root_policy.host_spawn( + TestTaskTeam( root_policy , + root_parfor_result , + root_parreduce_check , + root_parscan_result, + root_parscan_check, + n ) , + Kokkos::TaskTeam ); + + Kokkos::wait( root_policy ); + + Kokkos::deep_copy( host_parfor_result , root_parfor_result ); + Kokkos::deep_copy( host_parreduce_check , root_parreduce_check ); + Kokkos::deep_copy( host_parscan_result , root_parscan_result ); + Kokkos::deep_copy( host_parscan_check , root_parscan_check ); + + for ( long i = 0 ; i <= n ; ++i ) { + const long answer = i ; + if ( host_parfor_result(i) != answer ) { + std::cerr << "TestTaskTeam::run ERROR parallel_for result(" << i << ") = " + << host_parfor_result(i) << " != " << answer << std::endl ; + } + if ( host_parreduce_check(i) != 0 ) { 
+ std::cerr << "TestTaskTeam::run ERROR parallel_reduce check(" << i << ") = " + << host_parreduce_check(i) << " != 0" << std::endl ; + } //TODO + if ( host_parscan_check(i) != 0 ) { + std::cerr << "TestTaskTeam::run ERROR parallel_scan check(" << i << ") = " + << host_parscan_check(i) << " != 0" << std::endl ; + } + } + } +}; + +template< class ExecSpace > +struct TestTaskTeamValue { + + enum { SPAN = 8 }; + + typedef long value_type ; + typedef Kokkos::TaskPolicy<ExecSpace> policy_type ; + typedef Kokkos::Future<value_type,ExecSpace> future_type ; + typedef Kokkos::View<long*,ExecSpace> view_type ; + + policy_type policy ; + future_type future ; + + view_type result ; + const long nvalue ; + + KOKKOS_INLINE_FUNCTION + TestTaskTeamValue( const policy_type & arg_policy + , const view_type & arg_result + , const long arg_nvalue ) + : policy(arg_policy) + , future() + , result( arg_result ) + , nvalue( arg_nvalue ) + {} + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type const & member + , value_type & final ) + { + const long end = nvalue + 1 ; + const long begin = 0 < end - SPAN ? end - SPAN : 0 ; + + if ( 0 < begin && future.is_null() ) { + if ( member.team_rank() == 0 ) { + + future = policy.task_spawn + ( TestTaskTeamValue( policy , result , begin - 1 ) + , Kokkos::TaskTeam ); + + assert( ! 
future.is_null() ); + + policy.respawn( this , future ); + } + return ; + } + + Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i ) { result[i] = i + 1 ; } + ); + + if ( member.team_rank() == 0 ) { + final = result[nvalue] ; + } + + Kokkos::memory_fence(); + } + + static void run( long n ) + { + // const unsigned memory_capacity = 10000 ; // causes memory pool infinite loop + const unsigned memory_capacity = 100000 ; + + policy_type root_policy( typename policy_type::memory_space() + , memory_capacity ); + + view_type root_result("result",n+1); + + typename view_type::HostMirror + host_result = Kokkos::create_mirror_view( root_result ); + + future_type fv = root_policy.host_spawn + ( TestTaskTeamValue( root_policy, root_result, n ) , Kokkos::TaskTeam ); + + Kokkos::wait( root_policy ); + + Kokkos::deep_copy( host_result , root_result ); + + if ( fv.get() != n + 1 ) { + std::cerr << "TestTaskTeamValue ERROR future = " + << fv.get() << " != " << n + 1 << std::endl ; + } + for ( long i = 0 ; i <= n ; ++i ) { + const long answer = i + 1 ; + if ( host_result(i) != answer ) { + std::cerr << "TestTaskTeamValue ERROR result(" << i << ") = " + << host_result(i) << " != " << answer << std::endl ; + } + } + } +}; +} // namespace TestTaskPolicy + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace TestTaskPolicy { + +template< class ExecSpace > +struct FibChild { + + typedef long value_type ; + + Kokkos::Experimental::TaskPolicy<ExecSpace> policy ; + Kokkos::Experimental::Future<long,ExecSpace> fib_1 ; + Kokkos::Experimental::Future<long,ExecSpace> fib_2 ; + const value_type n ; + int has_nested ; + + KOKKOS_INLINE_FUNCTION + FibChild( const Kokkos::Experimental::TaskPolicy<ExecSpace> & arg_policy + , const value_type arg_n ) + : policy(arg_policy) + , fib_1() , fib_2() + , n( arg_n ), has_nested(0) {} + + KOKKOS_INLINE_FUNCTION 
+ void apply( value_type & result ) + { + typedef Kokkos::Experimental::Future<long,ExecSpace> future_type ; + + if ( n < 2 ) { + + has_nested = -1 ; + + result = n ; + } + else { + if ( has_nested == 0 ) { + // Spawn new children and respawn myself to sum their results: + // Spawn lower value at higher priority as it has a shorter + // path to completion. + if ( fib_2.is_null() ) { + fib_2 = policy.task_create( FibChild(policy,n-2) ); + } + + if ( ! fib_2.is_null() && fib_1.is_null() ) { + fib_1 = policy.task_create( FibChild(policy,n-1) ); + } + + if ( ! fib_1.is_null() ) { + has_nested = 2 ; + + policy.spawn( fib_2 , true /* high priority */ ); + policy.spawn( fib_1 ); + policy.add_dependence( this , fib_1 ); + policy.add_dependence( this , fib_2 ); + policy.respawn( this ); + } + else { + // Release task memory before spawning the task, + // after spawning memory cannot be released. + fib_2 = future_type(); + // Respawn when more memory is available + policy.respawn_needing_memory( this ); + } + } + else if ( has_nested == 2 ) { + + has_nested = -1 ; + + result = fib_1.get() + fib_2.get(); + +if ( false ) { + printf("FibChild %ld = fib(%ld), task_count(%d)\n" + , long(n), long(result), policy.allocated_task_count()); +} + + } + else { + printf("FibChild(%ld) execution error\n",(long)n); + Kokkos::abort("FibChild execution error"); + } + } + } +}; + +template< class ExecSpace > +struct FibChild2 { + + typedef long value_type ; + + Kokkos::Experimental::TaskPolicy<ExecSpace> policy ; + Kokkos::Experimental::Future<long,ExecSpace> fib_a ; + Kokkos::Experimental::Future<long,ExecSpace> fib_b ; + const value_type n ; + int has_nested ; + + KOKKOS_INLINE_FUNCTION + FibChild2( const Kokkos::Experimental::TaskPolicy<ExecSpace> & arg_policy + , const value_type arg_n ) + : policy(arg_policy) + , n( arg_n ), has_nested(0) {} + + KOKKOS_INLINE_FUNCTION + void apply( value_type & result ) + { + if ( 0 == has_nested ) { + if ( n < 2 ) { + + has_nested = -1 ; + + result = n 
; + } + else if ( n < 4 ) { + // Spawn new children and respawn myself to sum their results: + // result = Fib(n-1) + Fib(n-2) + has_nested = 2 ; + + // Spawn lower value at higher priority as it has a shorter + // path to completion. + + policy.clear_dependence( this ); + fib_a = policy.spawn( policy.task_create( FibChild2(policy,n-1) ) ); + fib_b = policy.spawn( policy.task_create( FibChild2(policy,n-2) ) , true ); + policy.add_dependence( this , fib_a ); + policy.add_dependence( this , fib_b ); + policy.respawn( this ); + } + else { + // Spawn new children and respawn myself to sum their results: + // result = Fib(n-1) + Fib(n-2) + // result = ( Fib(n-2) + Fib(n-3) ) + ( Fib(n-3) + Fib(n-4) ) + // result = ( ( Fib(n-3) + Fib(n-4) ) + Fib(n-3) ) + ( Fib(n-3) + Fib(n-4) ) + // result = 3 * Fib(n-3) + 2 * Fib(n-4) + has_nested = 4 ; + + // Spawn lower value at higher priority as it has a shorter + // path to completion. + + policy.clear_dependence( this ); + fib_a = policy.spawn( policy.task_create( FibChild2(policy,n-3) ) ); + fib_b = policy.spawn( policy.task_create( FibChild2(policy,n-4) ) , true ); + policy.add_dependence( this , fib_a ); + policy.add_dependence( this , fib_b ); + policy.respawn( this ); + } + } + else if ( 2 == has_nested || 4 == has_nested ) { + result = ( has_nested == 2 ) ? 
fib_a.get() + fib_b.get() + : 3 * fib_a.get() + 2 * fib_b.get() ; + + has_nested = -1 ; + } + else { + printf("FibChild2(%ld) execution error\n",(long)n); + Kokkos::abort("FibChild2 execution error"); + } + } +}; + +template< class ExecSpace > +void test_fib( long n , const unsigned task_max_count = 4096 ) +{ + const unsigned task_max_size = 256 ; + const unsigned task_dependence = 4 ; + + Kokkos::Experimental::TaskPolicy<ExecSpace> + policy( task_max_count + , task_max_size + , task_dependence ); + + Kokkos::Experimental::Future<long,ExecSpace> f = + policy.spawn( policy.proc_create( FibChild<ExecSpace>(policy,n) ) ); + + Kokkos::Experimental::wait( policy ); + + if ( f.get() != eval_fib(n) ) { + std::cout << "Fib(" << n << ") = " << f.get(); + std::cout << " != " << eval_fib(n); + std::cout << std::endl ; + } +} + +template< class ExecSpace > +void test_fib2( long n , const unsigned task_max_count = 1024 ) +{ + const unsigned task_max_size = 256 ; + const unsigned task_dependence = 4 ; + + Kokkos::Experimental::TaskPolicy<ExecSpace> + policy( task_max_count + , task_max_size + , task_dependence ); + + Kokkos::Experimental::Future<long,ExecSpace> f = + policy.spawn( policy.proc_create( FibChild2<ExecSpace>(policy,n) ) ); + + Kokkos::Experimental::wait( policy ); + + if ( f.get() != eval_fib(n) ) { + std::cout << "Fib2(" << n << ") = " << f.get(); + std::cout << " != " << eval_fib(n); + std::cout << std::endl ; + } +} + +//---------------------------------------------------------------------------- + +template< class ExecSpace > +struct Norm2 { + + typedef double value_type ; + + const double * const m_x ; + + Norm2( const double * x ) : m_x(x) {} + + inline + void init( double & val ) const { val = 0 ; } + + KOKKOS_INLINE_FUNCTION + void operator()( int i , double & val ) const { val += m_x[i] * m_x[i] ; } + + void apply( double & dst ) const { dst = std::sqrt( dst ); } +}; + +template< class ExecSpace > +void test_norm2( const int n ) +{ + const unsigned 
task_max_count = 1024 ; + const unsigned task_max_size = 256 ; + const unsigned task_dependence = 4 ; + + Kokkos::Experimental::TaskPolicy<ExecSpace> + policy( task_max_count + , task_max_size + , task_dependence ); + + double * const x = new double[n]; + + for ( int i = 0 ; i < n ; ++i ) x[i] = 1 ; + + Kokkos::RangePolicy<ExecSpace> r(0,n); + + Kokkos::Experimental::Future<double,ExecSpace> f = + Kokkos::Experimental::spawn_reduce( policy , r , Norm2<ExecSpace>(x) ); + + Kokkos::Experimental::wait( policy ); + +#if defined(PRINT) + std::cout << "Norm2: " << f.get() << std::endl ; +#endif + + delete[] x ; +} + +//---------------------------------------------------------------------------- + +template< class Space > +struct TaskDep { + + typedef int value_type ; + typedef Kokkos::Experimental::TaskPolicy< Space > policy_type ; + + const policy_type policy ; + const int input ; + + TaskDep( const policy_type & arg_p , const int arg_i ) + : policy( arg_p ), input( arg_i ) {} + + KOKKOS_INLINE_FUNCTION + void apply( int & val ) + { + val = input ; + const int num = policy.get_dependence( this ); + + for ( int i = 0 ; i < num ; ++i ) { + Kokkos::Experimental::Future<int,Space> f = policy.get_dependence( this , i ); + val += f.get(); + } + } +}; + + +template< class Space > +void test_task_dep( const int n ) +{ + enum { NTEST = 64 }; + + const unsigned task_max_count = 1024 ; + const unsigned task_max_size = 64 ; + const unsigned task_dependence = 4 ; + + Kokkos::Experimental::TaskPolicy<Space> + policy( task_max_count + , task_max_size + , task_dependence ); + + Kokkos::Experimental::Future<int,Space> f[ NTEST ]; + + for ( int i = 0 ; i < NTEST ; ++i ) { + // Create task in the "constructing" state with capacity for 'n+1' dependences + f[i] = policy.proc_create( TaskDep<Space>(policy,0) , n + 1 ); + + if ( f[i].get_task_state() != Kokkos::Experimental::TASK_STATE_CONSTRUCTING ) { + Kokkos::Impl::throw_runtime_exception("get_task_state() != 
Kokkos::Experimental::TASK_STATE_CONSTRUCTING"); + } + + // Only use 'n' dependences + + for ( int j = 0 ; j < n ; ++j ) { + + Kokkos::Experimental::Future<int,Space> nested = + policy.proc_create( TaskDep<Space>(policy,j+1) ); + + policy.spawn( nested ); + + // Add dependence to a "constructing" task + policy.add_dependence( f[i] , nested ); + } + + // Spawn task from the "constructing" to the "waiting" state + policy.spawn( f[i] ); + } + + const int answer = n % 2 ? n * ( ( n + 1 ) / 2 ) : ( n / 2 ) * ( n + 1 ); + + Kokkos::Experimental::wait( policy ); + + int error = 0 ; + for ( int i = 0 ; i < NTEST ; ++i ) { + if ( f[i].get_task_state() != Kokkos::Experimental::TASK_STATE_COMPLETE ) { + Kokkos::Impl::throw_runtime_exception("get_task_state() != Kokkos::Experimental::TASK_STATE_COMPLETE"); + } + if ( answer != f[i].get() && 0 == error ) { + std::cout << "test_task_dep(" << n << ") ERROR at[" << i << "]" + << " answer(" << answer << ") != result(" << f[i].get() << ")" << std::endl ; + } + } +} + +//---------------------------------------------------------------------------- + +template< class ExecSpace > +struct TaskTeam { + + enum { SPAN = 8 }; + + typedef void value_type ; + typedef Kokkos::Experimental::TaskPolicy<ExecSpace> policy_type ; + typedef Kokkos::Experimental::Future<void,ExecSpace> future_type ; + typedef Kokkos::View<long*,ExecSpace> view_type ; + + policy_type policy ; + future_type future ; + + view_type result ; + const long nvalue ; + + KOKKOS_INLINE_FUNCTION + TaskTeam( const policy_type & arg_policy + , const view_type & arg_result + , const long arg_nvalue ) + : policy(arg_policy) + , future() + , result( arg_result ) + , nvalue( arg_nvalue ) + {} + + KOKKOS_INLINE_FUNCTION + void apply( const typename policy_type::member_type & member ) + { + const long end = nvalue + 1 ; + const long begin = 0 < end - SPAN ? 
end - SPAN : 0 ; + + if ( 0 < begin && future.get_task_state() == Kokkos::Experimental::TASK_STATE_NULL ) { + if ( member.team_rank() == 0 ) { + future = policy.spawn( policy.task_create_team( TaskTeam( policy , result , begin - 1 ) ) ); + policy.clear_dependence( this ); + policy.add_dependence( this , future ); + policy.respawn( this ); + } + return ; + } + + Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i ) { result[i] = i + 1 ; } + ); + } +}; + +template< class ExecSpace > +struct TaskTeamValue { + + enum { SPAN = 8 }; + + typedef long value_type ; + typedef Kokkos::Experimental::TaskPolicy<ExecSpace> policy_type ; + typedef Kokkos::Experimental::Future<value_type,ExecSpace> future_type ; + typedef Kokkos::View<long*,ExecSpace> view_type ; + + policy_type policy ; + future_type future ; + + view_type result ; + const long nvalue ; + + KOKKOS_INLINE_FUNCTION + TaskTeamValue( const policy_type & arg_policy + , const view_type & arg_result + , const long arg_nvalue ) + : policy(arg_policy) + , future() + , result( arg_result ) + , nvalue( arg_nvalue ) + {} + + KOKKOS_INLINE_FUNCTION + void apply( const typename policy_type::member_type & member , value_type & final ) + { + const long end = nvalue + 1 ; + const long begin = 0 < end - SPAN ? 
end - SPAN : 0 ; + + if ( 0 < begin && future.is_null() ) { + if ( member.team_rank() == 0 ) { + + future = policy.task_create_team( TaskTeamValue( policy , result , begin - 1 ) ); + + policy.spawn( future ); + policy.add_dependence( this , future ); + policy.respawn( this ); + } + return ; + } + + Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i ) { result[i] = i + 1 ; } + ); + + if ( member.team_rank() == 0 ) { + final = result[nvalue] ; + } + + Kokkos::memory_fence(); + } +}; + +template< class ExecSpace > +void test_task_team( long n ) +{ + typedef TaskTeam< ExecSpace > task_type ; + typedef TaskTeamValue< ExecSpace > task_value_type ; + typedef typename task_type::view_type view_type ; + typedef typename task_type::policy_type policy_type ; + + typedef typename task_type::future_type future_type ; + typedef typename task_value_type::future_type future_value_type ; + + const unsigned task_max_count = 1024 ; + const unsigned task_max_size = 256 ; + const unsigned task_dependence = 4 ; + + policy_type + policy( task_max_count + , task_max_size + , task_dependence ); + + view_type result("result",n+1); + + typename view_type::HostMirror + host_result = Kokkos::create_mirror_view( result ); + + future_type f = policy.proc_create_team( task_type( policy , result , n ) ); + + ASSERT_FALSE( f.is_null() ); + + policy.spawn( f ); + + Kokkos::Experimental::wait( policy ); + + Kokkos::deep_copy( host_result , result ); + + for ( long i = 0 ; i <= n ; ++i ) { + const long answer = i + 1 ; + if ( host_result(i) != answer ) { + std::cerr << "test_task_team void ERROR result(" << i << ") = " + << host_result(i) << " != " << answer << std::endl ; + } + } + + future_value_type fv = policy.proc_create_team( task_value_type( policy , result , n ) ); + + ASSERT_FALSE( fv.is_null() ); + + policy.spawn( fv ); + + Kokkos::Experimental::wait( policy ); + + Kokkos::deep_copy( host_result , result ); + + if ( fv.get() != n + 1 ) { + std::cerr << 
"test_task_team value ERROR future = " + << fv.get() << " != " << n + 1 << std::endl ; + } + for ( long i = 0 ; i <= n ; ++i ) { + const long answer = i + 1 ; + if ( host_result(i) != answer ) { + std::cerr << "test_task_team value ERROR result(" << i << ") = " + << host_result(i) << " != " << answer << std::endl ; + } + } +} + +//---------------------------------------------------------------------------- + +template< class ExecSpace > +struct TaskLatchAdd { + + typedef void value_type ; + typedef Kokkos::Experimental::Future< Kokkos::Experimental::Latch , ExecSpace > future_type ; + + future_type latch ; + volatile int * count ; + + KOKKOS_INLINE_FUNCTION + TaskLatchAdd( const future_type & arg_latch + , volatile int * const arg_count ) + : latch( arg_latch ) + , count( arg_count ) + {} + + KOKKOS_INLINE_FUNCTION + void apply() + { + Kokkos::atomic_fetch_add( count , 1 ); + latch.add(1); + } +}; + +template< class ExecSpace > +struct TaskLatchRun { + + typedef void value_type ; + typedef Kokkos::Experimental::TaskPolicy< ExecSpace > policy_type ; + typedef Kokkos::Experimental::Future< Kokkos::Experimental::Latch , ExecSpace > future_type ; + + policy_type policy ; + int total ; + volatile int count ; + + KOKKOS_INLINE_FUNCTION + TaskLatchRun( const policy_type & arg_policy , const int arg_total ) + : policy(arg_policy), total(arg_total), count(0) {} + + KOKKOS_INLINE_FUNCTION + void apply() + { + if ( 0 == count && 0 < total ) { + future_type latch = policy.create_latch( total ); + + for ( int i = 0 ; i < total ; ++i ) { + auto f = policy.task_create( TaskLatchAdd<ExecSpace>(latch,&count) , 0 ); + if ( f.is_null() ) { + Kokkos::abort("TaskLatchAdd allocation FAILED" ); + } + + if ( policy.spawn( f ).is_null() ) { + Kokkos::abort("TaskLatcAdd spawning FAILED" ); + } + } + + policy.add_dependence( this , latch ); + policy.respawn( this ); + } + else if ( count != total ) { + printf("TaskLatchRun FAILED %d != %d\n",count,total); + } + } +}; + + +template< class 
ExecSpace > +void test_latch( int n ) +{ + typedef TaskLatchRun< ExecSpace > task_type ; + typedef typename task_type::policy_type policy_type ; + + // Primary + latch + n * LatchAdd + // + // This test uses several two different block sizes for allocation from the + // memory pool, so the memory size requested must be big enough to cause two + // or more superblocks to be used. Currently, the superblock size in the + // task policy is 2^16, so make the minimum requested memory size greater + // than this. + const unsigned task_max_count = n + 2 < 256 ? 256 : n + 2; + const unsigned task_max_size = 256; + const unsigned task_dependence = 4 ; + + policy_type + policy( task_max_count + , task_max_size + , task_dependence ); + + policy.spawn( policy.proc_create( TaskLatchRun<ExecSpace>(policy,n) ) ); + + wait( policy ); +} + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +} // namespace TestTaskPolicy + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #ifndef KOKKOS_UNITTEST_TASKPOLICY_HPP */ + + diff --git a/lib/kokkos/core/unit_test/TestTeam.hpp b/lib/kokkos/core/unit_test/TestTeam.hpp new file mode 100644 index 0000000000000000000000000000000000000000..db6b0cff7e21654f7ba17b531e63fbc63deb2b06 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestTeam.hpp @@ -0,0 +1,910 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <stdio.h> +#include <stdexcept> +#include <sstream> +#include <iostream> + +#include <Kokkos_Core.hpp> + +/*--------------------------------------------------------------------------*/ + +namespace Test { +namespace { + +template< class ExecSpace, class ScheduleType > +struct TestTeamPolicy { + + typedef typename Kokkos::TeamPolicy< ScheduleType, ExecSpace >::member_type team_member ; + typedef Kokkos::View<int**,ExecSpace> view_type ; + + view_type m_flags ; + + TestTeamPolicy( const size_t league_size ) + : m_flags( Kokkos::ViewAllocateWithoutInitializing("flags") + , Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( *this ) + , league_size ) + {} + + struct VerifyInitTag {}; + + KOKKOS_INLINE_FUNCTION + void operator()( const team_member & member ) const + { + const int tid = member.team_rank() + member.team_size() * member.league_rank(); + + m_flags( member.team_rank() , member.league_rank() ) = tid ; + } + + KOKKOS_INLINE_FUNCTION + void operator()( const VerifyInitTag & , const team_member & member ) const + { + const int tid = member.team_rank() + member.team_size() * member.league_rank(); + + if ( tid != m_flags( member.team_rank() , member.league_rank() ) ) { + printf("TestTeamPolicy member(%d,%d) error %d != %d\n" + , member.league_rank() , member.team_rank() + , tid , m_flags( member.team_rank() , member.league_rank() ) ); + } + } + + // included for test_small_league_size + TestTeamPolicy() + : m_flags() + {} + + // included for test_small_league_size + struct NoOpTag {} ; + KOKKOS_INLINE_FUNCTION + void operator()( const NoOpTag & , const team_member & member ) const + {} + + + static void test_small_league_size() { + + int bs = 8; // batch size (number of elements per batch) + int ns = 16; // total number of "problems" to process + + // calculate total scratch memory space size + const int level = 
0; + int mem_size = 960; + const int num_teams = ns/bs; + const Kokkos::TeamPolicy< ExecSpace, NoOpTag > policy(num_teams, Kokkos::AUTO()); + + Kokkos::parallel_for ( policy.set_scratch_size(level, Kokkos::PerTeam(mem_size), Kokkos::PerThread(0)) + , TestTeamPolicy() + ); + } + + static void test_for( const size_t league_size ) + { + TestTeamPolicy functor( league_size ); + + const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor ); + + Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size , team_size ) , functor ); + Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace , VerifyInitTag >( league_size , team_size ) , functor ); + + test_small_league_size(); + } + + struct ReduceTag {}; + + typedef long value_type ; + + KOKKOS_INLINE_FUNCTION + void operator()( const team_member & member , value_type & update ) const + { + update += member.team_rank() + member.team_size() * member.league_rank(); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const ReduceTag & , const team_member & member , value_type & update ) const + { + update += 1 + member.team_rank() + member.team_size() * member.league_rank(); + } + + static void test_reduce( const size_t league_size ) + { + TestTeamPolicy functor( league_size ); + + const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor ); + const long N = team_size * league_size ; + + long total = 0 ; + + Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size , team_size ) , functor , total ); + ASSERT_EQ( size_t((N-1)*(N))/2 , size_t(total) ); + + Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace , ReduceTag >( league_size , team_size ) , functor , total ); + ASSERT_EQ( (size_t(N)*size_t(N+1))/2 , size_t(total) ); + } +}; + +} +} + +/*--------------------------------------------------------------------------*/ + +namespace Test { + +template< typename ScalarType , class 
DeviceType, class ScheduleType > +class ReduceTeamFunctor +{ +public: + typedef DeviceType execution_space ; + typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; + typedef typename execution_space::size_type size_type ; + + struct value_type { + ScalarType value[3] ; + }; + + const size_type nwork ; + + ReduceTeamFunctor( const size_type & arg_nwork ) : nwork( arg_nwork ) {} + + ReduceTeamFunctor( const ReduceTeamFunctor & rhs ) + : nwork( rhs.nwork ) {} + + KOKKOS_INLINE_FUNCTION + void init( value_type & dst ) const + { + dst.value[0] = 0 ; + dst.value[1] = 0 ; + dst.value[2] = 0 ; + } + + KOKKOS_INLINE_FUNCTION + void join( volatile value_type & dst , + const volatile value_type & src ) const + { + dst.value[0] += src.value[0] ; + dst.value[1] += src.value[1] ; + dst.value[2] += src.value[2] ; + } + + KOKKOS_INLINE_FUNCTION + void operator()( const typename policy_type::member_type ind , value_type & dst ) const + { + const int thread_rank = ind.team_rank() + ind.team_size() * ind.league_rank(); + const int thread_size = ind.team_size() * ind.league_size(); + const int chunk = ( nwork + thread_size - 1 ) / thread_size ; + + size_type iwork = chunk * thread_rank ; + const size_type iwork_end = iwork + chunk < nwork ? 
iwork + chunk : nwork ; + + for ( ; iwork < iwork_end ; ++iwork ) { + dst.value[0] += 1 ; + dst.value[1] += iwork + 1 ; + dst.value[2] += nwork - iwork ; + } + } +}; + +} // namespace Test + +namespace { + +template< typename ScalarType , class DeviceType, class ScheduleType > +class TestReduceTeam +{ +public: + typedef DeviceType execution_space ; + typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; + typedef typename execution_space::size_type size_type ; + + //------------------------------------ + + TestReduceTeam( const size_type & nwork ) + { + run_test(nwork); + } + + void run_test( const size_type & nwork ) + { + typedef Test::ReduceTeamFunctor< ScalarType , execution_space , ScheduleType> functor_type ; + typedef typename functor_type::value_type value_type ; + typedef Kokkos::View< value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type ; + + enum { Count = 3 }; + enum { Repeat = 100 }; + + value_type result[ Repeat ]; + + const unsigned long nw = nwork ; + const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) + : (nw/2) * ( nw + 1 ); + + const unsigned team_size = policy_type::team_size_recommended( functor_type(nwork) ); + const unsigned league_size = ( nwork + team_size - 1 ) / team_size ; + + policy_type team_exec( league_size , team_size ); + + for ( unsigned i = 0 ; i < Repeat ; ++i ) { + result_type tmp( & result[i] ); + Kokkos::parallel_reduce( team_exec , functor_type(nwork) , tmp ); + } + + execution_space::fence(); + + for ( unsigned i = 0 ; i < Repeat ; ++i ) { + for ( unsigned j = 0 ; j < Count ; ++j ) { + const unsigned long correct = 0 == j % 3 ? 
nw : nsum ; + ASSERT_EQ( (ScalarType) correct , result[i].value[j] ); + } + } + } +}; + +} + +/*--------------------------------------------------------------------------*/ + +namespace Test { + +template< class DeviceType, class ScheduleType > +class ScanTeamFunctor +{ +public: + typedef DeviceType execution_space ; + typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; + + typedef long int value_type ; + Kokkos::View< value_type , execution_space > accum ; + Kokkos::View< value_type , execution_space > total ; + + ScanTeamFunctor() : accum("accum"), total("total") {} + + KOKKOS_INLINE_FUNCTION + void init( value_type & error ) const { error = 0 ; } + + KOKKOS_INLINE_FUNCTION + void join( value_type volatile & error , + value_type volatile const & input ) const + { if ( input ) error = 1 ; } + + struct JoinMax { + typedef long int value_type ; + KOKKOS_INLINE_FUNCTION + void join( value_type volatile & dst + , value_type volatile const & input ) const + { if ( dst < input ) dst = input ; } + }; + + KOKKOS_INLINE_FUNCTION + void operator()( const typename policy_type::member_type ind , value_type & error ) const + { + if ( 0 == ind.league_rank() && 0 == ind.team_rank() ) { + const long int thread_count = ind.league_size() * ind.team_size(); + total() = ( thread_count * ( thread_count + 1 ) ) / 2 ; + } + + // Team max: + const int long m = ind.team_reduce( (long int) ( ind.league_rank() + ind.team_rank() ) , JoinMax() ); + + if ( m != ind.league_rank() + ( ind.team_size() - 1 ) ) { + printf("ScanTeamFunctor[%d.%d of %d.%d] reduce_max_answer(%ld) != reduce_max(%ld)\n" + , ind.league_rank(), ind.team_rank() + , ind.league_size(), ind.team_size() + , (long int)(ind.league_rank() + ( ind.team_size() - 1 )) , m ); + } + + // Scan: + const long int answer = + ( ind.league_rank() + 1 ) * ind.team_rank() + + ( ind.team_rank() * ( ind.team_rank() + 1 ) ) / 2 ; + + const long int result = + ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 ); + + 
const long int result2 = + ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 ); + + if ( answer != result || answer != result2 ) { + printf("ScanTeamFunctor[%d.%d of %d.%d] answer(%ld) != scan_first(%ld) or scan_second(%ld)\n", + ind.league_rank(), ind.team_rank(), + ind.league_size(), ind.team_size(), + answer,result,result2); + error = 1 ; + } + + const long int thread_rank = ind.team_rank() + + ind.team_size() * ind.league_rank(); + ind.team_scan( 1 + thread_rank , accum.ptr_on_device() ); + } +}; + +template< class DeviceType, class ScheduleType > +class TestScanTeam +{ +public: + typedef DeviceType execution_space ; + typedef long int value_type ; + + typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; + typedef Test::ScanTeamFunctor<DeviceType, ScheduleType> functor_type ; + + //------------------------------------ + + TestScanTeam( const size_t nteam ) + { + run_test(nteam); + } + + void run_test( const size_t nteam ) + { + typedef Kokkos::View< long int , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ; + + const unsigned REPEAT = 100000 ; + const unsigned Repeat = ( REPEAT + nteam - 1 ) / nteam ; + + functor_type functor ; + + policy_type team_exec( nteam , policy_type::team_size_max( functor ) ); + + for ( unsigned i = 0 ; i < Repeat ; ++i ) { + long int accum = 0 ; + long int total = 0 ; + long int error = 0 ; + Kokkos::deep_copy( functor.accum , total ); + Kokkos::parallel_reduce( team_exec , functor , result_type( & error ) ); + DeviceType::fence(); + Kokkos::deep_copy( accum , functor.accum ); + Kokkos::deep_copy( total , functor.total ); + + ASSERT_EQ( error , 0 ); + ASSERT_EQ( total , accum ); + } + + execution_space::fence(); + } +}; + +} // namespace Test + +/*--------------------------------------------------------------------------*/ + +namespace Test { + +template< class ExecSpace, class ScheduleType > +struct SharedTeamFunctor { + + typedef ExecSpace execution_space ; + typedef int value_type ; + 
typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; + + enum { SHARED_COUNT = 1000 }; + + typedef typename ExecSpace::scratch_memory_space shmem_space ; + + // tbd: MemoryUnmanaged should be the default for shared memory space + typedef Kokkos::View<int*,shmem_space,Kokkos::MemoryUnmanaged> shared_int_array_type ; + + // Tell how much shared memory will be required by this functor: + inline + unsigned team_shmem_size( int team_size ) const + { + return shared_int_array_type::shmem_size( SHARED_COUNT ) + + shared_int_array_type::shmem_size( SHARED_COUNT ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const typename policy_type::member_type & ind , value_type & update ) const + { + const shared_int_array_type shared_A( ind.team_shmem() , SHARED_COUNT ); + const shared_int_array_type shared_B( ind.team_shmem() , SHARED_COUNT ); + + if ((shared_A.ptr_on_device () == NULL && SHARED_COUNT > 0) || + (shared_B.ptr_on_device () == NULL && SHARED_COUNT > 0)) { + printf ("Failed to allocate shared memory of size %lu\n", + static_cast<unsigned long> (SHARED_COUNT)); + ++update; // failure to allocate is an error + } + else { + for ( int i = ind.team_rank() ; i < SHARED_COUNT ; i += ind.team_size() ) { + shared_A[i] = i + ind.league_rank(); + shared_B[i] = 2 * i + ind.league_rank(); + } + + ind.team_barrier(); + + if ( ind.team_rank() + 1 == ind.team_size() ) { + for ( int i = 0 ; i < SHARED_COUNT ; ++i ) { + if ( shared_A[i] != i + ind.league_rank() ) { + ++update ; + } + if ( shared_B[i] != 2 * i + ind.league_rank() ) { + ++update ; + } + } + } + } + } +}; + +} + +namespace { + +template< class ExecSpace, class ScheduleType > +struct TestSharedTeam { + + TestSharedTeam() + { run(); } + + void run() + { + typedef Test::SharedTeamFunctor<ExecSpace, ScheduleType> Functor ; + typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ; + + const size_t team_size = Kokkos::TeamPolicy< ScheduleType, 
ExecSpace >::team_size_max( Functor() ); + + Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size ); + + typename Functor::value_type error_count = 0 ; + + Kokkos::parallel_reduce( team_exec , Functor() , result_type( & error_count ) ); + + ASSERT_EQ( error_count , 0 ); + } +}; +} + +namespace Test { + +#if defined (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) +template< class MemorySpace, class ExecSpace, class ScheduleType > +struct TestLambdaSharedTeam { + + TestLambdaSharedTeam() + { run(); } + + void run() + { + typedef Test::SharedTeamFunctor<ExecSpace, ScheduleType> Functor ; + //typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ; + typedef Kokkos::View< typename Functor::value_type , MemorySpace, Kokkos::MemoryUnmanaged > result_type ; + + typedef typename ExecSpace::scratch_memory_space shmem_space ; + + // tbd: MemoryUnmanaged should be the default for shared memory space + typedef Kokkos::View<int*,shmem_space,Kokkos::MemoryUnmanaged> shared_int_array_type ; + + const int SHARED_COUNT = 1000; + int team_size = 1; +#ifdef KOKKOS_HAVE_CUDA + if(std::is_same<ExecSpace,Kokkos::Cuda>::value) + team_size = 128; +#endif + Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size); + team_exec = team_exec.set_scratch_size(0,Kokkos::PerTeam(SHARED_COUNT*2*sizeof(int))); + + typename Functor::value_type error_count = 0 ; + + Kokkos::parallel_reduce( team_exec , KOKKOS_LAMBDA + ( const typename Kokkos::TeamPolicy< ScheduleType, ExecSpace >::member_type & ind , int & update ) { + + const shared_int_array_type shared_A( ind.team_shmem() , SHARED_COUNT ); + const shared_int_array_type shared_B( ind.team_shmem() , SHARED_COUNT ); + + if ((shared_A.ptr_on_device () == NULL && SHARED_COUNT > 0) || + (shared_B.ptr_on_device () == NULL && SHARED_COUNT > 0)) { + printf ("Failed to allocate shared memory of size %lu\n", + static_cast<unsigned long> (SHARED_COUNT)); 
+ ++update; // failure to allocate is an error + } else { + for ( int i = ind.team_rank() ; i < SHARED_COUNT ; i += ind.team_size() ) { + shared_A[i] = i + ind.league_rank(); + shared_B[i] = 2 * i + ind.league_rank(); + } + + ind.team_barrier(); + + if ( ind.team_rank() + 1 == ind.team_size() ) { + for ( int i = 0 ; i < SHARED_COUNT ; ++i ) { + if ( shared_A[i] != i + ind.league_rank() ) { + ++update ; + } + if ( shared_B[i] != 2 * i + ind.league_rank() ) { + ++update ; + } + } + } + } + }, result_type( & error_count ) ); + + ASSERT_EQ( error_count , 0 ); + } +}; +#endif +} + +namespace Test { + +template< class ExecSpace, class ScheduleType > +struct ScratchTeamFunctor { + + typedef ExecSpace execution_space ; + typedef int value_type ; + typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; + + enum { SHARED_TEAM_COUNT = 100 }; + enum { SHARED_THREAD_COUNT = 10 }; + + typedef typename ExecSpace::scratch_memory_space shmem_space ; + + // tbd: MemoryUnmanaged should be the default for shared memory space + typedef Kokkos::View<size_t*,shmem_space,Kokkos::MemoryUnmanaged> shared_int_array_type ; + + KOKKOS_INLINE_FUNCTION + void operator()( const typename policy_type::member_type & ind , value_type & update ) const + { + const shared_int_array_type scratch_ptr( ind.team_scratch(1) , 2*ind.team_size() ); + const shared_int_array_type scratch_A( ind.team_scratch(1) , SHARED_TEAM_COUNT ); + const shared_int_array_type scratch_B( ind.thread_scratch(1) , SHARED_THREAD_COUNT ); + + if ((scratch_ptr.ptr_on_device () == NULL ) || + (scratch_A. ptr_on_device () == NULL && SHARED_TEAM_COUNT > 0) || + (scratch_B. 
ptr_on_device () == NULL && SHARED_THREAD_COUNT > 0)) { + printf ("Failed to allocate shared memory of size %lu\n", + static_cast<unsigned long> (SHARED_TEAM_COUNT)); + ++update; // failure to allocate is an error + } + else { + Kokkos::parallel_for(Kokkos::TeamThreadRange(ind,0,(int)SHARED_TEAM_COUNT),[&] (const int &i) { + scratch_A[i] = i + ind.league_rank(); + }); + for(int i=0; i<SHARED_THREAD_COUNT; i++) + scratch_B[i] = 10000*ind.league_rank() + 100*ind.team_rank() + i; + + scratch_ptr[ind.team_rank()] = (size_t) scratch_A.ptr_on_device(); + scratch_ptr[ind.team_rank() + ind.team_size()] = (size_t) scratch_B.ptr_on_device(); + + ind.team_barrier(); + + for( int i = 0; i<SHARED_TEAM_COUNT; i++) { + if(scratch_A[i] != size_t(i + ind.league_rank())) + ++update; + } + for( int i = 0; i < ind.team_size(); i++) { + if(scratch_ptr[0]!=scratch_ptr[i]) ++update; + } + if(scratch_ptr[1+ind.team_size()] - scratch_ptr[0 + ind.team_size()] < + SHARED_THREAD_COUNT*sizeof(size_t)) + ++update; + for( int i = 1; i < ind.team_size(); i++) { + if((scratch_ptr[i+ind.team_size()] - scratch_ptr[i-1+ind.team_size()]) != + (scratch_ptr[1+ind.team_size()] - scratch_ptr[0 + ind.team_size()])) ++update; + + } + } + } +}; + +} + +namespace { + +template< class ExecSpace, class ScheduleType > +struct TestScratchTeam { + + TestScratchTeam() + { run(); } + + void run() + { + typedef Test::ScratchTeamFunctor<ExecSpace, ScheduleType> Functor ; + typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ; + + const size_t team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( Functor() ); + + Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size ); + + typename Functor::value_type error_count = 0 ; + + int team_scratch_size = Functor::shared_int_array_type::shmem_size(Functor::SHARED_TEAM_COUNT) + + Functor::shared_int_array_type::shmem_size(2*team_size); + int thread_scratch_size = 
Functor::shared_int_array_type::shmem_size(Functor::SHARED_THREAD_COUNT); + Kokkos::parallel_reduce( team_exec.set_scratch_size(0,Kokkos::PerTeam(team_scratch_size), + Kokkos::PerThread(thread_scratch_size)) , + Functor() , result_type( & error_count ) ); + + ASSERT_EQ( error_count , 0 ); + } +}; +} + +namespace Test { +template< class ExecSpace> +KOKKOS_INLINE_FUNCTION +int test_team_mulit_level_scratch_loop_body(const typename Kokkos::TeamPolicy<ExecSpace>::member_type& team) { + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team1(team.team_scratch(0),128); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread1(team.thread_scratch(0),16); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team2(team.team_scratch(0),128); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread2(team.thread_scratch(0),16); + + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team1(team.team_scratch(1),128000); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread1(team.thread_scratch(1),16000); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team2(team.team_scratch(1),128000); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread2(team.thread_scratch(1),16000); + + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team3(team.team_scratch(0),128); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread3(team.thread_scratch(0),16); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team3(team.team_scratch(1),128000); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread3(team.thread_scratch(1),16000); + + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128), [&] (const int& i) { + a_team1(i) = 1000000 + i; + a_team2(i) = 2000000 + i; + a_team3(i) = 3000000 + 
i; + }); + team.team_barrier(); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i){ + a_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i; + a_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i; + a_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i; + }); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i) { + b_team1(i) = 1000000 + i; + b_team2(i) = 2000000 + i; + b_team3(i) = 3000000 + i; + }); + team.team_barrier(); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i){ + b_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i; + b_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i; + b_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i; + }); + + team.team_barrier(); + int error = 0; + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128), [&] (const int& i) { + if(a_team1(i) != 1000000 + i) error++; + if(a_team2(i) != 2000000 + i) error++; + if(a_team3(i) != 3000000 + i) error++; + }); + team.team_barrier(); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i){ + if(a_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++; + if(a_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++; + if(a_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++; + }); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i) { + if(b_team1(i) != 1000000 + i) error++; + if(b_team2(i) != 2000000 + i) error++; + if(b_team3(i) != 3000000 + i) error++; + }); + team.team_barrier(); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i){ + if(b_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++; + if(b_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++; + if( b_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++; + }); + + return error; +} + + +struct TagReduce {}; +struct TagFor {}; + +template< class ExecSpace, class ScheduleType > 
+struct ClassNoShmemSizeFunction { + Kokkos::View<int,ExecSpace,Kokkos::MemoryTraits<Kokkos::Atomic> > errors; + + KOKKOS_INLINE_FUNCTION + void operator() (const TagFor&, const typename Kokkos::TeamPolicy<ExecSpace,ScheduleType>::member_type& team) const { + int error = test_team_mulit_level_scratch_loop_body<ExecSpace>(team); + errors() += error; + } + + KOKKOS_INLINE_FUNCTION + void operator() (const TagReduce&, const typename Kokkos::TeamPolicy<ExecSpace,ScheduleType>::member_type& team, int& error) const { + error += test_team_mulit_level_scratch_loop_body<ExecSpace>(team); + } + + void run() { + Kokkos::View<int,ExecSpace> d_errors = Kokkos::View<int,ExecSpace>("Errors"); + errors = d_errors; + + const int per_team0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128); + const int per_thread0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16); + + const int per_team1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128000); + const int per_thread1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16000); + { + Kokkos::TeamPolicy<TagFor,ExecSpace,ScheduleType> policy(10,8,16); + Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), + *this); + Kokkos::fence(); + typename Kokkos::View<int,ExecSpace>::HostMirror h_errors = Kokkos::create_mirror_view(d_errors); + Kokkos::deep_copy(h_errors,d_errors); + ASSERT_EQ(h_errors(),0); + } + + { + int error = 0; + Kokkos::TeamPolicy<TagReduce,ExecSpace,ScheduleType> policy(10,8,16); + Kokkos::parallel_reduce(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), + *this,error); + Kokkos::fence(); + ASSERT_EQ(error,0); + } + }; +}; 
+ +template< class ExecSpace, class ScheduleType > +struct ClassWithShmemSizeFunction { + Kokkos::View<int,ExecSpace,Kokkos::MemoryTraits<Kokkos::Atomic> > errors; + + KOKKOS_INLINE_FUNCTION + void operator() (const TagFor&, const typename Kokkos::TeamPolicy<ExecSpace,ScheduleType>::member_type& team) const { + int error = test_team_mulit_level_scratch_loop_body<ExecSpace>(team); + errors() += error; + } + + KOKKOS_INLINE_FUNCTION + void operator() (const TagReduce&, const typename Kokkos::TeamPolicy<ExecSpace,ScheduleType>::member_type& team, int& error) const { + error += test_team_mulit_level_scratch_loop_body<ExecSpace>(team); + } + + void run() { + Kokkos::View<int,ExecSpace> d_errors = Kokkos::View<int,ExecSpace>("Errors"); + errors = d_errors; + + const int per_team1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128000); + const int per_thread1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16000); + { + Kokkos::TeamPolicy<TagFor,ExecSpace,ScheduleType> policy(10,8,16); + Kokkos::parallel_for(policy.set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), + *this); + Kokkos::fence(); + typename Kokkos::View<int,ExecSpace>::HostMirror h_errors= Kokkos::create_mirror_view(d_errors); + Kokkos::deep_copy(h_errors,d_errors); + ASSERT_EQ(h_errors(),0); + } + + { + int error = 0; + Kokkos::TeamPolicy<TagReduce,ExecSpace,ScheduleType> policy(10,8,16); + Kokkos::parallel_reduce(policy.set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), + *this,error); + Kokkos::fence(); + ASSERT_EQ(error,0); + } + }; + + unsigned team_shmem_size(int team_size) const { + const int per_team0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128); + const int per_thread0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16); + return per_team0 + team_size * per_thread0; + } +}; + 
+template< class ExecSpace, class ScheduleType > +void test_team_mulit_level_scratch_test_lambda() { +#ifdef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA + Kokkos::View<int,ExecSpace,Kokkos::MemoryTraits<Kokkos::Atomic> > errors; + Kokkos::View<int,ExecSpace> d_errors("Errors"); + errors = d_errors; + + const int per_team0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128); + const int per_thread0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16); + + const int per_team1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128000); + const int per_thread1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16000); + + Kokkos::TeamPolicy<ExecSpace,ScheduleType> policy(10,8,16); + Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), + KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy<ExecSpace>::member_type& team) { + int error = test_team_mulit_level_scratch_loop_body<ExecSpace>(team); + errors() += error; + }); + Kokkos::fence(); + typename Kokkos::View<int,ExecSpace>::HostMirror h_errors= Kokkos::create_mirror_view(errors); + Kokkos::deep_copy(h_errors,d_errors); + ASSERT_EQ(h_errors(),0); + + int error = 0; + Kokkos::parallel_reduce(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), + KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy<ExecSpace>::member_type& team, int& count) { + count += test_team_mulit_level_scratch_loop_body<ExecSpace>(team); + },error); + ASSERT_EQ(error,0); + Kokkos::fence(); +#endif +} + + +} + +namespace { +template< class ExecSpace, class ScheduleType > +struct TestMultiLevelScratchTeam { + + TestMultiLevelScratchTeam() + { run(); } + + void run() + { +#ifdef 
KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA + Test::test_team_mulit_level_scratch_test_lambda<ExecSpace, ScheduleType>(); +#endif + Test::ClassNoShmemSizeFunction<ExecSpace, ScheduleType> c1; + c1.run(); + + Test::ClassWithShmemSizeFunction<ExecSpace, ScheduleType> c2; + c2.run(); + + } +}; +} + +namespace Test { + +template< class ExecSpace > +struct TestShmemSize { + + TestShmemSize() { run(); } + + void run() + { + typedef Kokkos::View< long***, ExecSpace > view_type; + + size_t d1 = 5; + size_t d2 = 6; + size_t d3 = 7; + + size_t size = view_type::shmem_size( d1, d2, d3 ); + + ASSERT_EQ( size, d1 * d2 * d3 * sizeof(long) ); + } +}; +} + +/*--------------------------------------------------------------------------*/ diff --git a/lib/kokkos/core/unit_test/TestTeamVector.hpp b/lib/kokkos/core/unit_test/TestTeamVector.hpp new file mode 100644 index 0000000000000000000000000000000000000000..48187f036844ccfda2d186f245b1673c7ffe5fd4 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestTeamVector.hpp @@ -0,0 +1,646 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> + +#include <impl/Kokkos_Timer.hpp> +#include <iostream> +#include <cstdlib> + +namespace TestTeamVector { + +struct my_complex { + double re,im; + int dummy; + KOKKOS_INLINE_FUNCTION + my_complex() { + re = 0.0; + im = 0.0; + dummy = 0; + } + KOKKOS_INLINE_FUNCTION + my_complex(const my_complex& src) { + re = src.re; + im = src.im; + dummy = src.dummy; + } + + KOKKOS_INLINE_FUNCTION + my_complex(const volatile my_complex& src) { + re = src.re; + im = src.im; + dummy = src.dummy; + } + + KOKKOS_INLINE_FUNCTION + my_complex(const double& val) { + re = val; + im = 0.0; + dummy = 0; + } + KOKKOS_INLINE_FUNCTION + my_complex& operator += (const my_complex& src) { + re += src.re; + im += src.im; + dummy += src.dummy; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator += (const volatile my_complex& src) volatile { + re += src.re; + im += src.im; + dummy += src.dummy; + } + KOKKOS_INLINE_FUNCTION + my_complex& operator *= (const my_complex& src) { + double re_tmp = re*src.re - im*src.im; + double im_tmp = re * src.im + im * src.re; + re = re_tmp; + im = im_tmp; + dummy *= src.dummy; + return *this; + } + KOKKOS_INLINE_FUNCTION + void operator *= (const volatile my_complex& src) volatile { + double re_tmp = re*src.re - im*src.im; + double im_tmp = re * src.im + im * src.re; + re = re_tmp; + im = im_tmp; + dummy *= src.dummy; + } + KOKKOS_INLINE_FUNCTION + bool operator == (const my_complex& src) { + return (re == src.re) && (im == src.im) && ( dummy == src.dummy ); + } + KOKKOS_INLINE_FUNCTION + bool operator != (const my_complex& src) { + return (re != src.re) || (im != src.im) || ( dummy != src.dummy ); + } + KOKKOS_INLINE_FUNCTION + bool operator != (const double& val) { + return (re != val) || + (im != 0) || (dummy != 0); + } + KOKKOS_INLINE_FUNCTION + my_complex& operator= (const int& val) { + re 
= val; + im = 0.0; + dummy = 0; + return *this; + } + KOKKOS_INLINE_FUNCTION + my_complex& operator= (const double& val) { + re = val; + im = 0.0; + dummy = 0; + return *this; + } + KOKKOS_INLINE_FUNCTION + operator double() { + return re; + } +}; + +template<typename Scalar, class ExecutionSpace> +struct functor_team_for { + typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; + functor_team_for(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + + unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} + + KOKKOS_INLINE_FUNCTION + void operator() (typename policy_type::member_type team) const { + + typedef typename ExecutionSpace::scratch_memory_space shmem_space ; + typedef Kokkos::View<Scalar*,shmem_space,Kokkos::MemoryUnmanaged> shared_int; + typedef typename shared_int::size_type size_type; + + const size_type shmemSize = team.team_size () * 13; + shared_int values = shared_int (team.team_shmem (), shmemSize); + + if (values.ptr_on_device () == NULL || values.dimension_0 () < shmemSize) { + printf ("FAILED to allocate shared memory of size %u\n", + static_cast<unsigned int> (shmemSize)); + } + else { + + // Initialize shared memory + values(team.team_rank ()) = 0; + + // Accumulate value into per thread shared memory + // This is non blocking + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,131),[&] (int i) { + values(team.team_rank ()) += i - team.league_rank () + team.league_size () + team.team_size (); + }); + // Wait for all memory to be written + team.team_barrier (); + // One thread per team executes the comparison + Kokkos::single(Kokkos::PerTeam(team),[&]() { + Scalar test = 0; + Scalar value = 0; + for (int i = 0; i < 131; ++i) { + test += i - team.league_rank () + team.league_size () + team.team_size (); + } + for (int i = 0; i < team.team_size (); ++i) { + value += values(i); + } + if 
(test != value) { + printf ("FAILED team_parallel_for %i %i %f %f\n", + team.league_rank (), team.team_rank (), + static_cast<double> (test), static_cast<double> (value)); + flag() = 1; + } + }); + } + } +}; + +template<typename Scalar, class ExecutionSpace> +struct functor_team_reduce { + typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; + functor_team_reduce(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + + unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} + + KOKKOS_INLINE_FUNCTION + void operator() (typename policy_type::member_type team) const { + + Scalar value = Scalar(); + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131),[&] (int i, Scalar& val) { + val += i - team.league_rank () + team.league_size () + team.team_size (); + },value); + + team.team_barrier (); + Kokkos::single(Kokkos::PerTeam(team),[&]() { + Scalar test = 0; + for (int i = 0; i < 131; ++i) { + test += i - team.league_rank () + team.league_size () + team.team_size (); + } + if (test != value) { + if(team.league_rank() == 0) + printf ("FAILED team_parallel_reduce %i %i %f %f %lu\n", + team.league_rank (), team.team_rank (), + static_cast<double> (test), static_cast<double> (value),sizeof(Scalar)); + flag() = 1; + } + }); + } +}; + +template<typename Scalar, class ExecutionSpace> +struct functor_team_reduce_join { + typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; + functor_team_reduce_join(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + + unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} + + KOKKOS_INLINE_FUNCTION + void operator() (typename policy_type::member_type team) const { + + Scalar value = 0; + + 
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131) + , [&] (int i, Scalar& val) { + val += i - team.league_rank () + team.league_size () + team.team_size (); + } + , [&] (volatile Scalar& val, const volatile Scalar& src) {val+=src;} + , value + ); + + team.team_barrier (); + Kokkos::single(Kokkos::PerTeam(team),[&]() { + Scalar test = 0; + for (int i = 0; i < 131; ++i) { + test += i - team.league_rank () + team.league_size () + team.team_size (); + } + if (test != value) { + printf ("FAILED team_parallel_reduce_join %i %i %f %f\n", + team.league_rank (), team.team_rank (), + static_cast<double> (test), static_cast<double> (value)); + flag() = 1; + } + }); + } +}; + +template<typename Scalar, class ExecutionSpace> +struct functor_team_vector_for { + typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; + functor_team_vector_for(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + + unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} + + KOKKOS_INLINE_FUNCTION + void operator() (typename policy_type::member_type team) const { + + typedef typename ExecutionSpace::scratch_memory_space shmem_space ; + typedef Kokkos::View<Scalar*,shmem_space,Kokkos::MemoryUnmanaged> shared_int; + typedef typename shared_int::size_type size_type; + + const size_type shmemSize = team.team_size () * 13; + shared_int values = shared_int (team.team_shmem (), shmemSize); + + if (values.ptr_on_device () == NULL || values.dimension_0 () < shmemSize) { + printf ("FAILED to allocate shared memory of size %u\n", + static_cast<unsigned int> (shmemSize)); + } + else { + Kokkos::single(Kokkos::PerThread(team),[&] () { + values(team.team_rank ()) = 0; + }); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,131),[&] (int i) { + Kokkos::single(Kokkos::PerThread(team),[&] () { + values(team.team_rank ()) += i -
team.league_rank () + team.league_size () + team.team_size (); + }); + }); + + team.team_barrier (); + Kokkos::single(Kokkos::PerTeam(team),[&]() { + Scalar test = 0; + Scalar value = 0; + for (int i = 0; i < 131; ++i) { + test += i - team.league_rank () + team.league_size () + team.team_size (); + } + for (int i = 0; i < team.team_size (); ++i) { + value += values(i); + } + if (test != value) { + printf ("FAILED team_vector_parallel_for %i %i %f %f\n", + team.league_rank (), team.team_rank (), + static_cast<double> (test), static_cast<double> (value)); + flag() = 1; + } + }); + } + } +}; + +template<typename Scalar, class ExecutionSpace> +struct functor_team_vector_reduce { + typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; + functor_team_vector_reduce(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + + unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} + + KOKKOS_INLINE_FUNCTION + void operator() (typename policy_type::member_type team) const { + + Scalar value = Scalar(); + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131),[&] (int i, Scalar& val) { + val += i - team.league_rank () + team.league_size () + team.team_size (); + },value); + + team.team_barrier (); + Kokkos::single(Kokkos::PerTeam(team),[&]() { + Scalar test = 0; + for (int i = 0; i < 131; ++i) { + test += i - team.league_rank () + team.league_size () + team.team_size (); + } + if (test != value) { + if(team.league_rank() == 0) + printf ("FAILED team_vector_parallel_reduce %i %i %f %f %lu\n", + team.league_rank (), team.team_rank (), + static_cast<double> (test), static_cast<double> (value),sizeof(Scalar)); + flag() = 1; + } + }); + } +}; + +template<typename Scalar, class ExecutionSpace> +struct functor_team_vector_reduce_join { + typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef ExecutionSpace 
execution_space; + + Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; + functor_team_vector_reduce_join(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + + unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} + + KOKKOS_INLINE_FUNCTION + void operator() (typename policy_type::member_type team) const { + + Scalar value = 0; + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131) + , [&] (int i, Scalar& val) { + val += i - team.league_rank () + team.league_size () + team.team_size (); + } + , [&] (volatile Scalar& val, const volatile Scalar& src) {val+=src;} + , value + ); + + team.team_barrier (); + Kokkos::single(Kokkos::PerTeam(team),[&]() { + Scalar test = 0; + for (int i = 0; i < 131; ++i) { + test += i - team.league_rank () + team.league_size () + team.team_size (); + } + if (test != value) { + printf ("FAILED team_vector_parallel_reduce_join %i %i %f %f\n", + team.league_rank (), team.team_rank (), + static_cast<double> (test), static_cast<double> (value)); + flag() = 1; + } + }); + } +}; + +template<typename Scalar, class ExecutionSpace> +struct functor_vec_single { + typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; + functor_vec_single(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (typename policy_type::member_type team) const { + + // Warning: this test case intentionally violates permissible semantics. + // It is not valid to get references to members of the enclosing region + // inside a parallel_for and write to them.
+ Scalar value = 0; + + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,13),[&] (int i) { + value = i; // This write is violating Kokkos semantics for nested parallelism + }); + + Kokkos::single(Kokkos::PerThread(team),[&] (Scalar& val) { + val = 1; + },value); + + Scalar value2 = 0; + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13), [&] (int i, Scalar& val) { + val += value; + },value2); + + if(value2!=(value*13)) { + printf("FAILED vector_single broadcast %i %i %f %f\n",team.league_rank(),team.team_rank(),(double) value2,(double) value); + flag()=1; + } + } +}; + +template<typename Scalar, class ExecutionSpace> +struct functor_vec_for { + typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; + functor_vec_for(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + + unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} + + KOKKOS_INLINE_FUNCTION + void operator() (typename policy_type::member_type team) const { + + typedef typename ExecutionSpace::scratch_memory_space shmem_space ; + typedef Kokkos::View<Scalar*,shmem_space,Kokkos::MemoryUnmanaged> shared_int; + shared_int values = shared_int(team.team_shmem(),team.team_size()*13); + + if (values.ptr_on_device () == NULL || + values.dimension_0() < (unsigned) team.team_size() * 13) { + printf ("FAILED to allocate memory of size %i\n", + static_cast<int> (team.team_size () * 13)); + flag() = 1; + } + else { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,13), [&] (int i) { + values(13*team.team_rank() + i) = i - team.team_rank() - team.league_rank() + team.league_size() + team.team_size(); + }); + + Kokkos::single(Kokkos::PerThread(team),[&] () { + Scalar test = 0; + Scalar value = 0; + for (int i = 0; i < 13; ++i) { + test += i - team.team_rank() - team.league_rank() + team.league_size() + team.team_size(); + value += 
values(13*team.team_rank() + i); + } + if (test != value) { + printf ("FAILED vector_par_for %i %i %f %f\n", + team.league_rank (), team.team_rank (), + static_cast<double> (test), static_cast<double> (value)); + flag() = 1; + } + }); + } + } +}; + +template<typename Scalar, class ExecutionSpace> +struct functor_vec_red { + typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; + functor_vec_red(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (typename policy_type::member_type team) const { + Scalar value = 0; + + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13),[&] (int i, Scalar& val) { + val += i; + }, value); + + Kokkos::single(Kokkos::PerThread(team),[&] () { + Scalar test = 0; + for(int i = 0; i < 13; i++) { + test+=i; + } + if(test!=value) { + printf("FAILED vector_par_reduce %i %i %f %f\n",team.league_rank(),team.team_rank(),(double) test,(double) value); + flag()=1; + } + }); + } +}; + +template<typename Scalar, class ExecutionSpace> +struct functor_vec_red_join { + typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; + functor_vec_red_join(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (typename policy_type::member_type team) const { + Scalar value = 1; + + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13) + , [&] (int i, Scalar& val) { val *= i; } + , [&] (Scalar& val, const Scalar& src) {val*=src;} + , value + ); + + Kokkos::single(Kokkos::PerThread(team),[&] () { + Scalar test = 1; + for(int i = 0; i < 13; i++) { + test*=i; + } + if(test!=value) { + printf("FAILED vector_par_reduce_join %i %i %f %f\n",team.league_rank(),team.team_rank(),(double) test,(double) value); + flag()=1; + } + 
}); + } +}; + +template<typename Scalar, class ExecutionSpace> +struct functor_vec_scan { + typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; + functor_vec_scan(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (typename policy_type::member_type team) const { + Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,13),[&] (int i, Scalar& val, bool final) { + val += i; + if(final) { + Scalar test = 0; + for(int k = 0; k <= i; k++) { + test+=k; + } + if(test!=val) { + printf("FAILED vector_par_scan %i %i %f %f\n",team.league_rank(),team.team_rank(),(double) test,(double) val); + flag()=1; + } + } + }); + } +}; + +template<typename Scalar, class ExecutionSpace> +struct functor_reduce { + typedef double value_type; + typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; + functor_reduce(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (typename policy_type::member_type team, double& sum) const { + sum += team.league_rank() * 100 + team.team_rank(); /* distinct per (league_rank, team_rank) pair while team_size <= 100 */ + } +}; + +template<typename Scalar,class ExecutionSpace> +bool test_scalar(int nteams, int team_size, int test) { + Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> d_flag("flag"); + typename Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace>::HostMirror h_flag("h_flag"); + h_flag() = 0 ; + Kokkos::deep_copy(d_flag,h_flag); + + if(test==0) + Kokkos::parallel_for( std::string("A") , Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size,8), + functor_vec_red<Scalar, ExecutionSpace>(d_flag)); + if(test==1) + Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size,8), + functor_vec_red_join<Scalar, ExecutionSpace>(d_flag)); + if(test==2) +
Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size,8), + functor_vec_scan<Scalar, ExecutionSpace>(d_flag)); + if(test==3) + Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size,8), + functor_vec_for<Scalar, ExecutionSpace>(d_flag)); + if(test==4) + Kokkos::parallel_for( "B" , Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size,8), + functor_vec_single<Scalar, ExecutionSpace>(d_flag)); + if(test==5) + Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size), + functor_team_for<Scalar, ExecutionSpace>(d_flag)); + if(test==6) + Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size), + functor_team_reduce<Scalar, ExecutionSpace>(d_flag)); + if(test==7) + Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size), + functor_team_reduce_join<Scalar, ExecutionSpace>(d_flag)); + if(test==8) + Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size,8), + functor_team_vector_for<Scalar, ExecutionSpace>(d_flag)); + if(test==9) + Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size,8), + functor_team_vector_reduce<Scalar, ExecutionSpace>(d_flag)); + if(test==10) + Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size,8), + functor_team_vector_reduce_join<Scalar, ExecutionSpace>(d_flag)); + + Kokkos::deep_copy(h_flag,d_flag); + + return (h_flag() == 0); +} + +template<class ExecutionSpace> +bool Test(int test) { + bool passed = true; + passed = passed && test_scalar<int, ExecutionSpace>(317,33,test); + passed = passed && test_scalar<long long int, ExecutionSpace>(317,33,test); + passed = passed && test_scalar<float, ExecutionSpace>(317,33,test); + passed = passed && test_scalar<double, ExecutionSpace>(317,33,test); + passed = passed && test_scalar<my_complex, ExecutionSpace>(317,33,test); + return passed; +} + +} + diff --git a/lib/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp 
b/lib/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4f136bc64b977e3243b9aaf789d4837e7e5ca793 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp @@ -0,0 +1,219 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> + +#define KOKKOS_PRAGMA_UNROLL(a) + +namespace { + +template<class Scalar, class ExecutionSpace> +struct SumPlain { + typedef ExecutionSpace execution_space; + typedef typename Kokkos::View<Scalar*,execution_space> type; + type view; + SumPlain(type view_):view(view_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (int i, Scalar& val) { + val += Scalar(); + } +}; + +template<class Scalar, class ExecutionSpace> +struct SumInitJoinFinalValueType { + typedef ExecutionSpace execution_space; + typedef typename Kokkos::View<Scalar*,execution_space> type; + type view; + typedef Scalar value_type; + SumInitJoinFinalValueType(type view_):view(view_) {} + + KOKKOS_INLINE_FUNCTION + void init(value_type& val) const { + val = value_type(); + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& val, volatile value_type& src) const { + val += src; + } + + KOKKOS_INLINE_FUNCTION + void operator() (int i, value_type& val) const { + val += value_type(); + } + +}; + +template<class Scalar, class ExecutionSpace> +struct SumInitJoinFinalValueType2 { + typedef ExecutionSpace execution_space; + typedef typename Kokkos::View<Scalar*,execution_space> type; + type view; + typedef Scalar value_type; + SumInitJoinFinalValueType2(type view_):view(view_) {} + 
+ KOKKOS_INLINE_FUNCTION + void init(volatile value_type& val) const { + val = value_type(); + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& val, const volatile value_type& src) const { + val += src; + } + + KOKKOS_INLINE_FUNCTION + void operator() (int i, value_type& val) const { + val += value_type(); + } + +}; + +template<class Scalar, class ExecutionSpace> +struct SumInitJoinFinalValueTypeArray { + typedef ExecutionSpace execution_space; + typedef typename Kokkos::View<Scalar*,execution_space> type; + type view; + typedef Scalar value_type[]; + int n; + SumInitJoinFinalValueTypeArray(type view_, int n_):view(view_),n(n_) {} + + KOKKOS_INLINE_FUNCTION + void init(value_type val) const { + for(int k=0;k<n;k++) + val[k] = 0; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type val, const volatile value_type src) const { + for(int k=0;k<n;k++) + val[k] += src[k]; + } + + KOKKOS_INLINE_FUNCTION + void operator() (int i, value_type val) const { + for(int k=0;k<n;k++) + val[k] += k*i; + } + +}; + +template<class Scalar, class ExecutionSpace> +struct SumWrongInitJoinFinalValueType { + typedef ExecutionSpace execution_space; + typedef typename Kokkos::View<Scalar*,execution_space> type; + type view; + typedef Scalar value_type; + SumWrongInitJoinFinalValueType(type view_):view(view_) {} + + KOKKOS_INLINE_FUNCTION + void init(double& val) const { + val = double(); + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& val, const value_type& src) const { + val += src; + } + + KOKKOS_INLINE_FUNCTION + void operator() (int i, value_type& val) const { + val += value_type(); + } + +}; + +template<class Scalar, class ExecutionSpace> +void TestTemplateMetaFunctions() { + typedef typename Kokkos::View<Scalar*,ExecutionSpace> type; + type a("A",100); +/* #ifdef KOKKOS_HAVE_CXX11 + int sum_plain_has_init_arg = Kokkos::Impl::FunctorHasInit<SumPlain<Scalar,ExecutionSpace>, Scalar& >::value; + ASSERT_EQ(sum_plain_has_init_arg,0); + int 
sum_initjoinfinalvaluetype_has_init_arg = Kokkos::Impl::FunctorHasInit<SumInitJoinFinalValueType<Scalar,ExecutionSpace>, Scalar >::value; + ASSERT_EQ(sum_initjoinfinalvaluetype_has_init_arg,1); + int sum_initjoinfinalvaluetype_has_init_arg2 = Kokkos::Impl::FunctorHasInit<SumInitJoinFinalValueType2<Scalar,ExecutionSpace>, Scalar >::value; + ASSERT_EQ(sum_initjoinfinalvaluetype_has_init_arg2,1); + int sum_wronginitjoinfinalvaluetype_has_init_arg = Kokkos::Impl::FunctorHasInit<SumWrongInitJoinFinalValueType<Scalar,ExecutionSpace>, Scalar >::value; + ASSERT_EQ(sum_wronginitjoinfinalvaluetype_has_init_arg,0); + + //int sum_initjoinfinalvaluetypearray_has_init_arg = Kokkos::Impl::FunctorHasInit<SumInitJoinFinalValueTypeArray<Scalar,ExecutionSpace>, Scalar[] >::value; + //ASSERT_EQ(sum_initjoinfinalvaluetypearray_has_init_arg,1); + + #else + + int sum_plain_has_init_arg = Kokkos::Impl::FunctorHasInit<SumPlain<Scalar,ExecutionSpace>, Scalar& >::value; + ASSERT_EQ(sum_plain_has_init_arg,0); + int sum_initjoinfinalvaluetype_has_init_arg = Kokkos::Impl::FunctorHasInit<SumInitJoinFinalValueType<Scalar,ExecutionSpace>, Scalar& >::value; + ASSERT_EQ(sum_initjoinfinalvaluetype_has_init_arg,1); + int sum_wronginitjoinfinalvaluetype_has_init_arg = Kokkos::Impl::FunctorHasInit<SumWrongInitJoinFinalValueType<Scalar,ExecutionSpace>, Scalar& >::value; + ASSERT_EQ(sum_wronginitjoinfinalvaluetype_has_init_arg,1); + + #endif + + //printf("Values Init: %i %i %i\n",sum_plain_has_init_arg,sum_initjoinfinalvaluetype_has_init_arg,sum_wronginitjoinfinalvaluetype_has_init_arg); + +#ifdef KOKKOS_HAVE_CXX11 + int sum_plain_has_join_arg = Kokkos::Impl::FunctorHasJoin<SumPlain<Scalar,ExecutionSpace>, Scalar >::value; + ASSERT_EQ(sum_plain_has_join_arg,0); + int sum_initjoinfinalvaluetype_has_join_arg = Kokkos::Impl::FunctorHasJoin<SumInitJoinFinalValueType<Scalar,ExecutionSpace>, Scalar >::value; + ASSERT_EQ(sum_initjoinfinalvaluetype_has_join_arg,1); + int sum_initjoinfinalvaluetype_has_join_arg2 = 
Kokkos::Impl::FunctorHasJoin<SumInitJoinFinalValueType2<Scalar,ExecutionSpace>, Scalar >::value; + ASSERT_EQ(sum_initjoinfinalvaluetype_has_join_arg2,1); + int sum_wronginitjoinfinalvaluetype_has_join_arg = Kokkos::Impl::FunctorHasJoin<SumWrongInitJoinFinalValueType<Scalar,ExecutionSpace>, Scalar >::value; + ASSERT_EQ(sum_wronginitjoinfinalvaluetype_has_join_arg,0); +#else + int sum_plain_has_join_arg = Kokkos::Impl::FunctorHasJoin<SumPlain<Scalar,ExecutionSpace>, Scalar& >::value; + ASSERT_EQ(sum_plain_has_join_arg,0); + int sum_initjoinfinalvaluetype_has_join_arg = Kokkos::Impl::FunctorHasJoin<SumInitJoinFinalValueType<Scalar,ExecutionSpace>, Scalar& >::value; + ASSERT_EQ(sum_initjoinfinalvaluetype_has_join_arg,1); + int sum_initjoinfinalvaluetype_has_join_arg2 = Kokkos::Impl::FunctorHasJoin<SumInitJoinFinalValueType2<Scalar,ExecutionSpace>, Scalar& >::value; + ASSERT_EQ(sum_initjoinfinalvaluetype_has_join_arg2,1); + int sum_wronginitjoinfinalvaluetype_has_join_arg = Kokkos::Impl::FunctorHasJoin<SumWrongInitJoinFinalValueType<Scalar,ExecutionSpace>, Scalar& >::value; + ASSERT_EQ(sum_wronginitjoinfinalvaluetype_has_join_arg,1); +#endif*/ + //printf("Values Join: %i %i %i\n",sum_plain_has_join_arg,sum_initjoinfinalvaluetype_has_join_arg,sum_wronginitjoinfinalvaluetype_has_join_arg); +} + +} diff --git a/lib/kokkos/core/unit_test/TestThreads.cpp b/lib/kokkos/core/unit_test/TestThreads.cpp new file mode 100644 index 0000000000000000000000000000000000000000..93049b95dd7c75bcd88b8d6408e8a0249f905855 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestThreads.cpp @@ -0,0 +1,614 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Macros.hpp> + +#if defined( KOKKOS_HAVE_PTHREAD ) +#ifdef KOKKOS_LAMBDA +#undef KOKKOS_LAMBDA +#endif +#define KOKKOS_LAMBDA [=] + +#include <Kokkos_Core.hpp> + +#include <Threads/Kokkos_Threads_TaskPolicy.hpp> + +//---------------------------------------------------------------------------- + +#include <TestSharedAlloc.hpp> +#include <TestViewMapping.hpp> + +#include <TestViewImpl.hpp> + +#include <TestViewAPI.hpp> +#include <TestViewSubview.hpp> +#include <TestViewOfClass.hpp> +#include <TestAtomic.hpp> +#include <TestAtomicOperations.hpp> + +#include <TestReduce.hpp> +#include <TestScan.hpp> +#include <TestRange.hpp> +#include <TestTeam.hpp> +#include <TestAggregate.hpp> +#include <TestAggregateReduction.hpp> +#include <TestCompilerMacros.hpp> +#include <TestTaskPolicy.hpp> +#include <TestMemoryPool.hpp> + + +#include <TestCXX11.hpp> +#include <TestCXX11Deduction.hpp> +#include <TestTeamVector.hpp> +#include <TestMemorySpaceTracking.hpp> +#include <TestTemplateMetaFunctions.hpp> + + +#include <TestPolicyConstruction.hpp> + +#include <TestMDRange.hpp> + +namespace Test { + +class threads : public ::testing::Test { +protected: + static void SetUpTestCase() + { + // Finalize without initialize is a no-op: + Kokkos::Threads::finalize(); + + const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); + const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); + const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); + + unsigned threads_count = 0 ; + + // Initialize and finalize with no threads: + Kokkos::Threads::initialize( 1u ); + Kokkos::Threads::finalize(); + + threads_count = std::max( 1u , numa_count ) + * std::max( 2u , cores_per_numa * threads_per_core ); + + Kokkos::Threads::initialize( threads_count ); + 
Kokkos::Threads::finalize(); + + threads_count = std::max( 1u , numa_count * 2 ) + * std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 ); + + Kokkos::Threads::initialize( threads_count ); + Kokkos::Threads::finalize(); + + // Quick attempt to verify thread start/terminate don't have race condition: + threads_count = std::max( 1u , numa_count ) + * std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 ); + for ( unsigned i = 0 ; i < 10 ; ++i ) { + Kokkos::Threads::initialize( threads_count ); + Kokkos::Threads::sleep(); + Kokkos::Threads::wake(); + Kokkos::Threads::finalize(); + } + + Kokkos::Threads::initialize( threads_count ); + Kokkos::Threads::print_configuration( std::cout , true /* detailed */ ); + } + + static void TearDownTestCase() + { + Kokkos::Threads::finalize(); + } +}; + +TEST_F( threads , init ) { + ; +} + +TEST_F( threads , md_range ) { + TestMDRange_2D< Kokkos::Threads >::test_for2(100,100); + + TestMDRange_3D< Kokkos::Threads >::test_for3(100,100,100); +} + +TEST_F( threads , dispatch ) +{ + const int repeat = 100 ; + for ( int i = 0 ; i < repeat ; ++i ) { + for ( int j = 0 ; j < repeat ; ++j ) { + Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Threads >(0,j) + , KOKKOS_LAMBDA( int ) {} ); + }} +} + +TEST_F( threads , impl_shared_alloc ) { + test_shared_alloc< Kokkos::HostSpace , Kokkos::Threads >(); +} + +TEST_F( threads, policy_construction) { + TestRangePolicyConstruction< Kokkos::Threads >(); + TestTeamPolicyConstruction< Kokkos::Threads >(); +} + +TEST_F( threads , impl_view_mapping ) { + test_view_mapping< Kokkos::Threads >(); + test_view_mapping_subview< Kokkos::Threads >(); + test_view_mapping_operator< Kokkos::Threads >(); + TestViewMappingAtomic< Kokkos::Threads >::run(); +} + + +TEST_F( threads, view_impl) { + test_view_impl< Kokkos::Threads >(); +} + +TEST_F( threads, view_api) { + TestViewAPI< double , Kokkos::Threads >(); +} + +TEST_F( threads , view_nested_view ) +{ + ::Test::view_nested_view< Kokkos::Threads >(); +} + 
+TEST_F( threads, view_subview_auto_1d_left ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_auto_1d_right ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_auto_1d_stride ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_assign_strided ) { + TestViewSubview::test_1d_strided_assignment< Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_left_0 ) { + TestViewSubview::test_left_0< Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_left_1 ) { + TestViewSubview::test_left_1< Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_left_2 ) { + TestViewSubview::test_left_2< Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_left_3 ) { + TestViewSubview::test_left_3< Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_right_0 ) { + TestViewSubview::test_right_0< Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_right_1 ) { + TestViewSubview::test_right_1< Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_right_3 ) { + TestViewSubview::test_right_3< Kokkos::Threads >(); +} + + +TEST_F( threads, view_aggregate ) { + TestViewAggregate< Kokkos::Threads >(); + TestViewAggregateReduction< Kokkos::Threads >(); +} + +TEST_F( threads , range_tag ) +{ + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(2); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_scan(2); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(3); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(3); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(3); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(2); + TestRange< Kokkos::Threads , 
Kokkos::Schedule<Kokkos::Static> >::test_for(1000); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000); +} + +TEST_F( threads , team_tag ) +{ + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(2); + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2); + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(2); + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(2); + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000); + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000); +} + +TEST_F( threads, long_reduce) { + TestReduce< long , Kokkos::Threads >( 1000000 ); +} + +TEST_F( threads, double_reduce) { + TestReduce< double , Kokkos::Threads >( 1000000 ); +} + +TEST_F( threads , reducers ) +{ + TestReducers<int, Kokkos::Threads>::execute_integer(); + TestReducers<size_t, Kokkos::Threads>::execute_integer(); + TestReducers<double, Kokkos::Threads>::execute_float(); + TestReducers<Kokkos::complex<double>, Kokkos::Threads>::execute_basic(); +} + +TEST_F( threads, team_long_reduce) { + TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 3 
); + TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +TEST_F( threads, team_double_reduce) { + TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +TEST_F( threads, long_reduce_dynamic ) { + TestReduceDynamic< long , Kokkos::Threads >( 1000000 ); +} + +TEST_F( threads, double_reduce_dynamic ) { + TestReduceDynamic< double , Kokkos::Threads >( 1000000 ); +} + +TEST_F( threads, long_reduce_dynamic_view ) { + TestReduceDynamicView< long , Kokkos::Threads >( 1000000 ); +} + +TEST_F( threads, team_shared_request) { + TestSharedTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >(); + TestSharedTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) +TEST_F( threads, team_lambda_shared_request) { + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >(); +} +#endif + +TEST_F( threads, shmem_size) { + TestShmemSize< Kokkos::Threads >(); +} + +TEST_F( threads , view_remap ) +{ + enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; + + typedef Kokkos::View< double*[N1][N2][N3] , + Kokkos::LayoutRight , + Kokkos::Threads > output_type ; + + typedef Kokkos::View< int**[N2][N3] , + Kokkos::LayoutLeft , + Kokkos::Threads > input_type ; + + typedef Kokkos::View< int*[N0][N2][N3] , + Kokkos::LayoutLeft , + Kokkos::Threads > diff_type ; + + output_type output( "output" , N0 ); + input_type input ( "input" , N0 , N1 ); + diff_type diff ( "diff" , N0 ); + + 
int value = 0 ; + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { + input(i0,i1,i2,i3) = ++value ; + }}}} + + // Kokkos::deep_copy( diff , input ); // throw with incompatible shape + Kokkos::deep_copy( output , input ); + + value = 0 ; + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { + ++value ; + ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); + }}}} +} + +//---------------------------------------------------------------------------- + +TEST_F( threads , atomics ) +{ + const int loop_count = 1e6 ; + + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,3) ) ); + + 
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<3>, Kokkos::Threads>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<3>, Kokkos::Threads>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<3>, Kokkos::Threads>(loop_count,3) ) ); +} + +TEST_F( threads , atomic_operations ) +{ + const int start = 1; //Avoid zero for division + const int end = 11; + for (int i = start; i < end; ++i) + { + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 9 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 9 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 9 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 9 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long 
int,Kokkos::Threads>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 9 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 4 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 4 ) ) ); + } + +} + +//---------------------------------------------------------------------------- + +#if 0 +TEST_F( threads , scan_small ) +{ + typedef TestScan< Kokkos::Threads , Kokkos::Impl::ThreadsExecUseScanSmall > 
TestScanFunctor ; + for ( int i = 0 ; i < 1000 ; ++i ) { + TestScanFunctor( 10 ); + TestScanFunctor( 10000 ); + } + TestScanFunctor( 1000000 ); + TestScanFunctor( 10000000 ); + + Kokkos::Threads::fence(); +} +#endif + +TEST_F( threads , scan ) +{ + TestScan< Kokkos::Threads >::test_range( 1 , 1000 ); + TestScan< Kokkos::Threads >( 1000000 ); + TestScan< Kokkos::Threads >( 10000000 ); + Kokkos::Threads::fence(); +} + +//---------------------------------------------------------------------------- + +TEST_F( threads , team_scan ) +{ + TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 10 ); + TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); + TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 10000 ); + TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); +} + +//---------------------------------------------------------------------------- + +TEST_F( threads , compiler_macros ) +{ + ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Threads >() ) ); +} + +TEST_F( threads , memory_space ) +{ + TestMemorySpace< Kokkos::Threads >(); +} + +TEST_F( threads , memory_pool ) +{ + bool val = TestMemoryPool::test_mempool< Kokkos::Threads >( 128, 128000000 ); + ASSERT_TRUE( val ); + + TestMemoryPool::test_mempool2< Kokkos::Threads >( 64, 4, 1000000, 2000000 ); + + TestMemoryPool::test_memory_exhaustion< Kokkos::Threads >(); +} + +//---------------------------------------------------------------------------- + +TEST_F( threads , template_meta_functions ) +{ + TestTemplateMetaFunctions<int, Kokkos::Threads >(); +} + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) +TEST_F( threads , cxx11 ) +{ + if ( Kokkos::Impl::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Threads >::value ) { + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(1) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(2) ) ); + 
ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(3) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(4) ) ); + } +} + +TEST_F( threads , reduction_deduction ) +{ + TestCXX11::test_reduction_deduction< Kokkos::Threads >(); +} +#endif /* #if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) */ + +TEST_F( threads , team_vector ) +{ + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(0) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(1) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(2) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(3) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(4) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(5) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(6) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(7) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(8) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(9) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(10) ) ); +} + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +TEST_F( threads , task_policy ) +{ + TestTaskPolicy::test_task_dep< Kokkos::Threads >( 10 ); + + for ( long i = 0 ; i < 25 ; ++i ) { +// printf( "test_fib(): %2ld\n", i ); + TestTaskPolicy::test_fib< Kokkos::Threads >(i); + } + for ( long i = 0 ; i < 35 ; ++i ) { +// printf( "test_fib2(): %2ld\n", i ); + TestTaskPolicy::test_fib2< Kokkos::Threads >(i); + } +} + +TEST_F( threads , task_team ) +{ + TestTaskPolicy::test_task_team< Kokkos::Threads >(1000); +} + +TEST_F( threads , task_latch ) +{ + TestTaskPolicy::test_latch< Kokkos::Threads >(10); + TestTaskPolicy::test_latch< Kokkos::Threads >(1000); +} + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ + +} // namespace Test + +#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */ diff --git a/lib/kokkos/core/unit_test/TestTile.hpp b/lib/kokkos/core/unit_test/TestTile.hpp new file mode 100644 index 
0000000000000000000000000000000000000000..dfb2bd81b3dec3485688f9827d3f1f7ad24ddb9d --- /dev/null +++ b/lib/kokkos/core/unit_test/TestTile.hpp @@ -0,0 +1,153 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef TEST_TILE_HPP +#define TEST_TILE_HPP + +#include <Kokkos_Core.hpp> + +namespace TestTile { + +template < typename Device , typename TileLayout> +struct ReduceTileErrors +{ + typedef Device execution_space ; + + typedef Kokkos::View< ptrdiff_t**, TileLayout, Device> array_type; + typedef Kokkos::View< ptrdiff_t[ TileLayout::N0 ][ TileLayout::N1 ], Kokkos::LayoutLeft , Device > tile_type ; + + array_type m_array ; + + typedef ptrdiff_t value_type; + + ReduceTileErrors( array_type a ) + : m_array(a) + {} + + + KOKKOS_INLINE_FUNCTION + static void init( value_type & errors ) + { + errors = 0; + } + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & errors , + const volatile value_type & src_errors ) + { + errors += src_errors; + } + + // Initialize + KOKKOS_INLINE_FUNCTION + void operator()( size_t iwork ) const + { + const size_t i = iwork % m_array.dimension_0(); + const size_t j = iwork / m_array.dimension_0(); + if ( j < m_array.dimension_1() ) { + m_array(i,j) = & m_array(i,j) - & m_array(0,0); + +// printf("m_array(%d,%d) = %d\n",int(i),int(j),int(m_array(i,j))); + + } + } + + // Verify: + KOKKOS_INLINE_FUNCTION + void operator()( size_t iwork , value_type & errors ) const + { + const size_t tile_dim0 = ( m_array.dimension_0() + TileLayout::N0 - 1 ) / TileLayout::N0 ; + const size_t tile_dim1 = ( m_array.dimension_1() + TileLayout::N1 - 1 ) / TileLayout::N1 ; + + const size_t itile = iwork % tile_dim0 ; + const size_t jtile = iwork / tile_dim0 ; + + if ( jtile < tile_dim1 ) { + + tile_type tile = Kokkos::tile_subview( m_array , itile , jtile ); + + if ( tile(0,0) != ptrdiff_t(( itile + jtile * tile_dim0 ) * TileLayout::N0 * TileLayout::N1 ) ) { + ++errors ; + } + else { + + for ( size_t j = 0 ; j < size_t(TileLayout::N1) ; ++j ) { + for ( size_t i = 0 ; i < size_t(TileLayout::N0) ; ++i ) { + const size_t 
iglobal = i + itile * TileLayout::N0 ; + const size_t jglobal = j + jtile * TileLayout::N1 ; + + if ( iglobal < m_array.dimension_0() && jglobal < m_array.dimension_1() ) { + if ( tile(i,j) != ptrdiff_t( tile(0,0) + i + j * TileLayout::N0 ) ) ++errors ; + +// printf("tile(%d,%d)(%d,%d) = %d\n",int(itile),int(jtile),int(i),int(j),int(tile(i,j))); + + } + } + } + } + } + } +}; + +template< class Space , unsigned N0 , unsigned N1 > +void test( const size_t dim0 , const size_t dim1 ) +{ + typedef Kokkos::LayoutTileLeft<N0,N1> array_layout ; + typedef ReduceTileErrors< Space , array_layout > functor_type ; + + const size_t tile_dim0 = ( dim0 + N0 - 1 ) / N0 ; + const size_t tile_dim1 = ( dim1 + N1 - 1 ) / N1 ; + + typename functor_type::array_type array("",dim0,dim1); + + Kokkos::parallel_for( Kokkos::RangePolicy<Space,size_t>(0,dim0*dim1) , functor_type( array ) ); + + ptrdiff_t error = 0 ; + + Kokkos::parallel_reduce( Kokkos::RangePolicy<Space,size_t>(0,tile_dim0*tile_dim1) , functor_type( array ) , error ); + + EXPECT_EQ( error , ptrdiff_t(0) ); +} + +} /* namespace TestTile */ + +#endif //TEST_TILE_HPP + diff --git a/lib/kokkos/core/unit_test/TestViewAPI.hpp b/lib/kokkos/core/unit_test/TestViewAPI.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ae4c6d2185d12bdf1f61ab66c73244e6b38bb50b --- /dev/null +++ b/lib/kokkos/core/unit_test/TestViewAPI.hpp @@ -0,0 +1,1416 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> +#include <stdexcept> +#include <sstream> +#include <iostream> + +/*--------------------------------------------------------------------------*/ + + +/*--------------------------------------------------------------------------*/ + +namespace Test { + +#if KOKKOS_USING_EXP_VIEW + +template< class T , class ... 
P > +size_t allocation_count( const Kokkos::View<T,P...> & view ) +{ + const size_t card = view.size(); + const size_t alloc = view.span(); + + const int memory_span = Kokkos::View<int*>::required_allocation_size(100); + + return (card <= alloc && memory_span == 400) ? alloc : 0 ; +} + +#else + +template< class T , class L , class D , class M , class S > +size_t allocation_count( const Kokkos::View<T,L,D,M,S> & view ) +{ + const size_t card = Kokkos::Impl::cardinality_count( view.shape() ); + const size_t alloc = view.capacity(); + + return card <= alloc ? alloc : 0 ; +} + +#endif + +/*--------------------------------------------------------------------------*/ + +template< typename T, class DeviceType> +struct TestViewOperator +{ + typedef typename DeviceType::execution_space execution_space ; + + static const unsigned N = 100 ; + static const unsigned D = 3 ; + + typedef Kokkos::View< T*[D] , execution_space > view_type ; + + const view_type v1 ; + const view_type v2 ; + + TestViewOperator() + : v1( "v1" , N ) + , v2( "v2" , N ) + {} + + static void testit() + { + Kokkos::parallel_for( N , TestViewOperator() ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const unsigned i ) const + { + const unsigned X = 0 ; + const unsigned Y = 1 ; + const unsigned Z = 2 ; + + v2(i,X) = v1(i,X); + v2(i,Y) = v1(i,Y); + v2(i,Z) = v1(i,Z); + } +}; + +/*--------------------------------------------------------------------------*/ + +template< class DataType , + class DeviceType , + unsigned Rank = Kokkos::ViewTraits< DataType >::rank > +struct TestViewOperator_LeftAndRight ; + +template< class DataType , class DeviceType > +struct TestViewOperator_LeftAndRight< DataType , DeviceType , 8 > +{ + typedef typename DeviceType::execution_space execution_space ; + typedef typename DeviceType::memory_space memory_space ; + typedef typename execution_space::size_type size_type ; + + typedef int value_type ; + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + 
const volatile value_type & input ) + { update |= input ; } + + KOKKOS_INLINE_FUNCTION + static void init( value_type & update ) + { update = 0 ; } + + + typedef Kokkos:: + View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + + typedef Kokkos:: + View< DataType, Kokkos::LayoutRight, execution_space > right_view ; + + typedef Kokkos:: + View< DataType, Kokkos::LayoutStride, execution_space > stride_view ; + + left_view left ; + right_view right ; + stride_view left_stride ; + stride_view right_stride ; + long left_alloc ; + long right_alloc ; + + TestViewOperator_LeftAndRight() + : left( "left" ) + , right( "right" ) + , left_stride( left ) + , right_stride( right ) + , left_alloc( allocation_count( left ) ) + , right_alloc( allocation_count( right ) ) + {} + + static void testit() + { + TestViewOperator_LeftAndRight driver ; + + int error_flag = 0 ; + + Kokkos::parallel_reduce( 1 , driver , error_flag ); + + ASSERT_EQ( error_flag , 0 ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type , value_type & update ) const + { + long offset ; + + offset = -1 ; + for ( unsigned i7 = 0 ; i7 < unsigned(left.dimension_7()) ; ++i7 ) + for ( unsigned i6 = 0 ; i6 < unsigned(left.dimension_6()) ; ++i6 ) + for ( unsigned i5 = 0 ; i5 < unsigned(left.dimension_5()) ; ++i5 ) + for ( unsigned i4 = 0 ; i4 < unsigned(left.dimension_4()) ; ++i4 ) + for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) + for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) + for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + { + const long j = & left( i0, i1, i2, i3, i4, i5, i6, i7 ) - + & left( 0, 0, 0, 0, 0, 0, 0, 0 ); + if ( j <= offset || left_alloc <= j ) { update |= 1 ; } + offset = j ; + + if ( & left(i0,i1,i2,i3,i4,i5,i6,i7) != + & left_stride(i0,i1,i2,i3,i4,i5,i6,i7) ) { + update |= 4 ; + } + } + + offset = -1 ; + for ( unsigned i0 = 0 ; i0 < 
unsigned(right.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) + for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) + for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) + for ( unsigned i4 = 0 ; i4 < unsigned(right.dimension_4()) ; ++i4 ) + for ( unsigned i5 = 0 ; i5 < unsigned(right.dimension_5()) ; ++i5 ) + for ( unsigned i6 = 0 ; i6 < unsigned(right.dimension_6()) ; ++i6 ) + for ( unsigned i7 = 0 ; i7 < unsigned(right.dimension_7()) ; ++i7 ) + { + const long j = & right( i0, i1, i2, i3, i4, i5, i6, i7 ) - + & right( 0, 0, 0, 0, 0, 0, 0, 0 ); + if ( j <= offset || right_alloc <= j ) { update |= 2 ; } + offset = j ; + + if ( & right(i0,i1,i2,i3,i4,i5,i6,i7) != + & right_stride(i0,i1,i2,i3,i4,i5,i6,i7) ) { + update |= 8 ; + } + } + } +}; + +template< class DataType , class DeviceType > +struct TestViewOperator_LeftAndRight< DataType , DeviceType , 7 > +{ + typedef typename DeviceType::execution_space execution_space ; + typedef typename DeviceType::memory_space memory_space ; + typedef typename execution_space::size_type size_type ; + + typedef int value_type ; + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + const volatile value_type & input ) + { update |= input ; } + + KOKKOS_INLINE_FUNCTION + static void init( value_type & update ) + { update = 0 ; } + + + typedef Kokkos:: + View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + + typedef Kokkos:: + View< DataType, Kokkos::LayoutRight, execution_space > right_view ; + + left_view left ; + right_view right ; + long left_alloc ; + long right_alloc ; + + TestViewOperator_LeftAndRight() + : left( "left" ) + , right( "right" ) + , left_alloc( allocation_count( left ) ) + , right_alloc( allocation_count( right ) ) + {} + + static void testit() + { + TestViewOperator_LeftAndRight driver ; + + int error_flag = 0 ; + + Kokkos::parallel_reduce( 1 , driver , error_flag ); + + ASSERT_EQ( error_flag , 
0 ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type , value_type & update ) const + { + long offset ; + + offset = -1 ; + for ( unsigned i6 = 0 ; i6 < unsigned(left.dimension_6()) ; ++i6 ) + for ( unsigned i5 = 0 ; i5 < unsigned(left.dimension_5()) ; ++i5 ) + for ( unsigned i4 = 0 ; i4 < unsigned(left.dimension_4()) ; ++i4 ) + for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) + for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) + for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + { + const long j = & left( i0, i1, i2, i3, i4, i5, i6 ) - + & left( 0, 0, 0, 0, 0, 0, 0 ); + if ( j <= offset || left_alloc <= j ) { update |= 1 ; } + offset = j ; + } + + offset = -1 ; + for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) + for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) + for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) + for ( unsigned i4 = 0 ; i4 < unsigned(right.dimension_4()) ; ++i4 ) + for ( unsigned i5 = 0 ; i5 < unsigned(right.dimension_5()) ; ++i5 ) + for ( unsigned i6 = 0 ; i6 < unsigned(right.dimension_6()) ; ++i6 ) + { + const long j = & right( i0, i1, i2, i3, i4, i5, i6 ) - + & right( 0, 0, 0, 0, 0, 0, 0 ); + if ( j <= offset || right_alloc <= j ) { update |= 2 ; } + offset = j ; + } + } +}; + +template< class DataType , class DeviceType > +struct TestViewOperator_LeftAndRight< DataType , DeviceType , 6 > +{ + typedef typename DeviceType::execution_space execution_space ; + typedef typename DeviceType::memory_space memory_space ; + typedef typename execution_space::size_type size_type ; + + typedef int value_type ; + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + const volatile value_type & input ) + { update |= input ; } + + KOKKOS_INLINE_FUNCTION + static void init( 
value_type & update ) + { update = 0 ; } + + + typedef Kokkos:: + View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + + typedef Kokkos:: + View< DataType, Kokkos::LayoutRight, execution_space > right_view ; + + left_view left ; + right_view right ; + long left_alloc ; + long right_alloc ; + + TestViewOperator_LeftAndRight() + : left( "left" ) + , right( "right" ) + , left_alloc( allocation_count( left ) ) + , right_alloc( allocation_count( right ) ) + {} + + static void testit() + { + TestViewOperator_LeftAndRight driver ; + + int error_flag = 0 ; + + Kokkos::parallel_reduce( 1 , driver , error_flag ); + + ASSERT_EQ( error_flag , 0 ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type , value_type & update ) const + { + long offset ; + + offset = -1 ; + for ( unsigned i5 = 0 ; i5 < unsigned(left.dimension_5()) ; ++i5 ) + for ( unsigned i4 = 0 ; i4 < unsigned(left.dimension_4()) ; ++i4 ) + for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) + for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) + for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + { + const long j = & left( i0, i1, i2, i3, i4, i5 ) - + & left( 0, 0, 0, 0, 0, 0 ); + if ( j <= offset || left_alloc <= j ) { update |= 1 ; } + offset = j ; + } + + offset = -1 ; + for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) + for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) + for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) + for ( unsigned i4 = 0 ; i4 < unsigned(right.dimension_4()) ; ++i4 ) + for ( unsigned i5 = 0 ; i5 < unsigned(right.dimension_5()) ; ++i5 ) + { + const long j = & right( i0, i1, i2, i3, i4, i5 ) - + & right( 0, 0, 0, 0, 0, 0 ); + if ( j <= offset || right_alloc <= j ) { update |= 2 ; } + offset = j ; + } + } +}; + +template< 
class DataType , class DeviceType > +struct TestViewOperator_LeftAndRight< DataType , DeviceType , 5 > +{ + typedef typename DeviceType::execution_space execution_space ; + typedef typename DeviceType::memory_space memory_space ; + typedef typename execution_space::size_type size_type ; + + typedef int value_type ; + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + const volatile value_type & input ) + { update |= input ; } + + KOKKOS_INLINE_FUNCTION + static void init( value_type & update ) + { update = 0 ; } + + + typedef Kokkos:: + View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + + typedef Kokkos:: + View< DataType, Kokkos::LayoutRight, execution_space > right_view ; + + typedef Kokkos:: + View< DataType, Kokkos::LayoutStride, execution_space > stride_view ; + + left_view left ; + right_view right ; + stride_view left_stride ; + stride_view right_stride ; + long left_alloc ; + long right_alloc ; + + TestViewOperator_LeftAndRight() + : left( "left" ) + , right( "right" ) + , left_stride( left ) + , right_stride( right ) + , left_alloc( allocation_count( left ) ) + , right_alloc( allocation_count( right ) ) + {} + + static void testit() + { + TestViewOperator_LeftAndRight driver ; + + int error_flag = 0 ; + + Kokkos::parallel_reduce( 1 , driver , error_flag ); + + ASSERT_EQ( error_flag , 0 ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type , value_type & update ) const + { + long offset ; + + offset = -1 ; + for ( unsigned i4 = 0 ; i4 < unsigned(left.dimension_4()) ; ++i4 ) + for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) + for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) + for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + { + const long j = & left( i0, i1, i2, i3, i4 ) - + & left( 0, 0, 0, 0, 0 ); + if ( j <= offset || left_alloc <= j ) { update |= 1 ; } + offset = j ; + + 
if ( & left( i0, i1, i2, i3, i4 ) != + & left_stride( i0, i1, i2, i3, i4 ) ) { update |= 4 ; } + } + + offset = -1 ; + for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) + for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) + for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) + for ( unsigned i4 = 0 ; i4 < unsigned(right.dimension_4()) ; ++i4 ) + { + const long j = & right( i0, i1, i2, i3, i4 ) - + & right( 0, 0, 0, 0, 0 ); + if ( j <= offset || right_alloc <= j ) { update |= 2 ; } + offset = j ; + + if ( & right( i0, i1, i2, i3, i4 ) != + & right_stride( i0, i1, i2, i3, i4 ) ) { update |= 8 ; } + } + } +}; + +template< class DataType , class DeviceType > +struct TestViewOperator_LeftAndRight< DataType , DeviceType , 4 > +{ + typedef typename DeviceType::execution_space execution_space ; + typedef typename DeviceType::memory_space memory_space ; + typedef typename execution_space::size_type size_type ; + + typedef int value_type ; + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + const volatile value_type & input ) + { update |= input ; } + + KOKKOS_INLINE_FUNCTION + static void init( value_type & update ) + { update = 0 ; } + + + typedef Kokkos:: + View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + + typedef Kokkos:: + View< DataType, Kokkos::LayoutRight, execution_space > right_view ; + + left_view left ; + right_view right ; + long left_alloc ; + long right_alloc ; + + TestViewOperator_LeftAndRight() + : left( "left" ) + , right( "right" ) + , left_alloc( allocation_count( left ) ) + , right_alloc( allocation_count( right ) ) + {} + + static void testit() + { + TestViewOperator_LeftAndRight driver ; + + int error_flag = 0 ; + + Kokkos::parallel_reduce( 1 , driver , error_flag ); + + ASSERT_EQ( error_flag , 0 ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type , value_type & update 
) const + { + long offset ; + + offset = -1 ; + for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) + for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) + for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + { + const long j = & left( i0, i1, i2, i3 ) - + & left( 0, 0, 0, 0 ); + if ( j <= offset || left_alloc <= j ) { update |= 1 ; } + offset = j ; + } + + offset = -1 ; + for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) + for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) + for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) + { + const long j = & right( i0, i1, i2, i3 ) - + & right( 0, 0, 0, 0 ); + if ( j <= offset || right_alloc <= j ) { update |= 2 ; } + offset = j ; + } + } +}; + +template< class DataType , class DeviceType > +struct TestViewOperator_LeftAndRight< DataType , DeviceType , 3 > +{ + typedef typename DeviceType::execution_space execution_space ; + typedef typename DeviceType::memory_space memory_space ; + typedef typename execution_space::size_type size_type ; + + typedef int value_type ; + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + const volatile value_type & input ) + { update |= input ; } + + KOKKOS_INLINE_FUNCTION + static void init( value_type & update ) + { update = 0 ; } + + + typedef Kokkos:: + View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + + typedef Kokkos:: + View< DataType, Kokkos::LayoutRight, execution_space > right_view ; + + typedef Kokkos:: + View< DataType, Kokkos::LayoutStride, execution_space > stride_view ; + + left_view left ; + right_view right ; + stride_view left_stride ; + stride_view right_stride ; + long left_alloc ; + long right_alloc ; + + TestViewOperator_LeftAndRight() + : left( std::string("left") ) + , right( 
std::string("right") ) + , left_stride( left ) + , right_stride( right ) + , left_alloc( allocation_count( left ) ) + , right_alloc( allocation_count( right ) ) + {} + + static void testit() + { + TestViewOperator_LeftAndRight driver ; + + int error_flag = 0 ; + + Kokkos::parallel_reduce( 1 , driver , error_flag ); + + ASSERT_EQ( error_flag , 0 ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type , value_type & update ) const + { + long offset ; + + offset = -1 ; + for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) + for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + { + const long j = & left( i0, i1, i2 ) - + & left( 0, 0, 0 ); + if ( j <= offset || left_alloc <= j ) { update |= 1 ; } + offset = j ; + + if ( & left(i0,i1,i2) != & left_stride(i0,i1,i2) ) { update |= 4 ; } + } + + offset = -1 ; + for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) + for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) + { + const long j = & right( i0, i1, i2 ) - + & right( 0, 0, 0 ); + if ( j <= offset || right_alloc <= j ) { update |= 2 ; } + offset = j ; + + if ( & right(i0,i1,i2) != & right_stride(i0,i1,i2) ) { update |= 8 ; } + } + +#if KOKKOS_USING_EXP_VIEW + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) + { + if ( & left(i0,i1,i2) != & left(i0,i1,i2,0,0,0,0,0) ) { update |= 3 ; } + if ( & right(i0,i1,i2) != & right(i0,i1,i2,0,0,0,0,0) ) { update |= 3 ; } + } +#endif + } +}; + +template< class DataType , class DeviceType > +struct TestViewOperator_LeftAndRight< DataType , DeviceType , 2 > +{ + typedef typename DeviceType::execution_space execution_space ; + typedef typename DeviceType::memory_space 
memory_space ; + typedef typename execution_space::size_type size_type ; + + typedef int value_type ; + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + const volatile value_type & input ) + { update |= input ; } + + KOKKOS_INLINE_FUNCTION + static void init( value_type & update ) + { update = 0 ; } + + + typedef Kokkos:: + View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + + typedef Kokkos:: + View< DataType, Kokkos::LayoutRight, execution_space > right_view ; + + left_view left ; + right_view right ; + long left_alloc ; + long right_alloc ; + + TestViewOperator_LeftAndRight() + : left( "left" ) + , right( "right" ) + , left_alloc( allocation_count( left ) ) + , right_alloc( allocation_count( right ) ) + {} + + static void testit() + { + TestViewOperator_LeftAndRight driver ; + + int error_flag = 0 ; + + Kokkos::parallel_reduce( 1 , driver , error_flag ); + + ASSERT_EQ( error_flag , 0 ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type , value_type & update ) const + { + long offset ; + + offset = -1 ; + for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + { + const long j = & left( i0, i1 ) - + & left( 0, 0 ); + if ( j <= offset || left_alloc <= j ) { update |= 1 ; } + offset = j ; + } + + offset = -1 ; + for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) + { + const long j = & right( i0, i1 ) - + & right( 0, 0 ); + if ( j <= offset || right_alloc <= j ) { update |= 2 ; } + offset = j ; + } + +#if KOKKOS_USING_EXP_VIEW + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + { + if ( & left(i0,i1) != & left(i0,i1,0,0,0,0,0,0) ) { update |= 3 ; } + if ( & right(i0,i1) != & right(i0,i1,0,0,0,0,0,0) ) { update |= 3 ; } + } +#endif + } +}; + +template< 
class DataType , class DeviceType > +struct TestViewOperator_LeftAndRight< DataType , DeviceType , 1 > +{ + typedef typename DeviceType::execution_space execution_space ; + typedef typename DeviceType::memory_space memory_space ; + typedef typename execution_space::size_type size_type ; + + typedef int value_type ; + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + const volatile value_type & input ) + { update |= input ; } + + KOKKOS_INLINE_FUNCTION + static void init( value_type & update ) + { update = 0 ; } + + + typedef Kokkos:: + View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + + typedef Kokkos:: + View< DataType, Kokkos::LayoutRight, execution_space > right_view ; + + typedef Kokkos:: + View< DataType, Kokkos::LayoutStride, execution_space > stride_view ; + + left_view left ; + right_view right ; + stride_view left_stride ; + stride_view right_stride ; + long left_alloc ; + long right_alloc ; + + TestViewOperator_LeftAndRight() + : left( "left" ) + , right( "right" ) + , left_stride( left ) + , right_stride( right ) + , left_alloc( allocation_count( left ) ) + , right_alloc( allocation_count( right ) ) + {} + + static void testit() + { + TestViewOperator_LeftAndRight driver ; + + int error_flag = 0 ; + + Kokkos::parallel_reduce( 1 , driver , error_flag ); + + ASSERT_EQ( error_flag , 0 ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type , value_type & update ) const + { + for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + { +#if KOKKOS_USING_EXP_VIEW + if ( & left(i0) != & left(i0,0,0,0,0,0,0,0) ) { update |= 3 ; } + if ( & right(i0) != & right(i0,0,0,0,0,0,0,0) ) { update |= 3 ; } +#endif + if ( & left(i0) != & left_stride(i0) ) { update |= 4 ; } + if ( & right(i0) != & right_stride(i0) ) { update |= 8 ; } + } + } +}; + +template<class Layout, class DeviceType> +struct TestViewMirror { + + template<class MemoryTraits> + void static test_mirror() { + Kokkos::View<double*, 
Layout, Kokkos::HostSpace> a_org("A",1000); + Kokkos::View<double*, Layout, Kokkos::HostSpace, MemoryTraits> a_h = a_org; + auto a_h2 = Kokkos::create_mirror(Kokkos::HostSpace(),a_h); + auto a_d = Kokkos::create_mirror(DeviceType(),a_h); + + int equal_ptr_h_h2 = (a_h.data() ==a_h2.data())?1:0; + int equal_ptr_h_d = (a_h.data() ==a_d. data())?1:0; + int equal_ptr_h2_d = (a_h2.data()==a_d. data())?1:0; + + ASSERT_EQ(equal_ptr_h_h2,0); + ASSERT_EQ(equal_ptr_h_d ,0); + ASSERT_EQ(equal_ptr_h2_d,0); + + + ASSERT_EQ(a_h.dimension_0(),a_h2.dimension_0()); + ASSERT_EQ(a_h.dimension_0(),a_d .dimension_0()); + } + + + template<class MemoryTraits> + void static test_mirror_view() { + Kokkos::View<double*, Layout, Kokkos::HostSpace> a_org("A",1000); + Kokkos::View<double*, Layout, Kokkos::HostSpace, MemoryTraits> a_h = a_org; + auto a_h2 = Kokkos::create_mirror_view(Kokkos::HostSpace(),a_h); + auto a_d = Kokkos::create_mirror_view(DeviceType(),a_h); + + int equal_ptr_h_h2 = a_h.data() ==a_h2.data()?1:0; + int equal_ptr_h_d = a_h.data() ==a_d. data()?1:0; + int equal_ptr_h2_d = a_h2.data()==a_d. 
data()?1:0; + + int is_same_memspace = std::is_same<Kokkos::HostSpace,typename DeviceType::memory_space>::value?1:0; + ASSERT_EQ(equal_ptr_h_h2,1); + ASSERT_EQ(equal_ptr_h_d ,is_same_memspace); + ASSERT_EQ(equal_ptr_h2_d ,is_same_memspace); + + + ASSERT_EQ(a_h.dimension_0(),a_h2.dimension_0()); + ASSERT_EQ(a_h.dimension_0(),a_d .dimension_0()); + } + + void static testit() { + test_mirror<Kokkos::MemoryTraits<0>>(); + test_mirror<Kokkos::MemoryTraits<Kokkos::Unmanaged>>(); + test_mirror_view<Kokkos::MemoryTraits<0>>(); + test_mirror_view<Kokkos::MemoryTraits<Kokkos::Unmanaged>>(); + } +}; + +/*--------------------------------------------------------------------------*/ + +template< typename T, class DeviceType > +class TestViewAPI +{ +public: + typedef DeviceType device ; + + enum { N0 = 1000 , + N1 = 3 , + N2 = 5 , + N3 = 7 }; + + typedef Kokkos::View< T , device > dView0 ; + typedef Kokkos::View< T* , device > dView1 ; + typedef Kokkos::View< T*[N1] , device > dView2 ; + typedef Kokkos::View< T*[N1][N2] , device > dView3 ; + typedef Kokkos::View< T*[N1][N2][N3] , device > dView4 ; + typedef Kokkos::View< const T*[N1][N2][N3] , device > const_dView4 ; + + typedef Kokkos::View< T****, device, Kokkos::MemoryUnmanaged > dView4_unmanaged ; + + typedef typename dView0::host_mirror_space host ; + + TestViewAPI() + { + run_test_mirror(); + run_test(); + run_test_scalar(); + run_test_const(); + run_test_subview(); + run_test_subview_strided(); + run_test_vector(); + + TestViewOperator< T , device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2][3][4][2][3] , device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2][3][4][2] , device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2][3][4] , device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2][3] , device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2] , device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4] , device >::testit(); + 
TestViewOperator_LeftAndRight< int[2][3] , device >::testit(); + TestViewOperator_LeftAndRight< int[2] , device >::testit(); + TestViewMirror<Kokkos::LayoutLeft, device >::testit(); + TestViewMirror<Kokkos::LayoutRight, device >::testit(); + + } + + static void run_test_mirror() + { + typedef Kokkos::View< int , host > view_type ; + typedef typename view_type::HostMirror mirror_type ; + + static_assert( std::is_same< typename view_type::memory_space + , typename mirror_type::memory_space + >::value , "" ); + + view_type a("a"); + mirror_type am = Kokkos::create_mirror_view(a); + mirror_type ax = Kokkos::create_mirror(a); + ASSERT_EQ( & a() , & am() ); + } + + static void run_test_scalar() + { + typedef typename dView0::HostMirror hView0 ; + + dView0 dx , dy ; + hView0 hx , hy ; + + dx = dView0( "dx" ); + dy = dView0( "dy" ); + + hx = Kokkos::create_mirror( dx ); + hy = Kokkos::create_mirror( dy ); + + hx() = 1 ; + + Kokkos::deep_copy( dx , hx ); + Kokkos::deep_copy( dy , dx ); + Kokkos::deep_copy( hy , dy ); + + ASSERT_EQ( hx(), hy() ); + } + + static void run_test() + { + // mfh 14 Feb 2014: This test doesn't actually create instances of + // these types. In order to avoid "declared but unused typedef" + // warnings, we declare empty instances of these types, with the + // usual "(void)" marker to avoid compiler warnings for unused + // variables. 
+ + typedef typename dView0::HostMirror hView0 ; + typedef typename dView1::HostMirror hView1 ; + typedef typename dView2::HostMirror hView2 ; + typedef typename dView3::HostMirror hView3 ; + typedef typename dView4::HostMirror hView4 ; + + { + hView0 thing; + (void) thing; + } + { + hView1 thing; + (void) thing; + } + { + hView2 thing; + (void) thing; + } + { + hView3 thing; + (void) thing; + } + { + hView4 thing; + (void) thing; + } + + dView4 dx , dy , dz ; + hView4 hx , hy , hz ; + + ASSERT_TRUE( dx.ptr_on_device() == 0 ); + ASSERT_TRUE( dy.ptr_on_device() == 0 ); + ASSERT_TRUE( dz.ptr_on_device() == 0 ); + ASSERT_TRUE( hx.ptr_on_device() == 0 ); + ASSERT_TRUE( hy.ptr_on_device() == 0 ); + ASSERT_TRUE( hz.ptr_on_device() == 0 ); + ASSERT_EQ( dx.dimension_0() , 0u ); + ASSERT_EQ( dy.dimension_0() , 0u ); + ASSERT_EQ( dz.dimension_0() , 0u ); + ASSERT_EQ( hx.dimension_0() , 0u ); + ASSERT_EQ( hy.dimension_0() , 0u ); + ASSERT_EQ( hz.dimension_0() , 0u ); + ASSERT_EQ( dx.dimension_1() , unsigned(N1) ); + ASSERT_EQ( dy.dimension_1() , unsigned(N1) ); + ASSERT_EQ( dz.dimension_1() , unsigned(N1) ); + ASSERT_EQ( hx.dimension_1() , unsigned(N1) ); + ASSERT_EQ( hy.dimension_1() , unsigned(N1) ); + ASSERT_EQ( hz.dimension_1() , unsigned(N1) ); + + dx = dView4( "dx" , N0 ); + dy = dView4( "dy" , N0 ); + + #if KOKKOS_USING_EXP_VIEW + ASSERT_EQ( dx.use_count() , size_t(1) ); + #else + ASSERT_EQ( dx.tracker().ref_count() , size_t(1) ); + #endif + + dView4_unmanaged unmanaged_dx = dx; + #if KOKKOS_USING_EXP_VIEW + ASSERT_EQ( dx.use_count() , size_t(1) ); + #else + ASSERT_EQ( dx.tracker().ref_count() , size_t(1) ); + #endif + + dView4_unmanaged unmanaged_from_ptr_dx = dView4_unmanaged(dx.ptr_on_device(), + dx.dimension_0(), + dx.dimension_1(), + dx.dimension_2(), + dx.dimension_3()); + + { + // Destruction of this view should be harmless + const_dView4 unmanaged_from_ptr_const_dx( dx.ptr_on_device() , + dx.dimension_0() , + dx.dimension_1() , + dx.dimension_2() , + 
dx.dimension_3() ); + } + + const_dView4 const_dx = dx ; + #if KOKKOS_USING_EXP_VIEW + ASSERT_EQ( dx.use_count() , size_t(2) ); + #else + ASSERT_EQ( dx.tracker().ref_count() , size_t(2) ); + #endif + + { + const_dView4 const_dx2; + const_dx2 = const_dx; + #if KOKKOS_USING_EXP_VIEW + ASSERT_EQ( dx.use_count() , size_t(3) ); + #else + ASSERT_EQ( dx.tracker().ref_count() , size_t(3) ); + #endif + + const_dx2 = dy; + #if KOKKOS_USING_EXP_VIEW + ASSERT_EQ( dx.use_count() , size_t(2) ); + #else + ASSERT_EQ( dx.tracker().ref_count() , size_t(2) ); + #endif + + const_dView4 const_dx3(dx); + #if KOKKOS_USING_EXP_VIEW + ASSERT_EQ( dx.use_count() , size_t(3) ); + #else + ASSERT_EQ( dx.tracker().ref_count() , size_t(3) ); + #endif + + dView4_unmanaged dx4_unmanaged(dx); + #if KOKKOS_USING_EXP_VIEW + ASSERT_EQ( dx.use_count() , size_t(3) ); + #else + ASSERT_EQ( dx.tracker().ref_count() , size_t(3) ); + #endif + } + + #if KOKKOS_USING_EXP_VIEW + ASSERT_EQ( dx.use_count() , size_t(2) ); + #else + ASSERT_EQ( dx.tracker().ref_count() , size_t(2) ); + #endif + + + ASSERT_FALSE( dx.ptr_on_device() == 0 ); + ASSERT_FALSE( const_dx.ptr_on_device() == 0 ); + ASSERT_FALSE( unmanaged_dx.ptr_on_device() == 0 ); + ASSERT_FALSE( unmanaged_from_ptr_dx.ptr_on_device() == 0 ); + ASSERT_FALSE( dy.ptr_on_device() == 0 ); + ASSERT_NE( dx , dy ); + + ASSERT_EQ( dx.dimension_0() , unsigned(N0) ); + ASSERT_EQ( dx.dimension_1() , unsigned(N1) ); + ASSERT_EQ( dx.dimension_2() , unsigned(N2) ); + ASSERT_EQ( dx.dimension_3() , unsigned(N3) ); + + ASSERT_EQ( dy.dimension_0() , unsigned(N0) ); + ASSERT_EQ( dy.dimension_1() , unsigned(N1) ); + ASSERT_EQ( dy.dimension_2() , unsigned(N2) ); + ASSERT_EQ( dy.dimension_3() , unsigned(N3) ); + + ASSERT_EQ( unmanaged_from_ptr_dx.capacity(),unsigned(N0)*unsigned(N1)*unsigned(N2)*unsigned(N3) ); + + hx = Kokkos::create_mirror( dx ); + hy = Kokkos::create_mirror( dy ); + + // T v1 = hx() ; // Generates compile error as intended + // T v2 = hx(0,0) ; // Generates 
compile error as intended + // hx(0,0) = v2 ; // Generates compile error as intended + +#if ! KOKKOS_USING_EXP_VIEW + // Testing with asynchronous deep copy with respect to device + { + size_t count = 0 ; + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < hx.dimension_1() ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < hx.dimension_2() ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < hx.dimension_3() ; ++i3 ) { + hx(ip,i1,i2,i3) = ++count ; + }}}} + + + Kokkos::deep_copy(typename hView4::execution_space(), dx , hx ); + Kokkos::deep_copy(typename hView4::execution_space(), dy , dx ); + Kokkos::deep_copy(typename hView4::execution_space(), hy , dy ); + + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + { ASSERT_EQ( hx(ip,i1,i2,i3) , hy(ip,i1,i2,i3) ); } + }}}} + + Kokkos::deep_copy(typename hView4::execution_space(), dx , T(0) ); + Kokkos::deep_copy(typename hView4::execution_space(), hx , dx ); + + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + { ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); } + }}}} + } + + // Testing with asynchronous deep copy with respect to host + { + size_t count = 0 ; + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < hx.dimension_1() ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < hx.dimension_2() ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < hx.dimension_3() ; ++i3 ) { + hx(ip,i1,i2,i3) = ++count ; + }}}} + + Kokkos::deep_copy(typename dView4::execution_space(), dx , hx ); + Kokkos::deep_copy(typename dView4::execution_space(), dy , dx ); + Kokkos::deep_copy(typename dView4::execution_space(), hy , dy ); + + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + { ASSERT_EQ( 
hx(ip,i1,i2,i3) , hy(ip,i1,i2,i3) ); } + }}}} + + Kokkos::deep_copy(typename dView4::execution_space(), dx , T(0) ); + Kokkos::deep_copy(typename dView4::execution_space(), hx , dx ); + + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + { ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); } + }}}} + } +#endif /* #if ! KOKKOS_USING_EXP_VIEW */ + + // Testing with synchronous deep copy + { + size_t count = 0 ; + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < hx.dimension_1() ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < hx.dimension_2() ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < hx.dimension_3() ; ++i3 ) { + hx(ip,i1,i2,i3) = ++count ; + }}}} + + Kokkos::deep_copy( dx , hx ); + Kokkos::deep_copy( dy , dx ); + Kokkos::deep_copy( hy , dy ); + + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + { ASSERT_EQ( hx(ip,i1,i2,i3) , hy(ip,i1,i2,i3) ); } + }}}} + + Kokkos::deep_copy( dx , T(0) ); + Kokkos::deep_copy( hx , dx ); + + for ( size_t ip = 0 ; ip < N0 ; ++ip ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + { ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); } + }}}} + } + dz = dx ; ASSERT_EQ( dx, dz); ASSERT_NE( dy, dz); + dz = dy ; ASSERT_EQ( dy, dz); ASSERT_NE( dx, dz); + + dx = dView4(); + ASSERT_TRUE( dx.ptr_on_device() == 0 ); + ASSERT_FALSE( dy.ptr_on_device() == 0 ); + ASSERT_FALSE( dz.ptr_on_device() == 0 ); + dy = dView4(); + ASSERT_TRUE( dx.ptr_on_device() == 0 ); + ASSERT_TRUE( dy.ptr_on_device() == 0 ); + ASSERT_FALSE( dz.ptr_on_device() == 0 ); + dz = dView4(); + ASSERT_TRUE( dx.ptr_on_device() == 0 ); + ASSERT_TRUE( dy.ptr_on_device() == 0 ); + ASSERT_TRUE( dz.ptr_on_device() == 0 ); + } + + typedef T DataType[2] ; + + static void + 
check_auto_conversion_to_const( + const Kokkos::View< const DataType , device > & arg_const , + const Kokkos::View< DataType , device > & arg ) + { + ASSERT_TRUE( arg_const == arg ); + } + + static void run_test_const() + { + typedef Kokkos::View< DataType , device > typeX ; + typedef Kokkos::View< const DataType , device > const_typeX ; + typedef Kokkos::View< const DataType , device , Kokkos::MemoryRandomAccess > const_typeR ; + typeX x( "X" ); + const_typeX xc = x ; + const_typeR xr = x ; + + ASSERT_TRUE( xc == x ); + ASSERT_TRUE( x == xc ); + + // For CUDA the constant random access View does not return + // an lvalue reference due to retrieving through texture cache + // therefore not allowed to query the underlying pointer. +#if defined( KOKKOS_HAVE_CUDA ) + if ( ! std::is_same< typename device::execution_space , Kokkos::Cuda >::value ) +#endif + { + ASSERT_TRUE( x.ptr_on_device() == xr.ptr_on_device() ); + } + + // typeX xf = xc ; // setting non-const from const must not compile + + check_auto_conversion_to_const( x , x ); + } + + static void run_test_subview() + { + typedef Kokkos::View< const T , device > sView ; + + dView0 d0( "d0" ); + dView1 d1( "d1" , N0 ); + dView2 d2( "d2" , N0 ); + dView3 d3( "d3" , N0 ); + dView4 d4( "d4" , N0 ); + + sView s0 = d0 ; + sView s1 = Kokkos::subview( d1 , 1 ); + sView s2 = Kokkos::subview( d2 , 1 , 1 ); + sView s3 = Kokkos::subview( d3 , 1 , 1 , 1 ); + sView s4 = Kokkos::subview( d4 , 1 , 1 , 1 , 1 ); + } + + static void run_test_subview_strided() + { + typedef Kokkos::View< int **** , Kokkos::LayoutLeft , host > view_left_4 ; + typedef Kokkos::View< int **** , Kokkos::LayoutRight , host > view_right_4 ; + typedef Kokkos::View< int ** , Kokkos::LayoutLeft , host > view_left_2 ; + typedef Kokkos::View< int ** , Kokkos::LayoutRight , host > view_right_2 ; + + typedef Kokkos::View< int * , Kokkos::LayoutStride , host > view_stride_1 ; + typedef Kokkos::View< int ** , Kokkos::LayoutStride , host > view_stride_2 ; + + 
view_left_2 xl2("xl2", 100 , 200 ); + view_right_2 xr2("xr2", 100 , 200 ); + view_stride_1 yl1 = Kokkos::subview( xl2 , 0 , Kokkos::ALL() ); + view_stride_1 yl2 = Kokkos::subview( xl2 , 1 , Kokkos::ALL() ); + view_stride_1 yr1 = Kokkos::subview( xr2 , 0 , Kokkos::ALL() ); + view_stride_1 yr2 = Kokkos::subview( xr2 , 1 , Kokkos::ALL() ); + + ASSERT_EQ( yl1.dimension_0() , xl2.dimension_1() ); + ASSERT_EQ( yl2.dimension_0() , xl2.dimension_1() ); + ASSERT_EQ( yr1.dimension_0() , xr2.dimension_1() ); + ASSERT_EQ( yr2.dimension_0() , xr2.dimension_1() ); + + ASSERT_EQ( & yl1(0) - & xl2(0,0) , 0 ); + ASSERT_EQ( & yl2(0) - & xl2(1,0) , 0 ); + ASSERT_EQ( & yr1(0) - & xr2(0,0) , 0 ); + ASSERT_EQ( & yr2(0) - & xr2(1,0) , 0 ); + + view_left_4 xl4( "xl4" , 10 , 20 , 30 , 40 ); + view_right_4 xr4( "xr4" , 10 , 20 , 30 , 40 ); + + view_stride_2 yl4 = Kokkos::subview( xl4 , 1 , Kokkos::ALL() , 2 , Kokkos::ALL() ); + view_stride_2 yr4 = Kokkos::subview( xr4 , 1 , Kokkos::ALL() , 2 , Kokkos::ALL() ); + + ASSERT_EQ( yl4.dimension_0() , xl4.dimension_1() ); + ASSERT_EQ( yl4.dimension_1() , xl4.dimension_3() ); + ASSERT_EQ( yr4.dimension_0() , xr4.dimension_1() ); + ASSERT_EQ( yr4.dimension_1() , xr4.dimension_3() ); + + ASSERT_EQ( & yl4(4,4) - & xl4(1,4,2,4) , 0 ); + ASSERT_EQ( & yr4(4,4) - & xr4(1,4,2,4) , 0 ); + } + + static void run_test_vector() + { + static const unsigned Length = 1000 , Count = 8 ; + + typedef Kokkos::View< T* , Kokkos::LayoutLeft , host > vector_type ; + typedef Kokkos::View< T** , Kokkos::LayoutLeft , host > multivector_type ; + + typedef Kokkos::View< T* , Kokkos::LayoutRight , host > vector_right_type ; + typedef Kokkos::View< T** , Kokkos::LayoutRight , host > multivector_right_type ; + + typedef Kokkos::View< const T* , Kokkos::LayoutRight, host > const_vector_right_type ; + typedef Kokkos::View< const T* , Kokkos::LayoutLeft , host > const_vector_type ; + typedef Kokkos::View< const T** , Kokkos::LayoutLeft , host > const_multivector_type ; + + 
multivector_type mv = multivector_type( "mv" , Length , Count ); + multivector_right_type mv_right = multivector_right_type( "mv" , Length , Count ); + + vector_type v1 = Kokkos::subview( mv , Kokkos::ALL() , 0 ); + vector_type v2 = Kokkos::subview( mv , Kokkos::ALL() , 1 ); + vector_type v3 = Kokkos::subview( mv , Kokkos::ALL() , 2 ); + + vector_type rv1 = Kokkos::subview( mv_right , 0 , Kokkos::ALL() ); + vector_type rv2 = Kokkos::subview( mv_right , 1 , Kokkos::ALL() ); + vector_type rv3 = Kokkos::subview( mv_right , 2 , Kokkos::ALL() ); + + multivector_type mv1 = Kokkos::subview( mv , std::make_pair( 1 , 998 ) , + std::make_pair( 2 , 5 ) ); + + multivector_right_type mvr1 = + Kokkos::subview( mv_right , + std::make_pair( 1 , 998 ) , + std::make_pair( 2 , 5 ) ); + + const_vector_type cv1 = Kokkos::subview( mv , Kokkos::ALL(), 0 ); + const_vector_type cv2 = Kokkos::subview( mv , Kokkos::ALL(), 1 ); + const_vector_type cv3 = Kokkos::subview( mv , Kokkos::ALL(), 2 ); + + vector_right_type vr1 = Kokkos::subview( mv , Kokkos::ALL() , 0 ); + vector_right_type vr2 = Kokkos::subview( mv , Kokkos::ALL() , 1 ); + vector_right_type vr3 = Kokkos::subview( mv , Kokkos::ALL() , 2 ); + + const_vector_right_type cvr1 = Kokkos::subview( mv , Kokkos::ALL() , 0 ); + const_vector_right_type cvr2 = Kokkos::subview( mv , Kokkos::ALL() , 1 ); + const_vector_right_type cvr3 = Kokkos::subview( mv , Kokkos::ALL() , 2 ); + + ASSERT_TRUE( & v1[0] == & v1(0) ); + ASSERT_TRUE( & v1[0] == & mv(0,0) ); + ASSERT_TRUE( & v2[0] == & mv(0,1) ); + ASSERT_TRUE( & v3[0] == & mv(0,2) ); + + ASSERT_TRUE( & cv1[0] == & mv(0,0) ); + ASSERT_TRUE( & cv2[0] == & mv(0,1) ); + ASSERT_TRUE( & cv3[0] == & mv(0,2) ); + + ASSERT_TRUE( & vr1[0] == & mv(0,0) ); + ASSERT_TRUE( & vr2[0] == & mv(0,1) ); + ASSERT_TRUE( & vr3[0] == & mv(0,2) ); + + ASSERT_TRUE( & cvr1[0] == & mv(0,0) ); + ASSERT_TRUE( & cvr2[0] == & mv(0,1) ); + ASSERT_TRUE( & cvr3[0] == & mv(0,2) ); + + ASSERT_TRUE( & mv1(0,0) == & mv( 1 , 2 ) ); + 
ASSERT_TRUE( & mv1(1,1) == & mv( 2 , 3 ) ); + ASSERT_TRUE( & mv1(3,2) == & mv( 4 , 4 ) ); + ASSERT_TRUE( & mvr1(0,0) == & mv_right( 1 , 2 ) ); + ASSERT_TRUE( & mvr1(1,1) == & mv_right( 2 , 3 ) ); + ASSERT_TRUE( & mvr1(3,2) == & mv_right( 4 , 4 ) ); + + const_vector_type c_cv1( v1 ); + typename vector_type::const_type c_cv2( v2 ); + typename const_vector_type::const_type c_ccv2( v2 ); + + const_multivector_type cmv( mv ); + typename multivector_type::const_type cmvX( cmv ); + typename const_multivector_type::const_type ccmvX( cmv ); + } +}; + +} // namespace Test + +/*--------------------------------------------------------------------------*/ + diff --git a/lib/kokkos/core/unit_test/TestViewImpl.hpp b/lib/kokkos/core/unit_test/TestViewImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c34ef759d1dd41bbb9238ccdb37f2aa28955af6d --- /dev/null +++ b/lib/kokkos/core/unit_test/TestViewImpl.hpp @@ -0,0 +1,289 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <stdexcept> +#include <sstream> +#include <iostream> + +#include <Kokkos_Core.hpp> + +/*--------------------------------------------------------------------------*/ + +#if KOKKOS_USING_EXP_VIEW + +namespace Test { + +template < class Device > +void test_view_impl() {} + +} + +#else + +/*--------------------------------------------------------------------------*/ + +namespace Test { + +struct DummyMemorySpace +{ + typedef DummyMemorySpace memory_space ; + typedef unsigned size_type ; +}; + +/*--------------------------------------------------------------------------*/ + +template< class Type > +struct DefineShape { + typedef typename Kokkos::Impl::AnalyzeShape<Type>::shape type ; +}; + +template< class Type > +struct ExtractValueType { + typedef typename Kokkos::Impl::AnalyzeShape<Type>::value_type type ; +}; + +template< class Type > +struct ArrayType { typedef Type type ; }; + +template < class Device > +void test_view_impl() +{ + //typedef typename Device::memory_space memory_space ; // unused 
+ + typedef ArrayType< int[100] >::type type_01 ; + typedef ArrayType< int* >::type type_11 ; + typedef ArrayType< int[5][6][700] >::type type_03 ; + typedef ArrayType< double*[8][9][900] >::type type_14 ; + typedef ArrayType< long** >::type type_22 ; + typedef ArrayType< short **[5][6][7] >::type type_25 ; + typedef ArrayType< const short **[5][6][7] >::type const_type_25 ; + typedef ArrayType< short***[5][6][7] >::type type_36 ; + typedef ArrayType< const short***[5][6][7] >::type const_type_36 ; + + // mfh 14 Feb 2014: With gcc 4.8.2 -Wall, this emits a warning: + // + // typedef ‘ok_const_25’ locally defined but not used [-Wunused-local-typedefs] + // + // It's unfortunate that this is the case, because the typedef is + // being used for a compile-time check! We deal with this by + // declaring an instance of ok_const_25, and marking it with + // "(void)" so that instance doesn't emit an "unused variable" + // warning. + // + // typedef typename Kokkos::Impl::StaticAssertSame< + // typename Kokkos::Impl::AnalyzeShape<type_25>::const_type , + // typename Kokkos::Impl::AnalyzeShape<const_type_25>::type + // > ok_const_25 ; + + typedef typename Kokkos::Impl::StaticAssertSame< + typename Kokkos::Impl::AnalyzeShape<type_25>::const_type, + typename Kokkos::Impl::AnalyzeShape<const_type_25>::type + > ok_const_25 ; + + typedef typename Kokkos::Impl::StaticAssertSame< + typename Kokkos::Impl::AnalyzeShape<type_36>::const_type, + typename Kokkos::Impl::AnalyzeShape<const_type_36>::type + > ok_const_36 ; + { + ok_const_25 thing_25 ; + ok_const_36 thing_36 ; + (void) thing_25 ; // silence warning for unused variable + (void) thing_36 ; // silence warning for unused variable + } + + ASSERT_TRUE( ( Kokkos::Impl::is_same< ExtractValueType<type_03>::type , int >::value ) ); + ASSERT_TRUE( ( Kokkos::Impl::is_same< ExtractValueType<type_14>::type , double >::value ) ); + ASSERT_TRUE( ( Kokkos::Impl::is_same< ExtractValueType<type_22>::type , long >::value ) ); + ASSERT_TRUE( ( 
Kokkos::Impl::is_same< ExtractValueType<type_36>::type , short >::value ) ); + + ASSERT_FALSE( ( Kokkos::Impl::is_same< ExtractValueType<type_36>::type , int >::value ) ); + + typedef typename DefineShape< type_01 >::type shape_01_type ; + typedef typename DefineShape< type_11 >::type shape_11_type ; + typedef typename DefineShape< type_03 >::type shape_03_type ; + typedef typename DefineShape< type_14 >::type shape_14_type ; + typedef typename DefineShape< type_22 >::type shape_22_type ; + typedef typename DefineShape< type_36 >::type shape_36_type ; + + ASSERT_TRUE( ( Kokkos::Impl::StaticAssert< shape_36_type::rank == 6 >::value ) ); + ASSERT_TRUE( ( Kokkos::Impl::StaticAssert< shape_03_type::rank == 3 >::value ) ); + + shape_01_type shape_01 ; shape_01_type::assign( shape_01 ); + shape_11_type shape_11 ; shape_11_type::assign( shape_11, 1000 ); + shape_03_type shape_03 ; shape_03_type::assign( shape_03 ); + shape_14_type shape_14 ; shape_14_type::assign( shape_14 , 0 ); + shape_22_type shape_22 ; shape_22_type::assign( shape_22 , 0 , 0 ); + shape_36_type shape_36 ; shape_36_type::assign( shape_36 , 10 , 20 , 30 ); + + ASSERT_TRUE( shape_01.rank_dynamic == 0u ); + ASSERT_TRUE( shape_01.rank == 1u ); + ASSERT_TRUE( shape_01.N0 == 100u ); + + ASSERT_TRUE( shape_11.rank_dynamic == 1u ); + ASSERT_TRUE( shape_11.rank == 1u ); + ASSERT_TRUE( shape_11.N0 == 1000u ); + + ASSERT_TRUE( shape_03.rank_dynamic == 0u ); + ASSERT_TRUE( shape_03.rank == 3u ); + ASSERT_TRUE( shape_03.N0 == 5u ); + ASSERT_TRUE( shape_03.N1 == 6u ); + ASSERT_TRUE( shape_03.N2 == 700u ); + + ASSERT_TRUE( shape_14.rank_dynamic == 1u ); + ASSERT_TRUE( shape_14.rank == 4u ); + ASSERT_TRUE( shape_14.N0 == 0u ); + ASSERT_TRUE( shape_14.N1 == 8u ); + ASSERT_TRUE( shape_14.N2 == 9u ); + ASSERT_TRUE( shape_14.N3 == 900u ); + + ASSERT_TRUE( shape_22.rank_dynamic == 2u ); + ASSERT_TRUE( shape_22.rank == 2u ); + ASSERT_TRUE( shape_22.N0 == 0u ); + ASSERT_TRUE( shape_22.N1 == 0u ); + + ASSERT_TRUE( 
shape_36.rank_dynamic == 3u ); + ASSERT_TRUE( shape_36.rank == 6u ); + ASSERT_TRUE( shape_36.N0 == 10u ); + ASSERT_TRUE( shape_36.N1 == 20u ); + ASSERT_TRUE( shape_36.N2 == 30u ); + ASSERT_TRUE( shape_36.N3 == 5u ); + ASSERT_TRUE( shape_36.N4 == 6u ); + ASSERT_TRUE( shape_36.N5 == 7u ); + + + ASSERT_TRUE( shape_01 == shape_01 ); + ASSERT_TRUE( shape_11 == shape_11 ); + ASSERT_TRUE( shape_36 == shape_36 ); + ASSERT_TRUE( shape_01 != shape_36 ); + ASSERT_TRUE( shape_22 != shape_36 ); + + //------------------------------------------------------------------------ + + typedef Kokkos::Impl::ViewOffset< shape_01_type , Kokkos::LayoutLeft > shape_01_left_offset ; + typedef Kokkos::Impl::ViewOffset< shape_11_type , Kokkos::LayoutLeft > shape_11_left_offset ; + typedef Kokkos::Impl::ViewOffset< shape_03_type , Kokkos::LayoutLeft > shape_03_left_offset ; + typedef Kokkos::Impl::ViewOffset< shape_14_type , Kokkos::LayoutLeft > shape_14_left_offset ; + typedef Kokkos::Impl::ViewOffset< shape_22_type , Kokkos::LayoutLeft > shape_22_left_offset ; + typedef Kokkos::Impl::ViewOffset< shape_36_type , Kokkos::LayoutLeft > shape_36_left_offset ; + + typedef Kokkos::Impl::ViewOffset< shape_01_type , Kokkos::LayoutRight > shape_01_right_offset ; + typedef Kokkos::Impl::ViewOffset< shape_11_type , Kokkos::LayoutRight > shape_11_right_offset ; + typedef Kokkos::Impl::ViewOffset< shape_03_type , Kokkos::LayoutRight > shape_03_right_offset ; + typedef Kokkos::Impl::ViewOffset< shape_14_type , Kokkos::LayoutRight > shape_14_right_offset ; + typedef Kokkos::Impl::ViewOffset< shape_22_type , Kokkos::LayoutRight > shape_22_right_offset ; + typedef Kokkos::Impl::ViewOffset< shape_36_type , Kokkos::LayoutRight > shape_36_right_offset ; + + ASSERT_TRUE( ! shape_01_left_offset::has_padding ); + ASSERT_TRUE( ! shape_11_left_offset::has_padding ); + ASSERT_TRUE( ! 
shape_03_left_offset::has_padding ); + ASSERT_TRUE( shape_14_left_offset::has_padding ); + ASSERT_TRUE( shape_22_left_offset::has_padding ); + ASSERT_TRUE( shape_36_left_offset::has_padding ); + + ASSERT_TRUE( ! shape_01_right_offset::has_padding ); + ASSERT_TRUE( ! shape_11_right_offset::has_padding ); + ASSERT_TRUE( ! shape_03_right_offset::has_padding ); + ASSERT_TRUE( ! shape_14_right_offset::has_padding ); + ASSERT_TRUE( shape_22_right_offset::has_padding ); + ASSERT_TRUE( shape_36_right_offset::has_padding ); + + //------------------------------------------------------------------------ + + typedef Kokkos::Impl::ViewOffset< shape_01_type , Kokkos::LayoutStride > shape_01_stride_offset ; + typedef Kokkos::Impl::ViewOffset< shape_36_type , Kokkos::LayoutStride > shape_36_stride_offset ; + + { + shape_01_stride_offset stride_offset_01 ; + + stride_offset_01.assign( 1, stride_offset_01.N0, 0,0,0,0,0,0,0 ); + + ASSERT_EQ( int(stride_offset_01.S[0]) , int(1) ); + ASSERT_EQ( int(stride_offset_01.S[1]) , int(stride_offset_01.N0) ); + } + + { + shape_36_stride_offset stride_offset_36 ; + + size_t str[7] ; + str[5] = 1 ; + str[4] = str[5] * stride_offset_36.N5 ; + str[3] = str[4] * stride_offset_36.N4 ; + str[2] = str[3] * stride_offset_36.N3 ; + str[1] = str[2] * 100 ; + str[0] = str[1] * 200 ; + str[6] = str[0] * 300 ; + + stride_offset_36.assign( str[0] , str[1] , str[2] , str[3] , str[4] , str[5] , str[6] , 0 , 0 ); + + ASSERT_EQ( size_t(stride_offset_36.S[6]) , size_t(str[6]) ); + ASSERT_EQ( size_t(stride_offset_36.N2) , size_t(100) ); + ASSERT_EQ( size_t(stride_offset_36.N1) , size_t(200) ); + ASSERT_EQ( size_t(stride_offset_36.N0) , size_t(300) ); + } + + //------------------------------------------------------------------------ + + { + const int rank = 6 ; + const int order[] = { 5 , 3 , 1 , 0 , 2 , 4 }; + const unsigned dim[] = { 2 , 3 , 5 , 7 , 11 , 13 }; + Kokkos::LayoutStride stride_6 = Kokkos::LayoutStride::order_dimensions( rank , order , dim ); + size_t 
n = 1 ; + for ( int i = 0 ; i < rank ; ++i ) { + ASSERT_EQ( size_t(dim[i]) , size_t( stride_6.dimension[i] ) ); + ASSERT_EQ( size_t(n) , size_t( stride_6.stride[ order[i] ] ) ); + n *= dim[order[i]] ; + } + } + + //------------------------------------------------------------------------ +} + +} /* namespace Test */ + +#endif + +/*--------------------------------------------------------------------------*/ + diff --git a/lib/kokkos/core/unit_test/TestViewMapping.hpp b/lib/kokkos/core/unit_test/TestViewMapping.hpp new file mode 100644 index 0000000000000000000000000000000000000000..eddb81bed5cfaa855dc51a43d4a560bc69030543 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestViewMapping.hpp @@ -0,0 +1,1307 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <stdexcept> +#include <sstream> +#include <iostream> + +#include <Kokkos_Core.hpp> + +/*--------------------------------------------------------------------------*/ + +namespace Test { + +template< class Space > +void test_view_mapping() +{ + typedef typename Space::execution_space ExecSpace ; + + typedef Kokkos::Experimental::Impl::ViewDimension<> dim_0 ; + typedef Kokkos::Experimental::Impl::ViewDimension<2> dim_s2 ; + typedef Kokkos::Experimental::Impl::ViewDimension<2,3> dim_s2_s3 ; + typedef Kokkos::Experimental::Impl::ViewDimension<2,3,4> dim_s2_s3_s4 ; + + typedef Kokkos::Experimental::Impl::ViewDimension<0> dim_s0 ; + typedef Kokkos::Experimental::Impl::ViewDimension<0,3> dim_s0_s3 ; + typedef Kokkos::Experimental::Impl::ViewDimension<0,3,4> dim_s0_s3_s4 ; + + typedef Kokkos::Experimental::Impl::ViewDimension<0,0> dim_s0_s0 ; + typedef Kokkos::Experimental::Impl::ViewDimension<0,0,4> dim_s0_s0_s4 ; + + typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0> dim_s0_s0_s0 ; + typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0> dim_s0_s0_s0_s0 ; + typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0,0> dim_s0_s0_s0_s0_s0 ; + typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0,0,0> dim_s0_s0_s0_s0_s0_s0 ; + 
typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0,0,0,0> dim_s0_s0_s0_s0_s0_s0_s0 ; + typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0,0,0,0,0> dim_s0_s0_s0_s0_s0_s0_s0_s0 ; + + // Fully static dimensions should not be larger than an int + ASSERT_LE( sizeof(dim_0) , sizeof(int) ); + ASSERT_LE( sizeof(dim_s2) , sizeof(int) ); + ASSERT_LE( sizeof(dim_s2_s3) , sizeof(int) ); + ASSERT_LE( sizeof(dim_s2_s3_s4) , sizeof(int) ); + + // Rank 1 is size_t + ASSERT_EQ( sizeof(dim_s0) , sizeof(size_t) ); + ASSERT_EQ( sizeof(dim_s0_s3) , sizeof(size_t) ); + ASSERT_EQ( sizeof(dim_s0_s3_s4) , sizeof(size_t) ); + + // Allow for padding + ASSERT_LE( sizeof(dim_s0_s0) , 2 * sizeof(size_t) ); + ASSERT_LE( sizeof(dim_s0_s0_s4) , 2 * sizeof(size_t) ); + + ASSERT_LE( sizeof(dim_s0_s0_s0) , 4 * sizeof(size_t) ); + ASSERT_EQ( sizeof(dim_s0_s0_s0_s0) , 4 * sizeof(unsigned) ); + ASSERT_LE( sizeof(dim_s0_s0_s0_s0_s0) , 6 * sizeof(unsigned) ); + ASSERT_EQ( sizeof(dim_s0_s0_s0_s0_s0_s0) , 6 * sizeof(unsigned) ); + ASSERT_LE( sizeof(dim_s0_s0_s0_s0_s0_s0_s0) , 8 * sizeof(unsigned) ); + ASSERT_EQ( sizeof(dim_s0_s0_s0_s0_s0_s0_s0_s0) , 8 * sizeof(unsigned) ); + + ASSERT_EQ( int(dim_0::rank) , int(0) ); + ASSERT_EQ( int(dim_0::rank_dynamic) , int(0) ); + + ASSERT_EQ( int(dim_s2::rank) , int(1) ); + ASSERT_EQ( int(dim_s2::rank_dynamic) , int(0) ); + + ASSERT_EQ( int(dim_s2_s3::rank) , int(2) ); + ASSERT_EQ( int(dim_s2_s3::rank_dynamic) , int(0) ); + + ASSERT_EQ( int(dim_s2_s3_s4::rank) , int(3) ); + ASSERT_EQ( int(dim_s2_s3_s4::rank_dynamic) , int(0) ); + + ASSERT_EQ( int(dim_s0::rank) , int(1) ); + ASSERT_EQ( int(dim_s0::rank_dynamic) , int(1) ); + + ASSERT_EQ( int(dim_s0_s3::rank) , int(2) ); + ASSERT_EQ( int(dim_s0_s3::rank_dynamic) , int(1) ); + + ASSERT_EQ( int(dim_s0_s3_s4::rank) , int(3) ); + ASSERT_EQ( int(dim_s0_s3_s4::rank_dynamic) , int(1) ); + + ASSERT_EQ( int(dim_s0_s0_s4::rank) , int(3) ); + ASSERT_EQ( int(dim_s0_s0_s4::rank_dynamic) , int(2) ); + + ASSERT_EQ( 
int(dim_s0_s0_s0::rank) , int(3) ); + ASSERT_EQ( int(dim_s0_s0_s0::rank_dynamic) , int(3) ); + + ASSERT_EQ( int(dim_s0_s0_s0_s0::rank) , int(4) ); + ASSERT_EQ( int(dim_s0_s0_s0_s0::rank_dynamic) , int(4) ); + + ASSERT_EQ( int(dim_s0_s0_s0_s0_s0::rank) , int(5) ); + ASSERT_EQ( int(dim_s0_s0_s0_s0_s0::rank_dynamic) , int(5) ); + + ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0::rank) , int(6) ); + ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0::rank_dynamic) , int(6) ); + + ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0_s0::rank) , int(7) ); + ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0_s0::rank_dynamic) , int(7) ); + + ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0_s0_s0::rank) , int(8) ); + ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0_s0_s0::rank_dynamic) , int(8) ); + + dim_s0 d1( 2, 3, 4, 5, 6, 7, 8, 9 ); + dim_s0_s0 d2( 2, 3, 4, 5, 6, 7, 8, 9 ); + dim_s0_s0_s0 d3( 2, 3, 4, 5, 6, 7, 8, 9 ); + dim_s0_s0_s0_s0 d4( 2, 3, 4, 5, 6, 7, 8, 9 ); + + ASSERT_EQ( d1.N0 , 2 ); + ASSERT_EQ( d2.N0 , 2 ); + ASSERT_EQ( d3.N0 , 2 ); + ASSERT_EQ( d4.N0 , 2 ); + + ASSERT_EQ( d1.N1 , 1 ); + ASSERT_EQ( d2.N1 , 3 ); + ASSERT_EQ( d3.N1 , 3 ); + ASSERT_EQ( d4.N1 , 3 ); + + ASSERT_EQ( d1.N2 , 1 ); + ASSERT_EQ( d2.N2 , 1 ); + ASSERT_EQ( d3.N2 , 4 ); + ASSERT_EQ( d4.N2 , 4 ); + + ASSERT_EQ( d1.N3 , 1 ); + ASSERT_EQ( d2.N3 , 1 ); + ASSERT_EQ( d3.N3 , 1 ); + ASSERT_EQ( d4.N3 , 5 ); + + //---------------------------------------- + + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s0 , Kokkos::LayoutStride > stride_s0_s0_s0 ; + + //---------------------------------------- + // Static dimension + { + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s2_s3_s4 , Kokkos::LayoutLeft > left_s2_s3_s4 ; + + ASSERT_EQ( sizeof(left_s2_s3_s4) , sizeof(dim_s2_s3_s4) ); + + left_s2_s3_s4 off3 ; + + stride_s0_s0_s0 stride3( off3 ); + + ASSERT_EQ( off3.stride_0() , 1 ); + ASSERT_EQ( off3.stride_1() , 2 ); + ASSERT_EQ( off3.stride_2() , 6 ); + ASSERT_EQ( off3.span() , 24 ); + + ASSERT_EQ( off3.stride_0() , stride3.stride_0() ); + ASSERT_EQ( 
off3.stride_1() , stride3.stride_1() ); + ASSERT_EQ( off3.stride_2() , stride3.stride_2() ); + ASSERT_EQ( off3.span() , stride3.span() ); + + int offset = 0 ; + + for ( int k = 0 ; k < 4 ; ++k ){ + for ( int j = 0 ; j < 3 ; ++j ){ + for ( int i = 0 ; i < 2 ; ++i , ++offset ){ + ASSERT_EQ( off3(i,j,k) , offset ); + ASSERT_EQ( stride3(i,j,k) , off3(i,j,k) ); + }}} + } + + //---------------------------------------- + // Small dimension is unpadded + { + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutLeft > left_s0_s0_s4 ; + + left_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>() + , Kokkos::LayoutLeft( 2, 3, 0, 0, 0, 0, 0, 0 ) ); + + stride_s0_s0_s0 stride3( dyn_off3 ); + + ASSERT_EQ( dyn_off3.m_dim.rank , 3 ); + ASSERT_EQ( dyn_off3.m_dim.N0 , 2 ); + ASSERT_EQ( dyn_off3.m_dim.N1 , 3 ); + ASSERT_EQ( dyn_off3.m_dim.N2 , 4 ); + ASSERT_EQ( dyn_off3.m_dim.N3 , 1 ); + ASSERT_EQ( dyn_off3.size() , 2 * 3 * 4 ); + + const Kokkos::LayoutLeft layout = dyn_off3.layout(); + + ASSERT_EQ( layout.dimension[0] , 2 ); + ASSERT_EQ( layout.dimension[1] , 3 ); + ASSERT_EQ( layout.dimension[2] , 4 ); + ASSERT_EQ( layout.dimension[3] , 1 ); + ASSERT_EQ( layout.dimension[4] , 1 ); + ASSERT_EQ( layout.dimension[5] , 1 ); + ASSERT_EQ( layout.dimension[6] , 1 ); + ASSERT_EQ( layout.dimension[7] , 1 ); + + ASSERT_EQ( stride3.m_dim.rank , 3 ); + ASSERT_EQ( stride3.m_dim.N0 , 2 ); + ASSERT_EQ( stride3.m_dim.N1 , 3 ); + ASSERT_EQ( stride3.m_dim.N2 , 4 ); + ASSERT_EQ( stride3.m_dim.N3 , 1 ); + ASSERT_EQ( stride3.size() , 2 * 3 * 4 ); + + int offset = 0 ; + + for ( int k = 0 ; k < 4 ; ++k ){ + for ( int j = 0 ; j < 3 ; ++j ){ + for ( int i = 0 ; i < 2 ; ++i , ++offset ){ + ASSERT_EQ( offset , dyn_off3(i,j,k) ); + ASSERT_EQ( stride3(i,j,k) , dyn_off3(i,j,k) ); + }}} + + ASSERT_EQ( dyn_off3.span() , offset ); + ASSERT_EQ( stride3.span() , dyn_off3.span() ); + } + + // Large dimension is likely padded + { + constexpr int N0 = 2000 ; + constexpr int N1 = 300 
; + + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutLeft > left_s0_s0_s4 ; + + left_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>() + , Kokkos::LayoutLeft( N0, N1, 0, 0, 0, 0, 0, 0 ) ); + + stride_s0_s0_s0 stride3( dyn_off3 ); + + ASSERT_EQ( dyn_off3.m_dim.rank , 3 ); + ASSERT_EQ( dyn_off3.m_dim.N0 , N0 ); + ASSERT_EQ( dyn_off3.m_dim.N1 , N1 ); + ASSERT_EQ( dyn_off3.m_dim.N2 , 4 ); + ASSERT_EQ( dyn_off3.m_dim.N3 , 1 ); + ASSERT_EQ( dyn_off3.size() , N0 * N1 * 4 ); + + ASSERT_EQ( stride3.m_dim.rank , 3 ); + ASSERT_EQ( stride3.m_dim.N0 , N0 ); + ASSERT_EQ( stride3.m_dim.N1 , N1 ); + ASSERT_EQ( stride3.m_dim.N2 , 4 ); + ASSERT_EQ( stride3.m_dim.N3 , 1 ); + ASSERT_EQ( stride3.size() , N0 * N1 * 4 ); + ASSERT_EQ( stride3.span() , dyn_off3.span() ); + + int offset = 0 ; + + for ( int k = 0 ; k < 4 ; ++k ){ + for ( int j = 0 ; j < N1 ; ++j ){ + for ( int i = 0 ; i < N0 ; ++i ){ + ASSERT_LE( offset , dyn_off3(i,j,k) ); + ASSERT_EQ( stride3(i,j,k) , dyn_off3(i,j,k) ); + offset = dyn_off3(i,j,k) + 1 ; + }}} + + ASSERT_LE( offset , dyn_off3.span() ); + } + + //---------------------------------------- + // Static dimension + { + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s2_s3_s4 , Kokkos::LayoutRight > right_s2_s3_s4 ; + + ASSERT_EQ( sizeof(right_s2_s3_s4) , sizeof(dim_s2_s3_s4) ); + + right_s2_s3_s4 off3 ; + + stride_s0_s0_s0 stride3( off3 ); + + ASSERT_EQ( off3.stride_0() , 12 ); + ASSERT_EQ( off3.stride_1() , 4 ); + ASSERT_EQ( off3.stride_2() , 1 ); + + ASSERT_EQ( off3.dimension_0() , stride3.dimension_0() ); + ASSERT_EQ( off3.dimension_1() , stride3.dimension_1() ); + ASSERT_EQ( off3.dimension_2() , stride3.dimension_2() ); + ASSERT_EQ( off3.stride_0() , stride3.stride_0() ); + ASSERT_EQ( off3.stride_1() , stride3.stride_1() ); + ASSERT_EQ( off3.stride_2() , stride3.stride_2() ); + ASSERT_EQ( off3.span() , stride3.span() ); + + int offset = 0 ; + + for ( int i = 0 ; i < 2 ; ++i ){ + for ( int j = 0 ; j < 3 ; ++j ){ 
+ for ( int k = 0 ; k < 4 ; ++k , ++offset ){ + ASSERT_EQ( off3(i,j,k) , offset ); + ASSERT_EQ( off3(i,j,k) , stride3(i,j,k) ); + }}} + + ASSERT_EQ( off3.span() , offset ); + } + + //---------------------------------------- + // Small dimension is unpadded + { + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutRight > right_s0_s0_s4 ; + + right_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>() + , Kokkos::LayoutRight( 2, 3, 0, 0, 0, 0, 0, 0 ) ); + + stride_s0_s0_s0 stride3( dyn_off3 ); + + ASSERT_EQ( dyn_off3.m_dim.rank , 3 ); + ASSERT_EQ( dyn_off3.m_dim.N0 , 2 ); + ASSERT_EQ( dyn_off3.m_dim.N1 , 3 ); + ASSERT_EQ( dyn_off3.m_dim.N2 , 4 ); + ASSERT_EQ( dyn_off3.m_dim.N3 , 1 ); + ASSERT_EQ( dyn_off3.size() , 2 * 3 * 4 ); + + ASSERT_EQ( dyn_off3.dimension_0() , stride3.dimension_0() ); + ASSERT_EQ( dyn_off3.dimension_1() , stride3.dimension_1() ); + ASSERT_EQ( dyn_off3.dimension_2() , stride3.dimension_2() ); + ASSERT_EQ( dyn_off3.stride_0() , stride3.stride_0() ); + ASSERT_EQ( dyn_off3.stride_1() , stride3.stride_1() ); + ASSERT_EQ( dyn_off3.stride_2() , stride3.stride_2() ); + ASSERT_EQ( dyn_off3.span() , stride3.span() ); + + int offset = 0 ; + + for ( int i = 0 ; i < 2 ; ++i ){ + for ( int j = 0 ; j < 3 ; ++j ){ + for ( int k = 0 ; k < 4 ; ++k , ++offset ){ + ASSERT_EQ( offset , dyn_off3(i,j,k) ); + ASSERT_EQ( dyn_off3(i,j,k) , stride3(i,j,k) ); + }}} + + ASSERT_EQ( dyn_off3.span() , offset ); + } + + // Large dimension is likely padded + { + constexpr int N0 = 2000 ; + constexpr int N1 = 300 ; + + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutRight > right_s0_s0_s4 ; + + right_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>() + , Kokkos::LayoutRight( N0, N1, 0, 0, 0, 0, 0, 0 ) ); + + stride_s0_s0_s0 stride3( dyn_off3 ); + + ASSERT_EQ( dyn_off3.m_dim.rank , 3 ); + ASSERT_EQ( dyn_off3.m_dim.N0 , N0 ); + ASSERT_EQ( dyn_off3.m_dim.N1 , N1 ); + ASSERT_EQ( dyn_off3.m_dim.N2 , 4 
); + ASSERT_EQ( dyn_off3.m_dim.N3 , 1 ); + ASSERT_EQ( dyn_off3.size() , N0 * N1 * 4 ); + + ASSERT_EQ( dyn_off3.dimension_0() , stride3.dimension_0() ); + ASSERT_EQ( dyn_off3.dimension_1() , stride3.dimension_1() ); + ASSERT_EQ( dyn_off3.dimension_2() , stride3.dimension_2() ); + ASSERT_EQ( dyn_off3.stride_0() , stride3.stride_0() ); + ASSERT_EQ( dyn_off3.stride_1() , stride3.stride_1() ); + ASSERT_EQ( dyn_off3.stride_2() , stride3.stride_2() ); + ASSERT_EQ( dyn_off3.span() , stride3.span() ); + + int offset = 0 ; + + for ( int i = 0 ; i < N0 ; ++i ){ + for ( int j = 0 ; j < N1 ; ++j ){ + for ( int k = 0 ; k < 4 ; ++k ){ + ASSERT_LE( offset , dyn_off3(i,j,k) ); + ASSERT_EQ( dyn_off3(i,j,k) , stride3(i,j,k) ); + offset = dyn_off3(i,j,k) + 1 ; + }}} + + ASSERT_LE( offset , dyn_off3.span() ); + } + + //---------------------------------------- + // Subview + { + // Mapping rank 4 to rank 3 + typedef Kokkos::Experimental::Impl::SubviewExtents<4,3> SubviewExtents ; + + constexpr int N0 = 1000 ; + constexpr int N1 = 2000 ; + constexpr int N2 = 3000 ; + constexpr int N3 = 4000 ; + + Kokkos::Experimental::Impl::ViewDimension<N0,N1,N2,N3> dim ; + + SubviewExtents tmp( dim + , N0 / 2 + , Kokkos::Experimental::ALL + , std::pair<int,int>( N2 / 4 , 10 + N2 / 4 ) + , Kokkos::pair<int,int>( N3 / 4 , 20 + N3 / 4 ) + ); + + ASSERT_EQ( tmp.domain_offset(0) , N0 / 2 ); + ASSERT_EQ( tmp.domain_offset(1) , 0 ); + ASSERT_EQ( tmp.domain_offset(2) , N2 / 4 ); + ASSERT_EQ( tmp.domain_offset(3) , N3 / 4 ); + + ASSERT_EQ( tmp.range_index(0) , 1 ); + ASSERT_EQ( tmp.range_index(1) , 2 ); + ASSERT_EQ( tmp.range_index(2) , 3 ); + + ASSERT_EQ( tmp.range_extent(0) , N1 ); + ASSERT_EQ( tmp.range_extent(1) , 10 ); + ASSERT_EQ( tmp.range_extent(2) , 20 ); + } + //---------------------------------------- + { + constexpr int N0 = 2000 ; + constexpr int N1 = 300 ; + + constexpr int sub_N0 = 1000 ; + constexpr int sub_N1 = 200 ; + constexpr int sub_N2 = 4 ; + + typedef 
Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutLeft > left_s0_s0_s4 ; + + left_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>() + , Kokkos::LayoutLeft( N0, N1, 0, 0, 0, 0, 0, 0 ) ); + + Kokkos::Experimental::Impl::SubviewExtents< 3 , 3 > + sub( dyn_off3.m_dim + , Kokkos::pair<int,int>(0,sub_N0) + , Kokkos::pair<int,int>(0,sub_N1) + , Kokkos::pair<int,int>(0,sub_N2) + ); + + stride_s0_s0_s0 stride3( dyn_off3 , sub ); + + ASSERT_EQ( stride3.dimension_0() , sub_N0 ); + ASSERT_EQ( stride3.dimension_1() , sub_N1 ); + ASSERT_EQ( stride3.dimension_2() , sub_N2 ); + ASSERT_EQ( stride3.size() , sub_N0 * sub_N1 * sub_N2 ); + + ASSERT_EQ( dyn_off3.stride_0() , stride3.stride_0() ); + ASSERT_EQ( dyn_off3.stride_1() , stride3.stride_1() ); + ASSERT_EQ( dyn_off3.stride_2() , stride3.stride_2() ); + ASSERT_GE( dyn_off3.span() , stride3.span() ); + + for ( int k = 0 ; k < sub_N2 ; ++k ){ + for ( int j = 0 ; j < sub_N1 ; ++j ){ + for ( int i = 0 ; i < sub_N0 ; ++i ){ + ASSERT_EQ( stride3(i,j,k) , dyn_off3(i,j,k) ); + }}} + } + + { + constexpr int N0 = 2000 ; + constexpr int N1 = 300 ; + + constexpr int sub_N0 = 1000 ; + constexpr int sub_N1 = 200 ; + constexpr int sub_N2 = 4 ; + + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutRight > right_s0_s0_s4 ; + + right_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>() + , Kokkos::LayoutRight( N0, N1, 0, 0, 0, 0, 0, 0 ) ); + + Kokkos::Experimental::Impl::SubviewExtents< 3 , 3 > + sub( dyn_off3.m_dim + , Kokkos::pair<int,int>(0,sub_N0) + , Kokkos::pair<int,int>(0,sub_N1) + , Kokkos::pair<int,int>(0,sub_N2) + ); + + stride_s0_s0_s0 stride3( dyn_off3 , sub ); + + ASSERT_EQ( stride3.dimension_0() , sub_N0 ); + ASSERT_EQ( stride3.dimension_1() , sub_N1 ); + ASSERT_EQ( stride3.dimension_2() , sub_N2 ); + ASSERT_EQ( stride3.size() , sub_N0 * sub_N1 * sub_N2 ); + + ASSERT_EQ( dyn_off3.stride_0() , stride3.stride_0() ); + ASSERT_EQ( dyn_off3.stride_1() , 
stride3.stride_1() ); + ASSERT_EQ( dyn_off3.stride_2() , stride3.stride_2() ); + ASSERT_GE( dyn_off3.span() , stride3.span() ); + + for ( int i = 0 ; i < sub_N0 ; ++i ){ + for ( int j = 0 ; j < sub_N1 ; ++j ){ + for ( int k = 0 ; k < sub_N2 ; ++k ){ + ASSERT_EQ( stride3(i,j,k) , dyn_off3(i,j,k) ); + }}} + } + + //---------------------------------------- + // view data analysis + { + using namespace Kokkos::Experimental::Impl ; + static_assert( rank_dynamic<>::value == 0 , "" ); + static_assert( rank_dynamic<1>::value == 0 , "" ); + static_assert( rank_dynamic<0>::value == 1 , "" ); + static_assert( rank_dynamic<0,1>::value == 1 , "" ); + static_assert( rank_dynamic<0,0,1>::value == 2 , "" ); + } + + { + using namespace Kokkos::Experimental::Impl ; + + typedef ViewArrayAnalysis< int[] > a_int_r1 ; + typedef ViewArrayAnalysis< int**[4][5][6] > a_int_r5 ; + typedef ViewArrayAnalysis< const int[] > a_const_int_r1 ; + typedef ViewArrayAnalysis< const int**[4][5][6] > a_const_int_r5 ; + + static_assert( a_int_r1::dimension::rank == 1 , "" ); + static_assert( a_int_r1::dimension::rank_dynamic == 1 , "" ); + static_assert( std::is_same< typename a_int_r1::dimension , ViewDimension<0> >::value , "" ); + static_assert( std::is_same< typename a_int_r1::non_const_value_type , int >::value , "" ); + + static_assert( a_const_int_r1::dimension::rank == 1 , "" ); + static_assert( a_const_int_r1::dimension::rank_dynamic == 1 , "" ); + static_assert( std::is_same< typename a_const_int_r1::dimension , ViewDimension<0> >::value , "" ); + static_assert( std::is_same< typename a_const_int_r1::non_const_value_type , int >::value , "" ); + + static_assert( a_const_int_r5::dimension::rank == 5 , "" ); + static_assert( a_const_int_r5::dimension::rank_dynamic == 2 , "" ); + + static_assert( std::is_same< typename a_const_int_r5::dimension , ViewDimension<0,0,4,5,6> >::value , "" ); + + static_assert( std::is_same< typename a_const_int_r5::non_const_value_type , int >::value , "" ); + + 
static_assert( a_int_r5::dimension::rank == 5 , "" ); + static_assert( a_int_r5::dimension::rank_dynamic == 2 , "" ); + static_assert( std::is_same< typename a_int_r5::dimension , ViewDimension<0,0,4,5,6> >::value , "" ); + static_assert( std::is_same< typename a_int_r5::non_const_value_type , int >::value , "" ); + } + + { + using namespace Kokkos::Experimental::Impl ; + + typedef int t_i4[4] ; + + // Dimensions of t_i4 are appended to the multidimensional array. + typedef ViewArrayAnalysis< t_i4 ***[3] > a_int_r5 ; + + static_assert( a_int_r5::dimension::rank == 5 , "" ); + static_assert( a_int_r5::dimension::rank_dynamic == 3 , "" ); + static_assert( a_int_r5::dimension::ArgN0 == 0 , "" ); + static_assert( a_int_r5::dimension::ArgN1 == 0 , "" ); + static_assert( a_int_r5::dimension::ArgN2 == 0 , "" ); + static_assert( a_int_r5::dimension::ArgN3 == 3 , "" ); + static_assert( a_int_r5::dimension::ArgN4 == 4 , "" ); + static_assert( std::is_same< typename a_int_r5::non_const_value_type , int >::value , "" ); + } + + { + using namespace Kokkos::Experimental::Impl ; + + typedef ViewDataAnalysis< const int[] , void > a_const_int_r1 ; + + static_assert( std::is_same< typename a_const_int_r1::specialize , void >::value , "" ); + static_assert( std::is_same< typename a_const_int_r1::dimension , Kokkos::Experimental::Impl::ViewDimension<0> >::value , "" ); + + static_assert( std::is_same< typename a_const_int_r1::type , const int * >::value , "" ); + static_assert( std::is_same< typename a_const_int_r1::value_type , const int >::value , "" ); + + static_assert( std::is_same< typename a_const_int_r1::scalar_array_type , const int * >::value , "" ); + static_assert( std::is_same< typename a_const_int_r1::const_type , const int * >::value , "" ); + static_assert( std::is_same< typename a_const_int_r1::const_value_type , const int >::value , "" ); + static_assert( std::is_same< typename a_const_int_r1::const_scalar_array_type , const int * >::value , "" ); + static_assert( 
std::is_same< typename a_const_int_r1::non_const_type , int * >::value , "" ); + static_assert( std::is_same< typename a_const_int_r1::non_const_value_type , int >::value , "" ); + + typedef ViewDataAnalysis< const int**[4] , void > a_const_int_r3 ; + + static_assert( std::is_same< typename a_const_int_r3::specialize , void >::value , "" ); + + static_assert( std::is_same< typename a_const_int_r3::dimension , Kokkos::Experimental::Impl::ViewDimension<0,0,4> >::value , "" ); + + static_assert( std::is_same< typename a_const_int_r3::type , const int**[4] >::value , "" ); + static_assert( std::is_same< typename a_const_int_r3::value_type , const int >::value , "" ); + static_assert( std::is_same< typename a_const_int_r3::scalar_array_type , const int**[4] >::value , "" ); + static_assert( std::is_same< typename a_const_int_r3::const_type , const int**[4] >::value , "" ); + static_assert( std::is_same< typename a_const_int_r3::const_value_type , const int >::value , "" ); + static_assert( std::is_same< typename a_const_int_r3::const_scalar_array_type , const int**[4] >::value , "" ); + static_assert( std::is_same< typename a_const_int_r3::non_const_type , int**[4] >::value , "" ); + static_assert( std::is_same< typename a_const_int_r3::non_const_value_type , int >::value , "" ); + static_assert( std::is_same< typename a_const_int_r3::non_const_scalar_array_type , int**[4] >::value , "" ); + + + // std::cout << "typeid(const int**[4]).name() = " << typeid(const int**[4]).name() << std::endl ; + } + + //---------------------------------------- + + { + constexpr int N = 10 ; + + typedef Kokkos::Experimental::View<int*,Space> T ; + typedef Kokkos::Experimental::View<const int*,Space> C ; + + int data[N] ; + + T vr1(data,N); // view of non-const + C cr1(vr1); // view of const from view of non-const + C cr2( (const int *) data , N ); + + // Generate static_assert error: + // T tmp( cr1 ); + + ASSERT_EQ( vr1.span() , N ); + ASSERT_EQ( cr1.span() , N ); + ASSERT_EQ( vr1.data() 
, & data[0] ); + ASSERT_EQ( cr1.data() , & data[0] ); + + ASSERT_TRUE( ( std::is_same< typename T::data_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_data_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_data_type , int* >::value ) ); + + ASSERT_TRUE( ( std::is_same< typename T::scalar_array_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_scalar_array_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_scalar_array_type , int* >::value ) ); + + ASSERT_TRUE( ( std::is_same< typename T::value_type , int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_value_type , const int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_value_type , int >::value ) ); + + ASSERT_TRUE( ( std::is_same< typename T::memory_space , typename Space::memory_space >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::reference_type , int & >::value ) ); + + ASSERT_EQ( T::Rank , 1 ); + + ASSERT_TRUE( ( std::is_same< typename C::data_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::const_data_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::non_const_data_type , int* >::value ) ); + + ASSERT_TRUE( ( std::is_same< typename C::scalar_array_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::const_scalar_array_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::non_const_scalar_array_type , int* >::value ) ); + + ASSERT_TRUE( ( std::is_same< typename C::value_type , const int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::const_value_type , const int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::non_const_value_type , int >::value ) ); + + ASSERT_TRUE( ( std::is_same< typename C::memory_space , typename Space::memory_space >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::reference_type , const int & >::value ) ); + 
+ ASSERT_EQ( C::Rank , 1 ); + + ASSERT_EQ( vr1.dimension_0() , N ); + + if ( Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename Space::memory_space , Kokkos::HostSpace >::value ) { + for ( int i = 0 ; i < N ; ++i ) data[i] = i + 1 ; + for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 1 ); + for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( cr1[i] , i + 1 ); + + { + T tmp( vr1 ); + for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( tmp[i] , i + 1 ); + for ( int i = 0 ; i < N ; ++i ) vr1(i) = i + 2 ; + for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( tmp[i] , i + 2 ); + } + + for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 2 ); + } + } + + + { + constexpr int N = 10 ; + typedef Kokkos::Experimental::View<int*,Space> T ; + typedef Kokkos::Experimental::View<const int*,Space> C ; + + T vr1("vr1",N); + C cr1(vr1); + + ASSERT_TRUE( ( std::is_same< typename T::data_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_data_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_data_type , int* >::value ) ); + + ASSERT_TRUE( ( std::is_same< typename T::scalar_array_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_scalar_array_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_scalar_array_type , int* >::value ) ); + + ASSERT_TRUE( ( std::is_same< typename T::value_type , int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_value_type , const int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_value_type , int >::value ) ); + + ASSERT_TRUE( ( std::is_same< typename T::memory_space , typename Space::memory_space >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::reference_type , int & >::value ) ); + ASSERT_EQ( T::Rank , 1 ); + + ASSERT_EQ( vr1.dimension_0() , N ); + + if ( Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename Space::memory_space , Kokkos::HostSpace >::value ) { + for ( int i = 0 ; i < N ; ++i ) vr1(i) 
= i + 1 ; + for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 1 ); + for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( cr1[i] , i + 1 ); + + { + T tmp( vr1 ); + for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( tmp[i] , i + 1 ); + for ( int i = 0 ; i < N ; ++i ) vr1(i) = i + 2 ; + for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( tmp[i] , i + 2 ); + } + + for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 2 ); + } + } + + // Testing proper handling of zero-length allocations + { + constexpr int N = 0 ; + typedef Kokkos::Experimental::View<int*,Space> T ; + typedef Kokkos::Experimental::View<const int*,Space> C ; + + T vr1("vr1",N); + C cr1(vr1); + + ASSERT_EQ( vr1.dimension_0() , 0 ); + ASSERT_EQ( cr1.dimension_0() , 0 ); + } + + + // Testing using space instance for allocation. + // The execution space of the memory space must be available for view data initialization + + if ( std::is_same< ExecSpace , typename ExecSpace::memory_space::execution_space >::value ) { + + using namespace Kokkos::Experimental ; + + typedef typename ExecSpace::memory_space memory_space ; + typedef View<int*,memory_space> V ; + + constexpr int N = 10 ; + + memory_space mem_space ; + + V v( "v" , N ); + V va( view_alloc() , N ); + V vb( view_alloc( "vb" ) , N ); + V vc( view_alloc( "vc" , AllowPadding ) , N ); + V vd( view_alloc( "vd" , WithoutInitializing ) , N ); + V ve( view_alloc( "ve" , WithoutInitializing , AllowPadding ) , N ); + V vf( view_alloc( "vf" , mem_space , WithoutInitializing , AllowPadding ) , N ); + V vg( view_alloc( mem_space , "vg" , WithoutInitializing , AllowPadding ) , N ); + V vh( view_alloc( WithoutInitializing , AllowPadding ) , N ); + V vi( view_alloc( WithoutInitializing ) , N ); + V vj( view_alloc( std::string("vj") , AllowPadding ) , N ); + V vk( view_alloc( mem_space , std::string("vk") , AllowPadding ) , N ); + } + + { + typedef Kokkos::Experimental::ViewTraits<int***,Kokkos::LayoutStride,ExecSpace> traits_t ; + typedef 
Kokkos::Experimental::Impl::ViewDimension<0,0,0> dims_t ; + typedef Kokkos::Experimental::Impl::ViewOffset< dims_t , Kokkos::LayoutStride > offset_t ; + + Kokkos::LayoutStride stride ; + + stride.dimension[0] = 3 ; + stride.dimension[1] = 4 ; + stride.dimension[2] = 5 ; + stride.stride[0] = 4 ; + stride.stride[1] = 1 ; + stride.stride[2] = 12 ; + + const offset_t offset( std::integral_constant<unsigned,0>() , stride ); + + ASSERT_EQ( offset.dimension_0() , 3 ); + ASSERT_EQ( offset.dimension_1() , 4 ); + ASSERT_EQ( offset.dimension_2() , 5 ); + + ASSERT_EQ( offset.stride_0() , 4 ); + ASSERT_EQ( offset.stride_1() , 1 ); + ASSERT_EQ( offset.stride_2() , 12 ); + + ASSERT_EQ( offset.span() , 60 ); + ASSERT_TRUE( offset.span_is_contiguous() ); + + Kokkos::Experimental::Impl::ViewMapping< traits_t , void > + v( Kokkos::Experimental::Impl::ViewCtorProp<int*>((int*)0), stride ); + } + + { + typedef Kokkos::Experimental::View<int**,Space> V ; + typedef typename V::HostMirror M ; + + constexpr int N0 = 10 ; + constexpr int N1 = 11 ; + + V a("a",N0,N1); + M b = Kokkos::Experimental::create_mirror(a); + M c = Kokkos::Experimental::create_mirror_view(a); + M d ; + + for ( int i0 = 0 ; i0 < N0 ; ++i0 ) + for ( int i1 = 0 ; i1 < N1 ; ++i1 ) + b(i0,i1) = 1 + i0 + i1 * N0 ; + + Kokkos::Experimental::deep_copy( a , b ); + Kokkos::Experimental::deep_copy( c , a ); + + for ( int i0 = 0 ; i0 < N0 ; ++i0 ) + for ( int i1 = 0 ; i1 < N1 ; ++i1 ) + ASSERT_EQ( b(i0,i1) , c(i0,i1) ); + + Kokkos::Experimental::resize( b , 5 , 6 ); + Kokkos::Experimental::realloc( c , 5 , 6 ); + Kokkos::Experimental::realloc( d , 5 , 6 ); + + ASSERT_EQ( b.dimension_0() , 5 ); + ASSERT_EQ( b.dimension_1() , 6 ); + ASSERT_EQ( c.dimension_0() , 5 ); + ASSERT_EQ( c.dimension_1() , 6 ); + ASSERT_EQ( d.dimension_0() , 5 ); + ASSERT_EQ( d.dimension_1() , 6 ); + } + + { + typedef Kokkos::Experimental::View<int*,Space> V ; + typedef Kokkos::Experimental::View<int*,Space,Kokkos::MemoryUnmanaged> U ; + + + V a("a",10); + 
+ ASSERT_EQ( a.use_count() , 1 ); + + V b = a ; + + ASSERT_EQ( a.use_count() , 2 ); + ASSERT_EQ( b.use_count() , 2 ); + + { + U c = b ; // 'c' is compile-time unmanaged + + ASSERT_EQ( a.use_count() , 2 ); + ASSERT_EQ( b.use_count() , 2 ); + ASSERT_EQ( c.use_count() , 2 ); + + V d = c ; // 'd' is run-time unmanaged + + ASSERT_EQ( a.use_count() , 2 ); + ASSERT_EQ( b.use_count() , 2 ); + ASSERT_EQ( c.use_count() , 2 ); + ASSERT_EQ( d.use_count() , 2 ); + } + + ASSERT_EQ( a.use_count() , 2 ); + ASSERT_EQ( b.use_count() , 2 ); + + b = V(); + + ASSERT_EQ( a.use_count() , 1 ); + ASSERT_EQ( b.use_count() , 0 ); + +#if KOKKOS_USING_EXP_VIEW && ! defined ( KOKKOS_CUDA_USE_LAMBDA ) + /* Cannot launch host lambda when CUDA lambda is enabled */ + + typedef typename Kokkos::Impl::is_space< Space >::host_execution_space + host_exec_space ; + + Kokkos::parallel_for( + Kokkos::RangePolicy< host_exec_space >(0,10) , + KOKKOS_LAMBDA( int i ){ + // 'a' is captured by copy and the capture mechanism + // converts 'a' to an unmanaged copy. + // When the parallel dispatch accepts a move for the lambda + // this count should become 1 + ASSERT_EQ( a.use_count() , 2 ); + V x = a ; + ASSERT_EQ( a.use_count() , 2 ); + ASSERT_EQ( x.use_count() , 2 ); + }); +#endif /* #if KOKKOS_USING_EXP_VIEW && ! 
defined ( KOKKOS_CUDA_USE_LAMBDA ) */ + } +} + +template< class Space > +struct TestViewMappingSubview +{ + typedef typename Space::execution_space ExecSpace ; + typedef typename Space::memory_space MemSpace ; + + typedef Kokkos::pair<int,int> range ; + + enum { AN = 10 }; + typedef Kokkos::Experimental::View<int*,ExecSpace> AT ; + typedef Kokkos::Experimental::View<const int*,ExecSpace> ACT ; + typedef Kokkos::Experimental::Subview< AT , range > AS ; + + enum { BN0 = 10 , BN1 = 11 , BN2 = 12 }; + typedef Kokkos::Experimental::View<int***,ExecSpace> BT ; + typedef Kokkos::Experimental::Subview< BT , range , range , range > BS ; + + enum { CN0 = 10 , CN1 = 11 , CN2 = 12 }; + typedef Kokkos::Experimental::View<int***[13][14],ExecSpace> CT ; + typedef Kokkos::Experimental::Subview< CT , range , range , range , int , int > CS ; + + enum { DN0 = 10 , DN1 = 11 , DN2 = 12 , DN3 = 13 , DN4 = 14 }; + typedef Kokkos::Experimental::View<int***[DN3][DN4],ExecSpace> DT ; + typedef Kokkos::Experimental::Subview< DT , int , range , range , range , int > DS ; + + + typedef Kokkos::Experimental::View<int***[13][14],Kokkos::LayoutLeft,ExecSpace> DLT ; + typedef Kokkos::Experimental::Subview< DLT , range , int , int , int , int > DLS1 ; + + static_assert( DLS1::rank == 1 && std::is_same< typename DLS1::array_layout , Kokkos::LayoutLeft >::value + , "Subview layout error for rank 1 subview of left-most range of LayoutLeft" ); + + typedef Kokkos::Experimental::View<int***[13][14],Kokkos::LayoutRight,ExecSpace> DRT ; + typedef Kokkos::Experimental::Subview< DRT , int , int , int , int , range > DRS1 ; + + static_assert( DRS1::rank == 1 && std::is_same< typename DRS1::array_layout , Kokkos::LayoutRight >::value + , "Subview layout error for rank 1 subview of right-most range of LayoutRight" ); + + AT Aa ; + AS Ab ; + ACT Ac ; + BT Ba ; + BS Bb ; + CT Ca ; + CS Cb ; + DT Da ; + DS Db ; + + TestViewMappingSubview() + : Aa("Aa",AN) + , Ab( Kokkos::Experimental::subview( Aa , 
std::pair<int,int>(1,AN-1) ) ) + , Ac( Aa , std::pair<int,int>(1,AN-1) ) + , Ba("Ba",BN0,BN1,BN2) + , Bb( Kokkos::Experimental::subview( Ba + , std::pair<int,int>(1,BN0-1) + , std::pair<int,int>(1,BN1-1) + , std::pair<int,int>(1,BN2-1) + ) ) + , Ca("Ca",CN0,CN1,CN2) + , Cb( Kokkos::Experimental::subview( Ca + , std::pair<int,int>(1,CN0-1) + , std::pair<int,int>(1,CN1-1) + , std::pair<int,int>(1,CN2-1) + , 1 + , 2 + ) ) + , Da("Da",DN0,DN1,DN2) + , Db( Kokkos::Experimental::subview( Da + , 1 + , std::pair<int,int>(1,DN1-1) + , std::pair<int,int>(1,DN2-1) + , std::pair<int,int>(1,DN3-1) + , 2 + ) ) + { + } + + + KOKKOS_INLINE_FUNCTION + void operator()( const int , long & error_count ) const + { + auto Ad = Kokkos::Experimental::subview< Kokkos::MemoryUnmanaged >( Aa , Kokkos::pair<int,int>(1,AN-1) ); + + for ( int i = 1 ; i < AN-1 ; ++i ) if( & Aa[i] != & Ab[i-1] ) ++error_count ; + for ( int i = 1 ; i < AN-1 ; ++i ) if( & Aa[i] != & Ac[i-1] ) ++error_count ; + for ( int i = 1 ; i < AN-1 ; ++i ) if( & Aa[i] != & Ad[i-1] ) ++error_count ; + + for ( int i2 = 1 ; i2 < BN2-1 ; ++i2 ) { + for ( int i1 = 1 ; i1 < BN1-1 ; ++i1 ) { + for ( int i0 = 1 ; i0 < BN0-1 ; ++i0 ) { + if ( & Ba(i0,i1,i2) != & Bb(i0-1,i1-1,i2-1) ) ++error_count ; + }}} + + for ( int i2 = 1 ; i2 < CN2-1 ; ++i2 ) { + for ( int i1 = 1 ; i1 < CN1-1 ; ++i1 ) { + for ( int i0 = 1 ; i0 < CN0-1 ; ++i0 ) { + if ( & Ca(i0,i1,i2,1,2) != & Cb(i0-1,i1-1,i2-1) ) ++error_count ; + }}} + + for ( int i2 = 1 ; i2 < DN3-1 ; ++i2 ) { + for ( int i1 = 1 ; i1 < DN2-1 ; ++i1 ) { + for ( int i0 = 1 ; i0 < DN1-1 ; ++i0 ) { + if ( & Da(1,i0,i1,i2,2) != & Db(i0-1,i1-1,i2-1) ) ++error_count ; + }}} + } + + static void run() + { + TestViewMappingSubview self ; + + ASSERT_EQ( self.Aa.dimension_0() , AN ); + ASSERT_EQ( self.Ab.dimension_0() , AN - 2 ); + ASSERT_EQ( self.Ac.dimension_0() , AN - 2 ); + ASSERT_EQ( self.Ba.dimension_0() , BN0 ); + ASSERT_EQ( self.Ba.dimension_1() , BN1 ); + ASSERT_EQ( self.Ba.dimension_2() , BN2 ); + 
ASSERT_EQ( self.Bb.dimension_0() , BN0 - 2 ); + ASSERT_EQ( self.Bb.dimension_1() , BN1 - 2 ); + ASSERT_EQ( self.Bb.dimension_2() , BN2 - 2 ); + + ASSERT_EQ( self.Ca.dimension_0() , CN0 ); + ASSERT_EQ( self.Ca.dimension_1() , CN1 ); + ASSERT_EQ( self.Ca.dimension_2() , CN2 ); + ASSERT_EQ( self.Ca.dimension_3() , 13 ); + ASSERT_EQ( self.Ca.dimension_4() , 14 ); + ASSERT_EQ( self.Cb.dimension_0() , CN0 - 2 ); + ASSERT_EQ( self.Cb.dimension_1() , CN1 - 2 ); + ASSERT_EQ( self.Cb.dimension_2() , CN2 - 2 ); + + ASSERT_EQ( self.Da.dimension_0() , DN0 ); + ASSERT_EQ( self.Da.dimension_1() , DN1 ); + ASSERT_EQ( self.Da.dimension_2() , DN2 ); + ASSERT_EQ( self.Da.dimension_3() , DN3 ); + ASSERT_EQ( self.Da.dimension_4() , DN4 ); + + ASSERT_EQ( self.Db.dimension_0() , DN1 - 2 ); + ASSERT_EQ( self.Db.dimension_1() , DN2 - 2 ); + ASSERT_EQ( self.Db.dimension_2() , DN3 - 2 ); + + ASSERT_EQ( self.Da.stride_1() , self.Db.stride_0() ); + ASSERT_EQ( self.Da.stride_2() , self.Db.stride_1() ); + ASSERT_EQ( self.Da.stride_3() , self.Db.stride_2() ); + + long error_count = -1 ; + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >(0,1) , self , error_count ); + ASSERT_EQ( error_count , 0 ); + } + +}; + +template< class Space > +void test_view_mapping_subview() +{ + typedef typename Space::execution_space ExecSpace ; + + TestViewMappingSubview< ExecSpace >::run(); +} + +/*--------------------------------------------------------------------------*/ + +template< class ViewType > +struct TestViewMapOperator { + + static_assert( ViewType::reference_type_is_lvalue_reference + , "Test only valid for lvalue reference type" ); + + const ViewType v ; + + KOKKOS_INLINE_FUNCTION + void test_left( size_t i0 , long & error_count ) const + { + typename ViewType::value_type * const base_ptr = & v(0,0,0,0,0,0,0,0); + const size_t n1 = v.dimension_1(); + const size_t n2 = v.dimension_2(); + const size_t n3 = v.dimension_3(); + const size_t n4 = v.dimension_4(); + const size_t n5 = v.dimension_5(); + 
const size_t n6 = v.dimension_6(); + const size_t n7 = v.dimension_7(); + + long offset = 0 ; + + for ( size_t i7 = 0 ; i7 < n7 ; ++i7 ) + for ( size_t i6 = 0 ; i6 < n6 ; ++i6 ) + for ( size_t i5 = 0 ; i5 < n5 ; ++i5 ) + for ( size_t i4 = 0 ; i4 < n4 ; ++i4 ) + for ( size_t i3 = 0 ; i3 < n3 ; ++i3 ) + for ( size_t i2 = 0 ; i2 < n2 ; ++i2 ) + for ( size_t i1 = 0 ; i1 < n1 ; ++i1 ) + { + const long d = & v(i0,i1,i2,i3,i4,i5,i6,i7) - base_ptr ; + if ( d < offset ) ++error_count ; + offset = d ; + } + + if ( v.span() <= size_t(offset) ) ++error_count ; + } + + KOKKOS_INLINE_FUNCTION + void test_right( size_t i0 , long & error_count ) const + { + typename ViewType::value_type * const base_ptr = & v(0,0,0,0,0,0,0,0); + const size_t n1 = v.dimension_1(); + const size_t n2 = v.dimension_2(); + const size_t n3 = v.dimension_3(); + const size_t n4 = v.dimension_4(); + const size_t n5 = v.dimension_5(); + const size_t n6 = v.dimension_6(); + const size_t n7 = v.dimension_7(); + + long offset = 0 ; + + for ( size_t i1 = 0 ; i1 < n1 ; ++i1 ) + for ( size_t i2 = 0 ; i2 < n2 ; ++i2 ) + for ( size_t i3 = 0 ; i3 < n3 ; ++i3 ) + for ( size_t i4 = 0 ; i4 < n4 ; ++i4 ) + for ( size_t i5 = 0 ; i5 < n5 ; ++i5 ) + for ( size_t i6 = 0 ; i6 < n6 ; ++i6 ) + for ( size_t i7 = 0 ; i7 < n7 ; ++i7 ) + { + const long d = & v(i0,i1,i2,i3,i4,i5,i6,i7) - base_ptr ; + if ( d < offset ) ++error_count ; + offset = d ; + } + + if ( v.span() <= size_t(offset) ) ++error_count ; + } + + KOKKOS_INLINE_FUNCTION + void operator()( size_t i , long & error_count ) const + { + if ( std::is_same< typename ViewType::array_layout , Kokkos::LayoutLeft >::value ) + test_left(i,error_count); + else if ( std::is_same< typename ViewType::array_layout , Kokkos::LayoutRight >::value ) + test_right(i,error_count); + } + + constexpr static size_t N0 = 10 ; + constexpr static size_t N1 = 9 ; + constexpr static size_t N2 = 8 ; + constexpr static size_t N3 = 7 ; + constexpr static size_t N4 = 6 ; + constexpr static size_t N5 
= 5 ; + constexpr static size_t N6 = 4 ; + constexpr static size_t N7 = 3 ; + + TestViewMapOperator() : v( "Test" , N0, N1, N2, N3, N4, N5, N6, N7 ) {} + + static void run() + { + TestViewMapOperator self ; + + ASSERT_EQ( self.v.dimension_0() , ( 0 < ViewType::rank ? N0 : 1 ) ); + ASSERT_EQ( self.v.dimension_1() , ( 1 < ViewType::rank ? N1 : 1 ) ); + ASSERT_EQ( self.v.dimension_2() , ( 2 < ViewType::rank ? N2 : 1 ) ); + ASSERT_EQ( self.v.dimension_3() , ( 3 < ViewType::rank ? N3 : 1 ) ); + ASSERT_EQ( self.v.dimension_4() , ( 4 < ViewType::rank ? N4 : 1 ) ); + ASSERT_EQ( self.v.dimension_5() , ( 5 < ViewType::rank ? N5 : 1 ) ); + ASSERT_EQ( self.v.dimension_6() , ( 6 < ViewType::rank ? N6 : 1 ) ); + ASSERT_EQ( self.v.dimension_7() , ( 7 < ViewType::rank ? N7 : 1 ) ); + + ASSERT_LE( self.v.dimension_0()* + self.v.dimension_1()* + self.v.dimension_2()* + self.v.dimension_3()* + self.v.dimension_4()* + self.v.dimension_5()* + self.v.dimension_6()* + self.v.dimension_7() + , self.v.span() ); + + long error_count ; + Kokkos::RangePolicy< typename ViewType::execution_space > range(0,self.v.dimension_0()); + Kokkos::parallel_reduce( range , self , error_count ); + ASSERT_EQ( 0 , error_count ); + } +}; + + +template< class Space > +void test_view_mapping_operator() +{ + typedef typename Space::execution_space ExecSpace ; + + TestViewMapOperator< Kokkos::Experimental::View<int,Kokkos::LayoutLeft,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::Experimental::View<int*,Kokkos::LayoutLeft,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::Experimental::View<int**,Kokkos::LayoutLeft,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::Experimental::View<int***,Kokkos::LayoutLeft,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::Experimental::View<int****,Kokkos::LayoutLeft,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::Experimental::View<int*****,Kokkos::LayoutLeft,ExecSpace> >::run(); + TestViewMapOperator< 
Kokkos::Experimental::View<int******,Kokkos::LayoutLeft,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::Experimental::View<int*******,Kokkos::LayoutLeft,ExecSpace> >::run(); + + TestViewMapOperator< Kokkos::Experimental::View<int,Kokkos::LayoutRight,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::Experimental::View<int*,Kokkos::LayoutRight,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::Experimental::View<int**,Kokkos::LayoutRight,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::Experimental::View<int***,Kokkos::LayoutRight,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::Experimental::View<int****,Kokkos::LayoutRight,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::Experimental::View<int*****,Kokkos::LayoutRight,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::Experimental::View<int******,Kokkos::LayoutRight,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::Experimental::View<int*******,Kokkos::LayoutRight,ExecSpace> >::run(); +} + +/*--------------------------------------------------------------------------*/ + +template< class Space > +struct TestViewMappingAtomic { + typedef typename Space::execution_space ExecSpace ; + typedef typename Space::memory_space MemSpace ; + + typedef Kokkos::MemoryTraits< Kokkos::Atomic > mem_trait ; + + typedef Kokkos::Experimental::View< int * , ExecSpace > T ; + typedef Kokkos::Experimental::View< int * , ExecSpace , mem_trait > T_atom ; + + T x ; + T_atom x_atom ; + + constexpr static size_t N = 100000 ; + + struct TagInit {}; + struct TagUpdate {}; + struct TagVerify {}; + + KOKKOS_INLINE_FUNCTION + void operator()( const TagInit & , const int i ) const + { x(i) = i ; } + + KOKKOS_INLINE_FUNCTION + void operator()( const TagUpdate & , const int i ) const + { x_atom(i%2) += 1 ; } + + KOKKOS_INLINE_FUNCTION + void operator()( const TagVerify & , const int i , long & error_count ) const + { + if ( i < 2 ) { if ( x(i) != int(i + N / 2) ) ++error_count ; } + else { if ( x(i) != int(i) ) ++error_count ; } + 
} + + TestViewMappingAtomic() + : x("x",N) + , x_atom( x ) + {} + + static void run() + { + ASSERT_TRUE( T::reference_type_is_lvalue_reference ); + ASSERT_FALSE( T_atom::reference_type_is_lvalue_reference ); + + TestViewMappingAtomic self ; + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace , TagInit >(0,N) , self ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace , TagUpdate >(0,N) , self ); + long error_count = -1 ; + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , TagVerify >(0,N) , self , error_count ); + ASSERT_EQ( 0 , error_count ); + } +}; + +/*--------------------------------------------------------------------------*/ + +template< class Space > +struct TestViewMappingClassValue { + typedef typename Space::execution_space ExecSpace ; + typedef typename Space::memory_space MemSpace ; + + struct ValueType { + KOKKOS_INLINE_FUNCTION + ValueType() + { +#if 0 +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) + printf("TestViewMappingClassValue construct on Cuda\n"); +#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + printf("TestViewMappingClassValue construct on Host\n"); +#else + printf("TestViewMappingClassValue construct unknown\n"); +#endif +#endif + } + KOKKOS_INLINE_FUNCTION + ~ValueType() + { +#if 0 +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) + printf("TestViewMappingClassValue destruct on Cuda\n"); +#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + printf("TestViewMappingClassValue destruct on Host\n"); +#else + printf("TestViewMappingClassValue destruct unknown\n"); +#endif +#endif + } + }; + + static void run() + { + using namespace Kokkos::Experimental ; + ExecSpace::fence(); + { + View< ValueType , ExecSpace > a("a"); + ExecSpace::fence(); + } + ExecSpace::fence(); + } +}; + +} /* namespace Test */ + +/*--------------------------------------------------------------------------*/ + diff --git a/lib/kokkos/core/unit_test/TestViewOfClass.hpp b/lib/kokkos/core/unit_test/TestViewOfClass.hpp 
new file mode 100644 index 0000000000000000000000000000000000000000..9b23a5d5597e2260e1a73b9f9b5b6b50a911567e --- /dev/null +++ b/lib/kokkos/core/unit_test/TestViewOfClass.hpp @@ -0,0 +1,163 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> +#include <stdexcept> +#include <sstream> +#include <iostream> + +/*--------------------------------------------------------------------------*/ + +namespace Test { + +template< class Space > +struct NestedView { + + Kokkos::View<int*,Space> member ; + +public: + + KOKKOS_INLINE_FUNCTION + NestedView() : member() + {} + + KOKKOS_INLINE_FUNCTION + NestedView & operator = ( const Kokkos::View<int*,Space> & lhs ) + { + member = lhs ; + if ( member.dimension_0() ) Kokkos::atomic_add( & member(0) , 1 ); + return *this ; + } + + KOKKOS_INLINE_FUNCTION + ~NestedView() + { + if ( member.dimension_0() ) { + Kokkos::atomic_add( & member(0) , -1 ); + } + } +}; + +template< class Space > +struct NestedViewFunctor { + + Kokkos::View< NestedView<Space> * , Space > nested ; + Kokkos::View<int*,Space> array ; + + NestedViewFunctor( + const Kokkos::View< NestedView<Space> * , Space > & arg_nested , + const Kokkos::View<int*,Space> & arg_array ) + : nested( arg_nested ) + , array( arg_array ) + {} + + KOKKOS_INLINE_FUNCTION + void operator()( int i ) const + { nested[i] = array ; } +}; + + +template< class Space > +void view_nested_view() +{ + Kokkos::View<int*,Space> tracking("tracking",1); + + typename 
Kokkos::View<int*,Space>::HostMirror + host_tracking = Kokkos::create_mirror( tracking ); + + { + Kokkos::View< NestedView<Space> * , Space > a("a_nested_view",2); + + Kokkos::parallel_for( Kokkos::RangePolicy<Space>(0,2) , NestedViewFunctor<Space>( a , tracking ) ); + Kokkos::deep_copy( host_tracking , tracking ); + ASSERT_EQ( 2 , host_tracking(0) ); + + Kokkos::View< NestedView<Space> * , Space > b("b_nested_view",2); + Kokkos::parallel_for( Kokkos::RangePolicy<Space>(0,2) , NestedViewFunctor<Space>( b , tracking ) ); + Kokkos::deep_copy( host_tracking , tracking ); + ASSERT_EQ( 4 , host_tracking(0) ); + + } + Kokkos::deep_copy( host_tracking , tracking ); + +#if KOKKOS_USING_EXP_VIEW + ASSERT_EQ( 0 , host_tracking(0) ); +#endif + +} + +} + +#if ! KOKKOS_USING_EXP_VIEW + +namespace Kokkos { +namespace Impl { + +template< class ExecSpace , class S > +struct ViewDefaultConstruct< ExecSpace , Test::NestedView<S> , true > +{ + typedef Test::NestedView<S> type ; + type * const m_ptr ; + + KOKKOS_FORCEINLINE_FUNCTION + void operator()( const typename ExecSpace::size_type& i ) const + { new(m_ptr+i) type(); } + + ViewDefaultConstruct( type * pointer , size_t capacity ) + : m_ptr( pointer ) + { + Kokkos::RangePolicy< ExecSpace > range( 0 , capacity ); + parallel_for( range , *this ); + ExecSpace::fence(); + } +}; + +} // namespace Impl +} // namespace Kokkos + +#endif + +/*--------------------------------------------------------------------------*/ + diff --git a/lib/kokkos/core/unit_test/TestViewSubview.hpp b/lib/kokkos/core/unit_test/TestViewSubview.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3846354b8c368f5c8505d84b4931a9105a6a14aa --- /dev/null +++ b/lib/kokkos/core/unit_test/TestViewSubview.hpp @@ -0,0 +1,874 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> +#include <stdexcept> +#include <sstream> +#include <iostream> + +/*--------------------------------------------------------------------------*/ + +namespace TestViewSubview { + +template<class Layout, class Space> +struct getView { + static + Kokkos::View<double**,Layout,Space> get(int n, int m) { + return Kokkos::View<double**,Layout,Space>("G",n,m); + } +}; + +template<class Space> +struct getView<Kokkos::LayoutStride,Space> { + static + Kokkos::View<double**,Kokkos::LayoutStride,Space> get(int n, int m) { + const int rank = 2 ; + const int order[] = { 0, 1 }; + const unsigned dim[] = { unsigned(n), unsigned(m) }; + Kokkos::LayoutStride stride = Kokkos::LayoutStride::order_dimensions( rank , order , dim ); + return Kokkos::View<double**,Kokkos::LayoutStride,Space>("G",stride); + } +}; + +template<class ViewType, class Space> +struct fill_1D { + typedef typename Space::execution_space execution_space; + typedef typename ViewType::size_type size_type; + ViewType a; + double val; + fill_1D(ViewType a_, double val_):a(a_),val(val_) { + } + KOKKOS_INLINE_FUNCTION + void operator() (const int i) const { + a(i) = val; + } +}; + +template<class ViewType, class Space> +struct fill_2D { + typedef typename Space::execution_space execution_space; + typedef typename ViewType::size_type size_type; + ViewType a; + double val; + fill_2D(ViewType a_, double val_):a(a_),val(val_) { + } + KOKKOS_INLINE_FUNCTION + void operator() (const int i) const{ + for(int j = 0; j < static_cast<int>(a.dimension_1()); j++) + a(i,j) = val; + } +}; + +template<class Layout, class Space> +void test_auto_1d () +{ + typedef Kokkos::View<double**, Layout, Space> mv_type; + typedef typename mv_type::size_type size_type; + const double ZERO = 0.0; + const double ONE = 1.0; + const double TWO = 2.0; + + const 
size_type numRows = 10; + const size_type numCols = 3; + + mv_type X = getView<Layout,Space>::get(numRows, numCols); + typename mv_type::HostMirror X_h = Kokkos::create_mirror_view (X); + + fill_2D<mv_type,Space> f1(X, ONE); + Kokkos::parallel_for(X.dimension_0(),f1); + Kokkos::deep_copy (X_h, X); + for (size_type j = 0; j < numCols; ++j) { + for (size_type i = 0; i < numRows; ++i) { + ASSERT_TRUE(X_h(i,j) == ONE); + } + } + + fill_2D<mv_type,Space> f2(X, 0.0); + Kokkos::parallel_for(X.dimension_0(),f2); + Kokkos::deep_copy (X_h, X); + for (size_type j = 0; j < numCols; ++j) { + for (size_type i = 0; i < numRows; ++i) { + ASSERT_TRUE(X_h(i,j) == ZERO); + } + } + + fill_2D<mv_type,Space> f3(X, TWO); + Kokkos::parallel_for(X.dimension_0(),f3); + Kokkos::deep_copy (X_h, X); + for (size_type j = 0; j < numCols; ++j) { + for (size_type i = 0; i < numRows; ++i) { + ASSERT_TRUE(X_h(i,j) == TWO); + } + } + + for (size_type j = 0; j < numCols; ++j) { + auto X_j = Kokkos::subview (X, Kokkos::ALL(), j); + + fill_1D<decltype(X_j),Space> f4(X_j, ZERO); + Kokkos::parallel_for(X_j.dimension_0(),f4); + Kokkos::deep_copy (X_h, X); + for (size_type i = 0; i < numRows; ++i) { + ASSERT_TRUE(X_h(i,j) == ZERO); + } + + for (size_type jj = 0; jj < numCols; ++jj) { + auto X_jj = Kokkos::subview (X, Kokkos::ALL(), jj); + fill_1D<decltype(X_jj),Space> f5(X_jj, ONE); + Kokkos::parallel_for(X_jj.dimension_0(),f5); + Kokkos::deep_copy (X_h, X); + for (size_type i = 0; i < numRows; ++i) { + ASSERT_TRUE(X_h(i,jj) == ONE); + } + } + } +} + +template<class LD, class LS, class Space> +void test_1d_strided_assignment_impl(bool a, bool b, bool c, bool d, int n, int m) { + Kokkos::View<double**,LS,Space> l2d("l2d",n,m); + + int col = n>2?2:0; + int row = m>2?2:0; + + if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) { + if(a) { + Kokkos::View<double*,LD,Space> l1da = Kokkos::subview(l2d,Kokkos::ALL(),row); + ASSERT_TRUE( & l1da(0) == & l2d(0,row) ); + if(n>1) + 
ASSERT_TRUE( & l1da(1) == & l2d(1,row) ); + } + if(b && n>13) { + Kokkos::View<double*,LD,Space> l1db = Kokkos::subview(l2d,std::pair<unsigned,unsigned>(2,13),row); + ASSERT_TRUE( & l1db(0) == & l2d(2,row) ); + ASSERT_TRUE( & l1db(1) == & l2d(3,row) ); + } + if(c) { + Kokkos::View<double*,LD,Space> l1dc = Kokkos::subview(l2d,col,Kokkos::ALL()); + ASSERT_TRUE( & l1dc(0) == & l2d(col,0) ); + if(m>1) + ASSERT_TRUE( & l1dc(1) == & l2d(col,1) ); + } + if(d && m>13) { + Kokkos::View<double*,LD,Space> l1dd = Kokkos::subview(l2d,col,std::pair<unsigned,unsigned>(2,13)); + ASSERT_TRUE( & l1dd(0) == & l2d(col,2) ); + ASSERT_TRUE( & l1dd(1) == & l2d(col,3) ); + } + } + +} + +template<class Space > +void test_1d_strided_assignment() { + test_1d_strided_assignment_impl<Kokkos::LayoutStride,Kokkos::LayoutLeft,Space>(true,true,true,true,17,3); + test_1d_strided_assignment_impl<Kokkos::LayoutStride,Kokkos::LayoutRight,Space>(true,true,true,true,17,3); + + test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutLeft,Space>(true,true,false,false,17,3); + test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutLeft,Space>(true,true,false,false,17,3); + test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutRight,Space>(false,false,true,true,17,3); + test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutRight,Space>(false,false,true,true,17,3); + + test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutLeft,Space>(true,true,false,false,17,1); + test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutLeft,Space>(true,true,true,true,1,17); + test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutLeft,Space>(true,true,true,true,1,17); + test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutLeft,Space>(true,true,false,false,17,1); + + test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutRight,Space>(true,true,true,true,17,1); + 
test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutRight,Space>(false,false,true,true,1,17); + test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutRight,Space>(false,false,true,true,1,17); + test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutRight,Space>(true,true,true,true,17,1); +} + +template< class Space > +void test_left_0() +{ + typedef Kokkos::View< int [2][3][4][5][2][3][4][5] , Kokkos::LayoutLeft , Space > + view_static_8_type ; + + if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) { + + view_static_8_type x_static_8("x_static_left_8"); + + ASSERT_TRUE( x_static_8.is_contiguous() ); + + Kokkos::View<int,Kokkos::LayoutLeft,Space> x0 = Kokkos::subview( x_static_8 , 0, 0, 0, 0, 0, 0, 0, 0 ); + + ASSERT_TRUE( x0.is_contiguous() ); + ASSERT_TRUE( & x0() == & x_static_8(0,0,0,0,0,0,0,0) ); + + Kokkos::View<int*,Kokkos::LayoutLeft,Space> x1 = + Kokkos::subview( x_static_8, Kokkos::pair<int,int>(0,2), 1, 2, 3, 0, 1, 2, 3 ); + + ASSERT_TRUE( x1.is_contiguous() ); + ASSERT_TRUE( & x1(0) == & x_static_8(0,1,2,3,0,1,2,3) ); + ASSERT_TRUE( & x1(1) == & x_static_8(1,1,2,3,0,1,2,3) ); + + Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2 = + Kokkos::subview( x_static_8, Kokkos::pair<int,int>(0,2), 1, 2, 3 + , Kokkos::pair<int,int>(0,2), 1, 2, 3 ); + + ASSERT_TRUE( ! x2.is_contiguous() ); + ASSERT_TRUE( & x2(0,0) == & x_static_8(0,1,2,3,0,1,2,3) ); + ASSERT_TRUE( & x2(1,0) == & x_static_8(1,1,2,3,0,1,2,3) ); + ASSERT_TRUE( & x2(0,1) == & x_static_8(0,1,2,3,1,1,2,3) ); + ASSERT_TRUE( & x2(1,1) == & x_static_8(1,1,2,3,1,1,2,3) ); + + // Kokkos::View<int**,Kokkos::LayoutLeft,Space> error_2 = + Kokkos::View<int**,Kokkos::LayoutStride,Space> sx2 = + Kokkos::subview( x_static_8, 1, Kokkos::pair<int,int>(0,2), 2, 3 + , Kokkos::pair<int,int>(0,2), 1, 2, 3 ); + + ASSERT_TRUE( ! 
sx2.is_contiguous() ); + ASSERT_TRUE( & sx2(0,0) == & x_static_8(1,0,2,3,0,1,2,3) ); + ASSERT_TRUE( & sx2(1,0) == & x_static_8(1,1,2,3,0,1,2,3) ); + ASSERT_TRUE( & sx2(0,1) == & x_static_8(1,0,2,3,1,1,2,3) ); + ASSERT_TRUE( & sx2(1,1) == & x_static_8(1,1,2,3,1,1,2,3) ); + + Kokkos::View<int****,Kokkos::LayoutStride,Space> sx4 = + Kokkos::subview( x_static_8, 0, Kokkos::pair<int,int>(0,2) /* of [3] */ + , 1, Kokkos::pair<int,int>(1,3) /* of [5] */ + , 1, Kokkos::pair<int,int>(0,2) /* of [3] */ + , 2, Kokkos::pair<int,int>(2,4) /* of [5] */ + ); + + ASSERT_TRUE( ! sx4.is_contiguous() ); + + for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 ) + for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 ) + for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 ) + for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) { + ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x_static_8(0,0+i0, 1,1+i1, 1,0+i2, 2,2+i3) ); + } + + } +} + +template< class Space > +void test_left_1() +{ + typedef Kokkos::View< int ****[2][3][4][5] , Kokkos::LayoutLeft , Space > + view_type ; + + if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) { + + view_type x8("x_left_8",2,3,4,5); + + ASSERT_TRUE( x8.is_contiguous() ); + + Kokkos::View<int,Kokkos::LayoutLeft,Space> x0 = Kokkos::subview( x8 , 0, 0, 0, 0, 0, 0, 0, 0 ); + + ASSERT_TRUE( x0.is_contiguous() ); + ASSERT_TRUE( & x0() == & x8(0,0,0,0,0,0,0,0) ); + + Kokkos::View<int*,Kokkos::LayoutLeft,Space> x1 = + Kokkos::subview( x8, Kokkos::pair<int,int>(0,2), 1, 2, 3, 0, 1, 2, 3 ); + + ASSERT_TRUE( x1.is_contiguous() ); + ASSERT_TRUE( & x1(0) == & x8(0,1,2,3,0,1,2,3) ); + ASSERT_TRUE( & x1(1) == & x8(1,1,2,3,0,1,2,3) ); + + Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2 = + Kokkos::subview( x8, Kokkos::pair<int,int>(0,2), 1, 2, 3 + , Kokkos::pair<int,int>(0,2), 1, 2, 3 ); + + ASSERT_TRUE( ! 
x2.is_contiguous() ); + ASSERT_TRUE( & x2(0,0) == & x8(0,1,2,3,0,1,2,3) ); + ASSERT_TRUE( & x2(1,0) == & x8(1,1,2,3,0,1,2,3) ); + ASSERT_TRUE( & x2(0,1) == & x8(0,1,2,3,1,1,2,3) ); + ASSERT_TRUE( & x2(1,1) == & x8(1,1,2,3,1,1,2,3) ); + + // Kokkos::View<int**,Kokkos::LayoutLeft,Space> error_2 = + Kokkos::View<int**,Kokkos::LayoutStride,Space> sx2 = + Kokkos::subview( x8, 1, Kokkos::pair<int,int>(0,2), 2, 3 + , Kokkos::pair<int,int>(0,2), 1, 2, 3 ); + + ASSERT_TRUE( ! sx2.is_contiguous() ); + ASSERT_TRUE( & sx2(0,0) == & x8(1,0,2,3,0,1,2,3) ); + ASSERT_TRUE( & sx2(1,0) == & x8(1,1,2,3,0,1,2,3) ); + ASSERT_TRUE( & sx2(0,1) == & x8(1,0,2,3,1,1,2,3) ); + ASSERT_TRUE( & sx2(1,1) == & x8(1,1,2,3,1,1,2,3) ); + + Kokkos::View<int****,Kokkos::LayoutStride,Space> sx4 = + Kokkos::subview( x8, 0, Kokkos::pair<int,int>(0,2) /* of [3] */ + , 1, Kokkos::pair<int,int>(1,3) /* of [5] */ + , 1, Kokkos::pair<int,int>(0,2) /* of [3] */ + , 2, Kokkos::pair<int,int>(2,4) /* of [5] */ + ); + + ASSERT_TRUE( ! sx4.is_contiguous() ); + + for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 ) + for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 ) + for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 ) + for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) { + ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x8(0,0+i0, 1,1+i1, 1,0+i2, 2,2+i3) ); + } + + } +} + +template< class Space > +void test_left_2() +{ + typedef Kokkos::View< int **** , Kokkos::LayoutLeft , Space > view_type ; + + if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) { + + view_type x4("x4",2,3,4,5); + + ASSERT_TRUE( x4.is_contiguous() ); + + Kokkos::View<int,Kokkos::LayoutLeft,Space> x0 = Kokkos::subview( x4 , 0, 0, 0, 0 ); + + ASSERT_TRUE( x0.is_contiguous() ); + ASSERT_TRUE( & x0() == & x4(0,0,0,0) ); + + Kokkos::View<int*,Kokkos::LayoutLeft,Space> x1 = + Kokkos::subview( x4, Kokkos::pair<int,int>(0,2), 1, 2, 3 ); + + ASSERT_TRUE( x1.is_contiguous() ); + ASSERT_TRUE( & x1(0) == & 
x4(0,1,2,3) ); + ASSERT_TRUE( & x1(1) == & x4(1,1,2,3) ); + + Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2 = + Kokkos::subview( x4, Kokkos::pair<int,int>(0,2), 1, Kokkos::pair<int,int>(1,3), 2 ); + + ASSERT_TRUE( ! x2.is_contiguous() ); + ASSERT_TRUE( & x2(0,0) == & x4(0,1,1,2) ); + ASSERT_TRUE( & x2(1,0) == & x4(1,1,1,2) ); + ASSERT_TRUE( & x2(0,1) == & x4(0,1,2,2) ); + ASSERT_TRUE( & x2(1,1) == & x4(1,1,2,2) ); + + // Kokkos::View<int**,Kokkos::LayoutLeft,Space> error_2 = + Kokkos::View<int**,Kokkos::LayoutStride,Space> sx2 = + Kokkos::subview( x4, 1, Kokkos::pair<int,int>(0,2) + , 2, Kokkos::pair<int,int>(1,4) ); + + ASSERT_TRUE( ! sx2.is_contiguous() ); + ASSERT_TRUE( & sx2(0,0) == & x4(1,0,2,1) ); + ASSERT_TRUE( & sx2(1,0) == & x4(1,1,2,1) ); + ASSERT_TRUE( & sx2(0,1) == & x4(1,0,2,2) ); + ASSERT_TRUE( & sx2(1,1) == & x4(1,1,2,2) ); + ASSERT_TRUE( & sx2(0,2) == & x4(1,0,2,3) ); + ASSERT_TRUE( & sx2(1,2) == & x4(1,1,2,3) ); + + Kokkos::View<int****,Kokkos::LayoutStride,Space> sx4 = + Kokkos::subview( x4, Kokkos::pair<int,int>(1,2) /* of [2] */ + , Kokkos::pair<int,int>(1,3) /* of [3] */ + , Kokkos::pair<int,int>(0,4) /* of [4] */ + , Kokkos::pair<int,int>(2,4) /* of [5] */ + ); + + ASSERT_TRUE( ! 
sx4.is_contiguous() ); + + for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 ) + for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 ) + for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 ) + for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) { + ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x4( 1+i0, 1+i1, 0+i2, 2+i3 ) ); + } + + } +} + +template< class Space > +void test_left_3() +{ + typedef Kokkos::View< int ** , Kokkos::LayoutLeft , Space > view_type ; + + if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) { + + view_type xm("x4",10,5); + + ASSERT_TRUE( xm.is_contiguous() ); + + Kokkos::View<int,Kokkos::LayoutLeft,Space> x0 = Kokkos::subview( xm , 5, 3 ); + + ASSERT_TRUE( x0.is_contiguous() ); + ASSERT_TRUE( & x0() == & xm(5,3) ); + + Kokkos::View<int*,Kokkos::LayoutLeft,Space> x1 = + Kokkos::subview( xm, Kokkos::ALL(), 3 ); + + ASSERT_TRUE( x1.is_contiguous() ); + for ( int i = 0 ; i < int(xm.dimension_0()) ; ++i ) { + ASSERT_TRUE( & x1(i) == & xm(i,3) ); + } + + Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2 = + Kokkos::subview( xm, Kokkos::pair<int,int>(1,9), Kokkos::ALL() ); + + ASSERT_TRUE( ! 
x2.is_contiguous() ); + for ( int j = 0 ; j < int(x2.dimension_1()) ; ++j ) + for ( int i = 0 ; i < int(x2.dimension_0()) ; ++i ) { + ASSERT_TRUE( & x2(i,j) == & xm(1+i,j) ); + } + + Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2c = + Kokkos::subview( xm, Kokkos::ALL(), std::pair<int,int>(2,4) ); + + ASSERT_TRUE( x2c.is_contiguous() ); + for ( int j = 0 ; j < int(x2c.dimension_1()) ; ++j ) + for ( int i = 0 ; i < int(x2c.dimension_0()) ; ++i ) { + ASSERT_TRUE( & x2c(i,j) == & xm(i,2+j) ); + } + + Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2_n1 = + Kokkos::subview( xm , std::pair<int,int>(1,1) , Kokkos::ALL() ); + + ASSERT_TRUE( x2_n1.dimension_0() == 0 ); + ASSERT_TRUE( x2_n1.dimension_1() == xm.dimension_1() ); + + Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2_n2 = + Kokkos::subview( xm , Kokkos::ALL() , std::pair<int,int>(1,1) ); + + ASSERT_TRUE( x2_n2.dimension_0() == xm.dimension_0() ); + ASSERT_TRUE( x2_n2.dimension_1() == 0 ); + + } +} + +//---------------------------------------------------------------------------- + +template< class Space > +void test_right_0() +{ + typedef Kokkos::View< int [2][3][4][5][2][3][4][5] , Kokkos::LayoutRight , Space > + view_static_8_type ; + + if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) { + + view_static_8_type x_static_8("x_static_right_8"); + + Kokkos::View<int,Kokkos::LayoutRight,Space> x0 = Kokkos::subview( x_static_8 , 0, 0, 0, 0, 0, 0, 0, 0 ); + + ASSERT_TRUE( & x0() == & x_static_8(0,0,0,0,0,0,0,0) ); + + Kokkos::View<int*,Kokkos::LayoutRight,Space> x1 = + Kokkos::subview( x_static_8, 0, 1, 2, 3, 0, 1, 2, Kokkos::pair<int,int>(1,3) ); + + ASSERT_TRUE( x1.dimension_0() == 2 ); + ASSERT_TRUE( & x1(0) == & x_static_8(0,1,2,3,0,1,2,1) ); + ASSERT_TRUE( & x1(1) == & x_static_8(0,1,2,3,0,1,2,2) ); + + Kokkos::View<int**,Kokkos::LayoutRight,Space> x2 = + Kokkos::subview( x_static_8, 0, 1, 2, Kokkos::pair<int,int>(1,3) + , 0, 1, 2, Kokkos::pair<int,int>(1,3) ); + + ASSERT_TRUE( 
x2.dimension_0() == 2 ); + ASSERT_TRUE( x2.dimension_1() == 2 ); + ASSERT_TRUE( & x2(0,0) == & x_static_8(0,1,2,1,0,1,2,1) ); + ASSERT_TRUE( & x2(1,0) == & x_static_8(0,1,2,2,0,1,2,1) ); + ASSERT_TRUE( & x2(0,1) == & x_static_8(0,1,2,1,0,1,2,2) ); + ASSERT_TRUE( & x2(1,1) == & x_static_8(0,1,2,2,0,1,2,2) ); + + // Kokkos::View<int**,Kokkos::LayoutRight,Space> error_2 = + Kokkos::View<int**,Kokkos::LayoutStride,Space> sx2 = + Kokkos::subview( x_static_8, 1, Kokkos::pair<int,int>(0,2), 2, 3 + , Kokkos::pair<int,int>(0,2), 1, 2, 3 ); + + ASSERT_TRUE( sx2.dimension_0() == 2 ); + ASSERT_TRUE( sx2.dimension_1() == 2 ); + ASSERT_TRUE( & sx2(0,0) == & x_static_8(1,0,2,3,0,1,2,3) ); + ASSERT_TRUE( & sx2(1,0) == & x_static_8(1,1,2,3,0,1,2,3) ); + ASSERT_TRUE( & sx2(0,1) == & x_static_8(1,0,2,3,1,1,2,3) ); + ASSERT_TRUE( & sx2(1,1) == & x_static_8(1,1,2,3,1,1,2,3) ); + + Kokkos::View<int****,Kokkos::LayoutStride,Space> sx4 = + Kokkos::subview( x_static_8, 0, Kokkos::pair<int,int>(0,2) /* of [3] */ + , 1, Kokkos::pair<int,int>(1,3) /* of [5] */ + , 1, Kokkos::pair<int,int>(0,2) /* of [3] */ + , 2, Kokkos::pair<int,int>(2,4) /* of [5] */ + ); + + ASSERT_TRUE( sx4.dimension_0() == 2 ); + ASSERT_TRUE( sx4.dimension_1() == 2 ); + ASSERT_TRUE( sx4.dimension_2() == 2 ); + ASSERT_TRUE( sx4.dimension_3() == 2 ); + for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 ) + for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 ) + for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 ) + for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) { + ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x_static_8(0, 0+i0, 1, 1+i1, 1, 0+i2, 2, 2+i3) ); + } + + } +} + +template< class Space > +void test_right_1() +{ + typedef Kokkos::View< int ****[2][3][4][5] , Kokkos::LayoutRight , Space > + view_type ; + + if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) { + + view_type x8("x_right_8",2,3,4,5); + + Kokkos::View<int,Kokkos::LayoutRight,Space> x0 = Kokkos::subview( x8 
, 0, 0, 0, 0, 0, 0, 0, 0 ); + + ASSERT_TRUE( & x0() == & x8(0,0,0,0,0,0,0,0) ); + + Kokkos::View<int*,Kokkos::LayoutRight,Space> x1 = + Kokkos::subview( x8, 0, 1, 2, 3, 0, 1, 2, Kokkos::pair<int,int>(1,3) ); + + ASSERT_TRUE( & x1(0) == & x8(0,1,2,3,0,1,2,1) ); + ASSERT_TRUE( & x1(1) == & x8(0,1,2,3,0,1,2,2) ); + + Kokkos::View<int**,Kokkos::LayoutRight,Space> x2 = + Kokkos::subview( x8, 0, 1, 2, Kokkos::pair<int,int>(1,3) + , 0, 1, 2, Kokkos::pair<int,int>(1,3) ); + + ASSERT_TRUE( & x2(0,0) == & x8(0,1,2,1,0,1,2,1) ); + ASSERT_TRUE( & x2(1,0) == & x8(0,1,2,2,0,1,2,1) ); + ASSERT_TRUE( & x2(0,1) == & x8(0,1,2,1,0,1,2,2) ); + ASSERT_TRUE( & x2(1,1) == & x8(0,1,2,2,0,1,2,2) ); + + // Kokkos::View<int**,Kokkos::LayoutRight,Space> error_2 = + Kokkos::View<int**,Kokkos::LayoutStride,Space> sx2 = + Kokkos::subview( x8, 1, Kokkos::pair<int,int>(0,2), 2, 3 + , Kokkos::pair<int,int>(0,2), 1, 2, 3 ); + + ASSERT_TRUE( & sx2(0,0) == & x8(1,0,2,3,0,1,2,3) ); + ASSERT_TRUE( & sx2(1,0) == & x8(1,1,2,3,0,1,2,3) ); + ASSERT_TRUE( & sx2(0,1) == & x8(1,0,2,3,1,1,2,3) ); + ASSERT_TRUE( & sx2(1,1) == & x8(1,1,2,3,1,1,2,3) ); + + Kokkos::View<int****,Kokkos::LayoutStride,Space> sx4 = + Kokkos::subview( x8, 0, Kokkos::pair<int,int>(0,2) /* of [3] */ + , 1, Kokkos::pair<int,int>(1,3) /* of [5] */ + , 1, Kokkos::pair<int,int>(0,2) /* of [3] */ + , 2, Kokkos::pair<int,int>(2,4) /* of [5] */ + ); + + for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 ) + for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 ) + for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 ) + for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) { + ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x8(0,0+i0, 1,1+i1, 1,0+i2, 2,2+i3) ); + } + + } +} + +template< class Space > +void test_right_3() +{ + typedef Kokkos::View< int ** , Kokkos::LayoutRight , Space > view_type ; + + if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) { + + view_type xm("x4",10,5); + + ASSERT_TRUE( 
xm.is_contiguous() ); + + Kokkos::View<int,Kokkos::LayoutRight,Space> x0 = Kokkos::subview( xm , 5, 3 ); + + ASSERT_TRUE( x0.is_contiguous() ); + ASSERT_TRUE( & x0() == & xm(5,3) ); + + Kokkos::View<int*,Kokkos::LayoutRight,Space> x1 = + Kokkos::subview( xm, 3, Kokkos::ALL() ); + + ASSERT_TRUE( x1.is_contiguous() ); + for ( int i = 0 ; i < int(xm.dimension_1()) ; ++i ) { + ASSERT_TRUE( & x1(i) == & xm(3,i) ); + } + + Kokkos::View<int**,Kokkos::LayoutRight,Space> x2c = + Kokkos::subview( xm, Kokkos::pair<int,int>(1,9), Kokkos::ALL() ); + + ASSERT_TRUE( x2c.is_contiguous() ); + for ( int j = 0 ; j < int(x2c.dimension_1()) ; ++j ) + for ( int i = 0 ; i < int(x2c.dimension_0()) ; ++i ) { + ASSERT_TRUE( & x2c(i,j) == & xm(1+i,j) ); + } + + Kokkos::View<int**,Kokkos::LayoutRight,Space> x2 = + Kokkos::subview( xm, Kokkos::ALL(), std::pair<int,int>(2,4) ); + + ASSERT_TRUE( ! x2.is_contiguous() ); + for ( int j = 0 ; j < int(x2.dimension_1()) ; ++j ) + for ( int i = 0 ; i < int(x2.dimension_0()) ; ++i ) { + ASSERT_TRUE( & x2(i,j) == & xm(i,2+j) ); + } + + Kokkos::View<int**,Kokkos::LayoutRight,Space> x2_n1 = + Kokkos::subview( xm , std::pair<int,int>(1,1) , Kokkos::ALL() ); + + ASSERT_TRUE( x2_n1.dimension_0() == 0 ); + ASSERT_TRUE( x2_n1.dimension_1() == xm.dimension_1() ); + + Kokkos::View<int**,Kokkos::LayoutRight,Space> x2_n2 = + Kokkos::subview( xm , Kokkos::ALL() , std::pair<int,int>(1,1) ); + + ASSERT_TRUE( x2_n2.dimension_0() == xm.dimension_0() ); + ASSERT_TRUE( x2_n2.dimension_1() == 0 ); + + } +} + +namespace Impl { + +constexpr int N0=113; +constexpr int N1=11; +constexpr int N2=17; +constexpr int N3=5; +constexpr int N4=7; + +template<class SubView,class View> +void test_Check1D(SubView a, View b, std::pair<int,int> range) { + int errors = 0; + for(int i=0;i<range.second-range.first;i++) { + if(a(i)!=b(i+range.first)) + errors++; + } + if(errors>0) + std::cout << "Error Suviews test_Check1D: " << errors <<std::endl; + ASSERT_TRUE( errors == 0 ); +} + 
+template<class SubView,class View> +void test_Check1D2D(SubView a, View b, int i0, std::pair<int,int> range) { + int errors = 0; + for(int i1=0;i1<range.second-range.first;i1++) { + if(a(i1)!=b(i0,i1+range.first)) + errors++; + } + if(errors>0) + std::cout << "Error Suviews test_Check1D2D: " << errors <<std::endl; + ASSERT_TRUE( errors == 0 ); +} + +template<class SubView,class View> +void test_Check2D3D(SubView a, View b, int i0, std::pair<int,int> range1, std::pair<int,int> range2) { + int errors = 0; + for(int i1=0;i1<range1.second-range1.first;i1++) { + for(int i2=0;i2<range2.second-range2.first;i2++) { + if(a(i1,i2)!=b(i0,i1+range1.first,i2+range2.first)) + errors++; + } + } + if(errors>0) + std::cout << "Error Suviews test_Check2D3D: " << errors <<std::endl; + ASSERT_TRUE( errors == 0 ); +} + +template<class SubView,class View> +void test_Check3D5D(SubView a, View b, int i0, int i1, std::pair<int,int> range2, std::pair<int,int> range3, std::pair<int,int> range4) { + int errors = 0; + for(int i2=0;i2<range2.second-range2.first;i2++) { + for(int i3=0;i3<range3.second-range3.first;i3++) { + for(int i4=0;i4<range4.second-range4.first;i4++) { + if(a(i2,i3,i4)!=b(i0,i1,i2+range2.first,i3+range3.first,i4+range4.first)) + errors++; + } + } + } + if(errors>0) + std::cout << "Error Suviews test_Check3D5D: " << errors <<std::endl; + ASSERT_TRUE( errors == 0 ); +} + +template<class Space, class LayoutSub, class Layout, class LayoutOrg> +void test_1d_assign_impl() { + + { //Breaks + Kokkos::View<int*,LayoutOrg,Space> a("A",N0); + Kokkos::fence(); + for(int i=0; i<N0; i++) + a(i) = i; + + Kokkos::View<int[N0],Layout,Space> a1(a); + Kokkos::fence(); + test_Check1D(a1,a,std::pair<int,int>(0,N0)); + + Kokkos::View<int[N0],LayoutSub,Space> a2(a1); + Kokkos::fence(); + test_Check1D(a2,a,std::pair<int,int>(0,N0)); + a1 = a; + test_Check1D(a1,a,std::pair<int,int>(0,N0)); + + //Runtime Fail expected + //Kokkos::View<int[N1]> afail1(a); + + //Compile Time Fail expected + 
//Kokkos::View<int[N1]> afail2(a1); + } + + { // Works + Kokkos::View<int[N0],LayoutOrg,Space> a("A"); + Kokkos::View<int*,Layout,Space> a1(a); + Kokkos::fence(); + test_Check1D(a1,a,std::pair<int,int>(0,N0)); + a1 = a; + Kokkos::fence(); + test_Check1D(a1,a,std::pair<int,int>(0,N0)); + } +} + +template<class Space, class Type, class TypeSub,class LayoutSub, class Layout, class LayoutOrg> +void test_2d_subview_3d_impl_type() { + Kokkos::View<int***,LayoutOrg,Space> a_org("A",N0,N1,N2); + Kokkos::View<Type,Layout,Space> a(a_org); + for(int i0=0; i0<N0; i0++) + for(int i1=0; i1<N1; i1++) + for(int i2=0; i2<N2; i2++) + a(i0,i1,i2) = i0*1000000+i1*1000+i2; + Kokkos::View<TypeSub,LayoutSub,Space> a1; + a1 = Kokkos::subview(a,3,Kokkos::ALL(),Kokkos::ALL()); + Kokkos::fence(); + test_Check2D3D(a1,a,3,std::pair<int,int>(0,N1),std::pair<int,int>(0,N2)); + + Kokkos::View<TypeSub,LayoutSub,Space> a2(a,3,Kokkos::ALL(),Kokkos::ALL()); + Kokkos::fence(); + test_Check2D3D(a2,a,3,std::pair<int,int>(0,N1),std::pair<int,int>(0,N2)); +} + +template<class Space, class LayoutSub, class Layout, class LayoutOrg> +void test_2d_subview_3d_impl_layout() { + test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int[N1][N2],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int* [N2],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int** ,LayoutSub, Layout, LayoutOrg>(); + + test_2d_subview_3d_impl_type<Space,int* [N1][N2],int[N1][N2],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_3d_impl_type<Space,int* [N1][N2],int* [N2],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_3d_impl_type<Space,int* [N1][N2],int** ,LayoutSub, Layout, LayoutOrg>(); + + test_2d_subview_3d_impl_type<Space,int** [N2],int[N1][N2],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_3d_impl_type<Space,int** [N2],int* [N2],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_3d_impl_type<Space,int** [N2],int** ,LayoutSub, Layout, LayoutOrg>(); + + 
test_2d_subview_3d_impl_type<Space,int*** ,int[N1][N2],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_3d_impl_type<Space,int*** ,int* [N2],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_3d_impl_type<Space,int*** ,int** ,LayoutSub, Layout, LayoutOrg>(); +} + +template<class Space, class Type, class TypeSub,class LayoutSub, class Layout, class LayoutOrg> +void test_2d_subview_5d_impl_type() { + Kokkos::View<int*****,LayoutOrg,Space> a_org("A",N0,N1,N2,N3,N4); + Kokkos::View<Type,Layout,Space> a(a_org); + for(int i0=0; i0<N0; i0++) + for(int i1=0; i1<N1; i1++) + for(int i2=0; i2<N2; i2++) + for(int i3=0; i3<N3; i3++) + for(int i4=0; i4<N4; i4++) + a(i0,i1,i2,i3,i4) = i0*1000000+i1*10000+i2*100+i3*10+i4; + Kokkos::View<TypeSub,LayoutSub,Space> a1; + a1 = Kokkos::subview(a,3,5,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()); + Kokkos::fence(); + test_Check3D5D(a1,a,3,5,std::pair<int,int>(0,N2),std::pair<int,int>(0,N3),std::pair<int,int>(0,N4)); + + Kokkos::View<TypeSub,LayoutSub,Space> a2(a,3,5,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()); + Kokkos::fence(); + test_Check3D5D(a2,a,3,5,std::pair<int,int>(0,N2),std::pair<int,int>(0,N3),std::pair<int,int>(0,N4)); +} + +template<class Space, class LayoutSub, class Layout, class LayoutOrg> +void test_2d_subview_5d_impl_layout() { + test_2d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int** [N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int*** ,LayoutSub, Layout, LayoutOrg>(); + + test_2d_subview_5d_impl_type<Space, int* [N1][N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int* [N1][N2][N3][N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int* 
[N1][N2][N3][N4],int** [N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int* [N1][N2][N3][N4],int*** ,LayoutSub, Layout, LayoutOrg>(); + + test_2d_subview_5d_impl_type<Space, int** [N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int** [N2][N3][N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int** [N2][N3][N4],int** [N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int** [N2][N3][N4],int*** ,LayoutSub, Layout, LayoutOrg>(); + + test_2d_subview_5d_impl_type<Space, int*** [N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int*** [N3][N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int*** [N3][N4],int** [N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int*** [N3][N4],int*** ,LayoutSub, Layout, LayoutOrg>(); + + test_2d_subview_5d_impl_type<Space, int**** [N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int**** [N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int**** [N4],int** [N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int**** [N4],int*** ,LayoutSub, Layout, LayoutOrg>(); + + test_2d_subview_5d_impl_type<Space, int***** ,int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int***** ,int* [N3][N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int***** ,int** [N4],LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_5d_impl_type<Space, int***** ,int*** ,LayoutSub, Layout, LayoutOrg>(); +} +} + +template< class Space > +void test_1d_assign() { + Impl::test_1d_assign_impl<Space,Kokkos::LayoutLeft ,Kokkos::LayoutLeft ,Kokkos::LayoutLeft >(); + //Impl::test_1d_assign_impl<Space,Kokkos::LayoutRight ,Kokkos::LayoutLeft ,Kokkos::LayoutLeft >(); + 
Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft ,Kokkos::LayoutLeft >(); + //Impl::test_1d_assign_impl<Space,Kokkos::LayoutLeft ,Kokkos::LayoutRight ,Kokkos::LayoutLeft >(); + Impl::test_1d_assign_impl<Space,Kokkos::LayoutRight ,Kokkos::LayoutRight ,Kokkos::LayoutRight >(); + Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutRight ,Kokkos::LayoutRight >(); + //Impl::test_1d_assign_impl<Space,Kokkos::LayoutLeft ,Kokkos::LayoutStride,Kokkos::LayoutLeft >(); + //Impl::test_1d_assign_impl<Space,Kokkos::LayoutRight ,Kokkos::LayoutStride,Kokkos::LayoutLeft >(); + Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft >(); +} + +template<class Space > +void test_2d_subview_3d() { + Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutRight ,Kokkos::LayoutRight, Kokkos::LayoutRight>(); + Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutRight, Kokkos::LayoutRight>(); + Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutRight>(); + Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft, Kokkos::LayoutLeft>(); + Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft>(); +} + +template<class Space > +void test_2d_subview_5d() { + Impl::test_2d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutRight, Kokkos::LayoutRight>(); + Impl::test_2d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutRight>(); + Impl::test_2d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft, Kokkos::LayoutLeft>(); + Impl::test_2d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft>(); +} + +} +//---------------------------------------------------------------------------- + diff --git a/lib/kokkos/core/unit_test/UnitTestMain.cpp 
b/lib/kokkos/core/unit_test/UnitTestMain.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f952ab3db51028aff0a0ebfe313b2639e353ab87 --- /dev/null +++ b/lib/kokkos/core/unit_test/UnitTestMain.cpp @@ -0,0 +1,50 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +int main(int argc, char *argv[]) { + ::testing::InitGoogleTest(&argc,argv); + return RUN_ALL_TESTS(); +} + diff --git a/lib/kokkos/doc/Doxyfile b/lib/kokkos/doc/Doxyfile new file mode 100644 index 0000000000000000000000000000000000000000..bc5c7486b27fc55ede35359b969af0a8008f960b --- /dev/null +++ b/lib/kokkos/doc/Doxyfile @@ -0,0 +1,127 @@ +# +# Include the global look and feel options +# +@INCLUDE = ../../common/Doxyfile +# +# Package options +# +PROJECT_NAME = "Kokkos Core Kernels Package" +PROJECT_NUMBER = "Version of the Day" +OUTPUT_DIRECTORY = . 
+OUTPUT_LANGUAGE = English + +EXTRACT_ALL = NO +EXTRACT_PRIVATE = NO +EXTRACT_STATIC = YES +HIDE_UNDOC_MEMBERS = YES +HIDE_UNDOC_CLASSES = YES +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ALWAYS_DETAILED_SEC = YES +FULL_PATH_NAMES = NO +STRIP_FROM_PATH = +INTERNAL_DOCS = NO +CLASS_DIAGRAMS = YES +SOURCE_BROWSER = YES +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = NO +REFERENCES_RELATION = NO +CASE_SENSE_NAMES = YES +HIDE_SCOPE_NAMES = NO +VERBATIM_HEADERS = YES +SHOW_INCLUDE_FILES = YES +#JAVADOC_AUTOBRIEF = YES +INHERIT_DOCS = YES +INLINE_INHERITED_MEMB = YES +INLINE_INFO = YES +SORT_MEMBER_DOCS = NO +TAB_SIZE = 2 +ENABLED_SECTIONS = +SORT_BRIEF_DOCS = NO +GENERATE_TODOLIST = YES +GENERATE_TESTLIST = YES +QUIET = NO +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_FORMAT = "$file:$line: $text" + +# +# INPUT: Where to find files that Doxygen should process. ../classic +# has a doc/ subdirectory with its own Doxyfile that points to its own +# files. The other Kokkos subpackages don't currently have their own +# Doxyfile files, so we have to do it manually here. +# +# mfh 26 Sep 2013: I've only added those directories in the Core +# subpackage that constitute the "public interface" of that +# subpackage. Please feel free to include additional subdirectories +# of ../core if you want to generate their documentation as well. +# +# mfh 26 Sep 2013: I've only added the Kokkos subpackages here that I +# think are ready for Doxygen documentation generation. Please feel +# free to amend this list as you see fit. 
+# + +INPUT = index.doc ../classic ../core/src ../containers/src ../linalg/src +FILE_PATTERNS = *.hpp *.cpp *.cuh *.cu +RECURSIVE = NO +EXCLUDE_PATTERNS = *.x *.o *.out +EXAMPLE_PATH = +EXAMPLE_RECURSIVE = YES +EXAMPLE_PATTERNS = *.cpp *.hpp +IMAGE_PATH = +INPUT_FILTER = +ALPHABETICAL_INDEX = YES +COLS_IN_ALPHA_INDEX = 4 +IGNORE_PREFIX = +# +# What diagrams are created +# +CLASS_GRAPH = YES +COLLABORATION_GRAPH = NO +INCLUDE_GRAPH = NO +INCLUDED_BY_GRAPH = NO +GRAPHICAL_HIERARCHY = YES +# +# Preprocessing +# +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = YES +EXPAND_ONLY_PREDEF = YES +SEARCH_INCLUDES = YES +INCLUDE_FILE_PATTERNS = +PREDEFINED = DOXYGEN_SHOULD_SKIP_THIS DOXYGEN_USE_ONLY +INCLUDE_PATH = ../src +EXPAND_AS_DEFINED = +# +# Links to other packages +# +TAGFILES = ../../common/tag_files/teuchos.tag=../../../teuchos/doc/html ../../common/tag_files/epetra.tag=../../../epetra/doc/html \ + ../../common/tag_files/belos.tag=../../../belos/doc/html ../../common/tag_files/anasazi.tag=../../../anasazi/doc/html \ + ../../common/tag_files/kokkos.tag=../../../kokkos/doc/html +GENERATE_TAGFILE = ../../common/tag_files/tpetra.tag +ALLEXTERNALS = NO +EXTERNAL_GROUPS = NO +# +# Environment +# +PERL_PATH = /usr/bin/perl +HAVE_DOT = YES +DOT_PATH = +MAX_DOT_GRAPH_WIDTH = 1024 +MAX_DOT_GRAPH_HEIGHT = 1024 +# +# What kind of documentation is generated +# +#GENERATE_HTML = YES +#HTML_OUTPUT = html +#HTML_HEADER = includes/header.html +#HTML_FOOTER = includes/footer.html +#HTML_STYLESHEET = includes/stylesheet.css +#HTML_ALIGN_MEMBERS = YES +GENERATE_HTMLHELP = NO +DISABLE_INDEX = NO +GENERATE_LATEX = NO +GENERATE_RTF = NO +GENERATE_MAN = NO +GENERATE_XML = NO diff --git a/lib/kokkos/doc/Kokkos_PG.pdf b/lib/kokkos/doc/Kokkos_PG.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3c415698c0d9fec315f317b71db19f2a019b6f6e Binary files /dev/null and b/lib/kokkos/doc/Kokkos_PG.pdf differ diff --git a/lib/kokkos/doc/README b/lib/kokkos/doc/README new file mode 
100644 index 0000000000000000000000000000000000000000..31e75f365c21a116a1fb736097f4f524e8d1e021 --- /dev/null +++ b/lib/kokkos/doc/README @@ -0,0 +1,32 @@ +Kokkos uses the Doxygen tool for providing three documentation +sources: +- man pages +- LaTeX User Guide +- HTML Online User Guide. + +Man Pages + +Man pages are available for all files and functions in the directory +TRILINOS_HOME/doc/kokkos/man, where TRILINOS_HOME is the location of your +copy of Trilinos. To use these pages with the Unix man utility, add +the directory to your man path as follows: + +setenv MANPATH `echo $MANPATH`:TRILINOS_HOME/doc/kokkos/man + + +LaTeX User Guide + +A PostScript version of this guide is in +TRILINOS_HOME/doc/kokkos/latex/user_guide.ps. The LaTeX source is in the +directory TRILINOS_HOME/doc/kokkos/latex. + +HTML Online User Guide + +The online guide is opened by pointing your browser to +TRILINOS_HOME/doc/kokkos/html/index.html + +Any questions, comments, or suggestions are welcome. Please send them to +Mike Heroux at + +320-845-7695 +maherou@sandia.gov diff --git a/lib/kokkos/doc/build_docs b/lib/kokkos/doc/build_docs new file mode 100755 index 0000000000000000000000000000000000000000..da1d3e4f6e061804b1fb2fe21b356b691494df5d --- /dev/null +++ b/lib/kokkos/doc/build_docs @@ -0,0 +1,15 @@ +#!/bin/sh + +if [ $TRILINOS_HOME ]; then + echo "TRILINOS_HOME has already been set!" +else + echo "TRILINOS_HOME has not been set. Setting it!" + export TRILINOS_HOME=`pwd`/../../.. +fi + +echo +echo "Generating main Kokkos doxygen documentation ..." +echo + +doxygen Doxyfile + diff --git a/lib/kokkos/doc/index.doc b/lib/kokkos/doc/index.doc new file mode 100644 index 0000000000000000000000000000000000000000..27a9e4f2e7b90e11bbcde7309e9bf1544e3b386f --- /dev/null +++ b/lib/kokkos/doc/index.doc @@ -0,0 +1,72 @@ +/*! +\mainpage Trilinos/Kokkos: Shared-memory programming interface and computational kernels + +\section Kokkos_Intro Introduction + +The %Kokkos package has two main components. 
The first, sometimes +called "%Kokkos Array" or just "%Kokkos," implements a +performance-portable shared-memory parallel programming model and data +containers. The second, called "%Kokkos Classic," consists of +computational kernels that support the %Tpetra package. + +\section Kokkos_Kokkos The %Kokkos programming model + +%Kokkos implements a performance-portable shared-memory parallel +programming model and data containers. It lets you write an algorithm +once, and just change a template parameter to get the optimal data +layout for your hardware. %Kokkos has back-ends for the following +parallel programming models: + +- Kokkos::Threads: POSIX Threads (Pthreads) +- Kokkos::OpenMP: OpenMP +- Kokkos::Cuda: NVIDIA's CUDA programming model for graphics + processing units (GPUs) +- Kokkos::Serial: No thread parallelism + +%Kokkos also has optimizations for shared-memory parallel systems with +nonuniform memory access (NUMA). Its containers can hold data of any +primitive ("plain old") data type (and some aggregate types). %Kokkos +Array may be used as a stand-alone programming model. + +%Kokkos' parallel operations include the following: + +- parallel_for: a thread-parallel "for loop" +- parallel_reduce: a thread-parallel reduction +- parallel_scan: a thread-parallel prefix scan operation + +as well as expert-level platform-independent interfaces to thread +"teams," per-team "shared memory," synchronization, and atomic update +operations. + +%Kokkos' data containers include the following: + +- Kokkos::View: A multidimensional array suitable for thread-parallel + operations. Its layout (e.g., row-major or column-major) is + optimized by default for the particular thread-parallel device. +- Kokkos::Vector: A drop-in replacement for std::vector that eases + porting from standard sequential C++ data structures to %Kokkos' + parallel data structures. +- Kokkos::UnorderedMap: A parallel lookup table comparable in + functionality to std::unordered_map. 
+ +%Kokkos also uses the above basic containers to implement higher-level +data structures, like sparse graphs and matrices. + +A good place to start learning about %Kokkos would be <a href="http://trilinos.sandia.gov/events/trilinos_user_group_2013/presentations/2013-11-TUG-Kokkos-Tutorial.pdf">these tutorial slides</a> from the 2013 Trilinos Users' Group meeting. + +\section Kokkos_Classic %Kokkos Classic + +"%Kokkos Classic" consists of computational kernels that support the +%Tpetra package. These kernels include sparse matrix-vector multiply, +sparse triangular solve, Gauss-Seidel, and dense vector operations. +They are templated on the type of objects (\c Scalar) on which they +operate. This component was not meant to be visible to users; it is +an implementation detail of the %Tpetra distributed linear algebra +package. + +%Kokkos Classic also implements a shared-memory parallel programming +model. This inspired and preceded the %Kokkos programming model +described in the previous section. Users should consider the %Kokkos +Classic programming model deprecated, and prefer the new %Kokkos +programming model. 
+*/ diff --git a/lib/kokkos/example/CMakeLists.txt b/lib/kokkos/example/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..3809cc2ea57a26ba1b3003a2e706fee912ccabc9 --- /dev/null +++ b/lib/kokkos/example/CMakeLists.txt @@ -0,0 +1,20 @@ + + +# Subpackage name must match what appears in kokkos/cmake/Dependencies.cmake +# +TRIBITS_SUBPACKAGE(Example) + +TRIBITS_ADD_EXAMPLE_DIRECTORIES(query_device) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(fixture) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(feint) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(fenl) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(multi_fem) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(md_skeleton) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(global_2_local_ids) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(grow_array) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(sort_array) +if(NOT Kokkos_ENABLE_Cuda) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(tutorial) +endif() +TRIBITS_SUBPACKAGE_POSTPROCESS() + diff --git a/lib/kokkos/example/README b/lib/kokkos/example/README new file mode 100644 index 0000000000000000000000000000000000000000..ec64004842b0f254de2f1d67a9cb5c272bf15607 --- /dev/null +++ b/lib/kokkos/example/README @@ -0,0 +1,16 @@ +This directory contains example application proxies that use different +parts of Kokkos. If you are looking for the FENL ("finite element +nonlinear" solve) example, it has moved into the LinAlg subpackage of +Tpetra. 
+ +MANIFEST: + + - common: Header files used by different examples + - feint: Unstructured finite-element method + - fixture: Some other finite-element method example + - global_2_local_ids: Example of global-to-local index lookup + - grow_array: Parallel dynamic memory allocation + - md_skeleton: Molecular dynamics + - query_device: Kokkos' HWLOC wrapper for querying device topology + - sort_array: Parallel sort + - tutorial: Kokkos tutorial (START HERE) diff --git a/lib/kokkos/example/cmake/Dependencies.cmake b/lib/kokkos/example/cmake/Dependencies.cmake new file mode 100644 index 0000000000000000000000000000000000000000..0d86e78712b85db319a17029e66e96292a410573 --- /dev/null +++ b/lib/kokkos/example/cmake/Dependencies.cmake @@ -0,0 +1,4 @@ +TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( + LIB_REQUIRED_DEP_PACKAGES KokkosCore KokkosContainers KokkosAlgorithms + TEST_OPTIONAL_DEP_TPLS CUSPARSE MKL + ) diff --git a/lib/kokkos/example/common/VectorImport.hpp b/lib/kokkos/example/common/VectorImport.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8ecd74d463c08f3624cf2be2d44b0ca1e4d008ad --- /dev/null +++ b/lib/kokkos/example/common/VectorImport.hpp @@ -0,0 +1,294 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_VECTORIMPORT_HPP +#define KOKKOS_VECTORIMPORT_HPP + +#include <utility> +#include <limits> +#include <iostream> +#include <sstream> +#include <stdexcept> + +#include <Kokkos_Core.hpp> + +#include <WrapMPI.hpp> + +namespace Kokkos { +namespace Example { + +template< class CommMessageType , class CommIdentType , class VectorType > +struct VectorImport ; + +} // namespace Example +} // namespace Kokkos + +#if ! 
defined( KOKKOS_HAVE_MPI ) + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Example { + +template< class CommMessageType , class CommIdentType , class VectorType > +struct VectorImport { + + const MPI_Comm comm ; + const unsigned count_owned ; + const unsigned count_receive ; + + VectorImport( MPI_Comm arg_comm , + const CommMessageType & , + const CommMessageType & , + const CommIdentType & , + const unsigned arg_count_owned , + const unsigned arg_count_receive ) + : comm( arg_comm ) + , count_owned( arg_count_owned ) + , count_receive( arg_count_receive ) + {} + + inline + void operator()( const VectorType & ) const {} +}; + + +} // namespace Example +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#else /* defined( KOKKOS_HAVE_MPI ) */ + +namespace Kokkos { +namespace Example { + +template< class CommMessageType , class CommIdentType , class VectorType > +class VectorImport { +private: + + // rank == 1 or array_layout == LayoutRight + enum { OK = Kokkos::Impl::StaticAssert< + ( VectorType::rank == 1 ) || + Kokkos::Impl::is_same< typename VectorType::array_layout , Kokkos::LayoutRight >::value + >::value }; + + typedef typename VectorType::HostMirror HostVectorType ; + + enum { ReceiveInPlace = + Kokkos::Impl::is_same< typename VectorType::memory_space , + typename HostVectorType::memory_space >::value }; + + const CommMessageType recv_msg ; + const CommMessageType send_msg ; + const CommIdentType send_nodeid ; + VectorType send_buffer ; + HostVectorType host_send_buffer ; + HostVectorType host_recv_buffer ; + unsigned chunk ; + +public: + + const MPI_Comm comm ; + const unsigned count_owned ; + const unsigned count_receive ; + + struct Pack { + typedef typename 
VectorType::execution_space execution_space ; + const CommIdentType index ; + const VectorType source ; + const VectorType buffer ; + + KOKKOS_INLINE_FUNCTION + void operator()( const unsigned i ) const + { buffer( i ) = source( index(i) ); } + + Pack( const CommIdentType & arg_index , + const VectorType & arg_source , + const VectorType & arg_buffer ) + : index( arg_index ) + , source( arg_source ) + , buffer( arg_buffer ) + { + Kokkos::parallel_for( index.dimension_0() , *this ); + execution_space::fence(); + } + }; + + VectorImport( MPI_Comm arg_comm , + const CommMessageType & arg_recv_msg , + const CommMessageType & arg_send_msg , + const CommIdentType & arg_send_nodeid , + const unsigned arg_count_owned , + const unsigned arg_count_receive ) + : recv_msg( arg_recv_msg ) + , send_msg( arg_send_msg ) + , send_nodeid( arg_send_nodeid ) + , send_buffer() + , host_send_buffer() + , host_recv_buffer() + , comm( arg_comm ) + , count_owned( arg_count_owned ) + , count_receive( arg_count_receive ) + { + if ( ! ReceiveInPlace ) { + host_recv_buffer = HostVectorType("recv_buffer",count_receive); + } + + unsigned send_count = 0 ; + for ( unsigned i = 0 ; i < send_msg.dimension_0() ; ++i ) { send_count += send_msg(i,1); } + send_buffer = VectorType("send_buffer",send_count); + host_send_buffer = Kokkos::create_mirror_view( send_buffer ); + } + + inline + void operator()( const VectorType & v ) const + { + typedef typename VectorType::value_type scalar_type ; + + const int mpi_tag = 42 ; + const unsigned chunk = v.dimension_1(); + + // Subvector for receives + const std::pair<unsigned,unsigned> recv_range( count_owned , count_owned + count_receive ); + const VectorType recv_vector = Kokkos::subview( v , recv_range ); + + std::vector< MPI_Request > recv_request( recv_msg.dimension_0() , MPI_REQUEST_NULL ); + + { // Post receives + scalar_type * ptr = + ReceiveInPlace ? 
recv_vector.ptr_on_device() : host_recv_buffer.ptr_on_device(); + + for ( size_t i = 0 ; i < recv_msg.dimension_0() ; ++i ) { + const int proc = recv_msg(i,0); + const int count = recv_msg(i,1) * chunk ; + + MPI_Irecv( ptr , count * sizeof(scalar_type) , MPI_BYTE , + proc , mpi_tag , comm , & recv_request[i] ); + + ptr += count ; + } + } + + MPI_Barrier( comm ); + + { // Pack and send + const Pack pack( send_nodeid , v , send_buffer ); + + Kokkos::deep_copy( host_send_buffer , send_buffer ); + + scalar_type * ptr = host_send_buffer.ptr_on_device(); + + for ( size_t i = 0 ; i < send_msg.dimension_0() ; ++i ) { + const int proc = send_msg(i,0); + const int count = send_msg(i,1) * chunk ; + + // MPI_Ssend blocks until + // (1) a receive is matched for the message and + // (2) the send buffer can be re-used. + // + // It is suggested that MPI_Ssend will have the best performance: + // http://www.mcs.anl.gov/research/projects/mpi/sendmode.html . + + MPI_Ssend( ptr , + count * sizeof(scalar_type) , MPI_BYTE , + proc , mpi_tag , comm ); + + ptr += count ; + } + } + + // Wait for receives and verify: + + for ( size_t i = 0 ; i < recv_msg.dimension_0() ; ++i ) { + MPI_Status recv_status ; + int recv_which = 0 ; + int recv_size = 0 ; + + MPI_Waitany( recv_msg.dimension_0() , & recv_request[0] , & recv_which , & recv_status ); + + const int recv_proc = recv_status.MPI_SOURCE ; + + MPI_Get_count( & recv_status , MPI_BYTE , & recv_size ); + + // Verify message properly received: + + const int expected_proc = recv_msg(recv_which,0); + const int expected_size = recv_msg(recv_which,1) * chunk * sizeof(scalar_type); + + if ( ( expected_proc != recv_proc ) || + ( expected_size != recv_size ) ) { + + int local_rank = 0 ; + + MPI_Comm_rank( comm , & local_rank ); + + std::ostringstream msg ; + msg << "VectorImport error:" + << " P" << local_rank + << " received from P" << recv_proc + << " size " << recv_size + << " expected " << expected_size + << " from P" << expected_proc ; + throw 
std::runtime_error( msg.str() ); + } + } + + // Copy received data to device memory. + + if ( ! ReceiveInPlace ) { Kokkos::deep_copy( recv_vector , host_recv_buffer ); } + } +}; + +} // namespace Example +} // namespace Kokkos + +#endif + +//---------------------------------------------------------------------------- + +#endif /* #ifndef KOKKOS_VECTORIMPORT_HPP */ + + diff --git a/lib/kokkos/example/common/WrapMPI.hpp b/lib/kokkos/example/common/WrapMPI.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c75e4bc5c7e2a41753d32f7d6967f65ffbf86581 --- /dev/null +++ b/lib/kokkos/example/common/WrapMPI.hpp @@ -0,0 +1,103 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXAMPLE_WRAP_MPI +#define KOKKOS_EXAMPLE_WRAP_MPI + +#include <Kokkos_Macros.hpp> +#include <string> + +#if defined( KOKKOS_HAVE_MPI ) + +#include <mpi.h> + +namespace Kokkos { +namespace Example { + +/** \brief Sum of 'value' over all ranks of 'comm' (MPI_Allreduce with MPI_SUM). */ +inline +double all_reduce( double value , MPI_Comm comm ) +{ + double local = value ; + MPI_Allreduce( & local , & value , 1 , MPI_DOUBLE , MPI_SUM , comm ); + return value ; +} + +/** \brief Maximum of 'value' over all ranks of 'comm' (MPI_Allreduce with MPI_MAX). */ +inline +double all_reduce_max( double value , MPI_Comm comm ) +{ + double local = value ; + MPI_Allreduce( & local , & value , 1 , MPI_DOUBLE , MPI_MAX , comm ); + return value ; +} + +} // namespace Example +} // namespace Kokkos + +#elif ! defined( KOKKOS_HAVE_MPI ) + +/* Wrap the MPI_Comm type and heavily used MPI functions + * to reduce the number of '#if defined( KOKKOS_HAVE_MPI )' + * blocks which have to be sprinkled throughout the examples.
+ */ + +typedef int MPI_Comm ; + +inline int MPI_Comm_size( MPI_Comm , int * size ) { *size = 1 ; return 0 ; } +inline int MPI_Comm_rank( MPI_Comm , int * rank ) { *rank = 0 ; return 0 ; } +inline int MPI_Barrier( MPI_Comm ) { return 0; } + +namespace Kokkos { +namespace Example { + +inline +double all_reduce( double value , MPI_Comm ) { return value ; } + +inline +double all_reduce_max( double value , MPI_Comm ) { return value ; } + +} // namespace Example +} // namespace Kokkos + +#endif /* ! defined( KOKKOS_HAVE_MPI ) */ +#endif /* #ifndef KOKKOS_EXAMPLE_WRAP_MPI */ + diff --git a/lib/kokkos/example/feint/CMakeLists.txt b/lib/kokkos/example/feint/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..0018b9f9f538de77ce776daaa267a037714387ad --- /dev/null +++ b/lib/kokkos/example/feint/CMakeLists.txt @@ -0,0 +1,18 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../common) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../fixture) + +SET(SOURCES "") + +FILE(GLOB SOURCES *.cpp) + +LIST( APPEND SOURCES ../fixture/BoxElemPart.cpp) + +TRIBITS_ADD_EXECUTABLE( + feint + SOURCES ${SOURCES} + COMM serial mpi + ) + diff --git a/lib/kokkos/example/feint/ElemFunctor.hpp b/lib/kokkos/example/feint/ElemFunctor.hpp new file mode 100644 index 0000000000000000000000000000000000000000..651e34c2eed247f37986886c86f04ce24d76c551 --- /dev/null +++ b/lib/kokkos/example/feint/ElemFunctor.hpp @@ -0,0 +1,489 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXAMPLE_FEINT_FUNCTORS_HPP +#define KOKKOS_EXAMPLE_FEINT_FUNCTORS_HPP + +#include <stdio.h> +#include <Kokkos_Core.hpp> +#include <BoxElemFixture.hpp> + +namespace Kokkos { +namespace Example { + +/** \brief Numerically integrate a function on a finite element mesh and + * project the integrated values to nodes. + */ +template< class FixtureType , + class FunctionType , + bool PerformScatterAddWithAtomic > +struct FiniteElementIntegration ; + +// Specialized for an 'Example::BoxElemFixture' finite element mesh +template< class Device , BoxElemPart::ElemOrder ElemOrder , class GridMap , + class FunctionType , + bool PerformScatterAddWithAtomic > +struct FiniteElementIntegration< + Kokkos::Example::BoxElemFixture< Device , ElemOrder , GridMap > , + FunctionType , + PerformScatterAddWithAtomic > +{ + // Element mesh types. + // GridMap is forwarded so that BoxFixtureType is exactly the fixture + // type this specialization was selected for, not the default-map fixture. + typedef Kokkos::Example::BoxElemFixture< Device , ElemOrder , GridMap > + BoxFixtureType ; + + typedef Kokkos::Example::HexElement_Data< BoxFixtureType::ElemNode > + HexElemDataType ; + + enum { ElemNodeCount = HexElemDataType::element_node_count }; + enum { IntegrationCount = HexElemDataType::integration_count }; + enum { ValueCount = FunctionType::value_count }; + + // Dictionary of view types: + typedef View<int*, Device> ElemErrorType ; + typedef View<double*[ElemNodeCount][ValueCount],Device> ElemValueType ; + typedef View<double*[ValueCount], Device> NodeValueType ; + + // Data members for this Functor: + const HexElemDataType m_hex_elem_data ; ///< Master element + const BoxFixtureType m_box_fixture ; ///< Unstructured mesh data + const FunctionType m_function ; ///< Function to integrate + const ElemErrorType m_elem_error ; ///< Flags for element errors + const ElemValueType m_elem_integral ; ///< Per-element quantities + const NodeValueType m_node_lumped ; ///< Quantities lumped to nodes + +
//---------------------------------------- + + // Allocate the per-element and per-node result views, sized from the fixture. + FiniteElementIntegration( + const BoxFixtureType & box_fixture , + const FunctionType & function ) + : m_hex_elem_data() + , m_box_fixture( box_fixture ) // Shallow copy of the mesh fixture + , m_function( function ) + , m_elem_error( "elem_error" , box_fixture.elem_count() ) + , m_elem_integral( "elem_integral" , box_fixture.elem_count() ) + , m_node_lumped( "node_lumped" , box_fixture.node_count() ) + {} + + //---------------------------------------- + // Device for parallel dispatch. + typedef typename Device::execution_space execution_space; + + // Value type for global parallel reduction. + struct value_type { + double value[ ValueCount ]; ///< Integrated quantities + int error ; ///< Element inversion flag + }; + + //---------------------------------------- + // Transform element interpolation function gradients and + // compute determinant of spatial jacobian. + KOKKOS_INLINE_FUNCTION + float transform_gradients( + const float grad[][ ElemNodeCount ] , // Gradient of bases master element + const double coord[][ ElemNodeCount ] , + float dpsi[][ ElemNodeCount ] ) const + { + enum { TensorDim = 9 }; + enum { j11 = 0 , j12 = 1 , j13 = 2 , + j21 = 3 , j22 = 4 , j23 = 5 , + j31 = 6 , j32 = 7 , j33 = 8 }; + + // Temporary for jacobian accumulation is double for summation accuracy. + double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + for( int i = 0; i < ElemNodeCount ; ++i ) { + J[j11] += grad[0][i] * coord[0][i] ; + J[j12] += grad[0][i] * coord[1][i] ; + J[j13] += grad[0][i] * coord[2][i] ; + + J[j21] += grad[1][i] * coord[0][i] ; + J[j22] += grad[1][i] * coord[1][i] ; + J[j23] += grad[1][i] * coord[2][i] ; + + J[j31] += grad[2][i] * coord[0][i] ; + J[j32] += grad[2][i] * coord[1][i] ; + J[j33] += grad[2][i] * coord[2][i] ; + } + + // Inverse jacobian, compute as double and store as float.
+ float invJ[ TensorDim ] = { + float( J[j22] * J[j33] - J[j23] * J[j32] ) , + float( J[j13] * J[j32] - J[j12] * J[j33] ) , + float( J[j12] * J[j23] - J[j13] * J[j22] ) , + + float( J[j23] * J[j31] - J[j21] * J[j33] ) , + float( J[j11] * J[j33] - J[j13] * J[j31] ) , + float( J[j13] * J[j21] - J[j11] * J[j23] ) , + + float( J[j21] * J[j32] - J[j22] * J[j31] ) , + float( J[j12] * J[j31] - J[j11] * J[j32] ) , + float( J[j11] * J[j22] - J[j12] * J[j21] ) }; + + const float detJ = J[j11] * invJ[j11] + + J[j21] * invJ[j12] + + J[j31] * invJ[j13] ; + + { + const float detJinv = 1.0 / detJ ; + for ( int i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; } + } + + // Transform gradients: + for ( int i = 0; i < ElemNodeCount ; ++i ) { + dpsi[0][i] = grad[0][i] * invJ[j11] + + grad[1][i] * invJ[j12] + + grad[2][i] * invJ[j13]; + dpsi[1][i] = grad[0][i] * invJ[j21] + + grad[1][i] * invJ[j22] + + grad[2][i] * invJ[j23]; + dpsi[2][i] = grad[0][i] * invJ[j31] + + grad[1][i] * invJ[j32] + + grad[2][i] * invJ[j33]; + } + + return detJ ; + } + + // Functor's function called for each element in the mesh + // to numerically integrate the function and add element quantities + // to the global integral. + KOKKOS_INLINE_FUNCTION + void operator()( const int ielem , value_type & update ) const + { + // Local temporaries for gathering nodal data. + double node_coord[3][ ElemNodeCount ]; + + int inode[ ElemNodeCount ] ; + + // Gather indices of element's node from global memory to local memory. + for ( int i = 0 ; i < ElemNodeCount ; ++i ) { + inode[i] = m_box_fixture.elem_node( ielem , i ); + } + + // Gather coordinates of element's nodes from global memory to local memory. 
+ for ( int i = 0 ; i < ElemNodeCount ; ++i ) { + node_coord[0][i] = m_box_fixture.node_coord( inode[i] , 0 ); + node_coord[1][i] = m_box_fixture.node_coord( inode[i] , 1 ); + node_coord[2][i] = m_box_fixture.node_coord( inode[i] , 2 ); + } + + // Local temporary to accumulate numerical integration + // of vector valued function. + double accum[ ValueCount ]; + + for ( int j = 0 ; j < ValueCount ; ++j ) { accum[j] = 0 ; } + + int error = 0 ; + + // Numerical integration loop for this element: + for ( int k = 0 ; k < IntegrationCount ; ++k ) { + + // Integration point in space as interpolated from nodal coordinates: + double point[3] = { 0 , 0 , 0 }; + for ( int i = 0 ; i < ElemNodeCount ; ++i ) { + point[0] += node_coord[0][i] * m_hex_elem_data.values[k][i] ; + point[1] += node_coord[1][i] * m_hex_elem_data.values[k][i] ; + point[2] += node_coord[2][i] * m_hex_elem_data.values[k][i] ; + } + + // Example function vector value at cubature point: + double val_at_pt[ ValueCount ]; + m_function( point , val_at_pt ); + + // Temporary array for transformed element basis functions' gradient. + // Not used in this example, but computed anyway by the more general + // deformation function. + float dpsi[3][ ElemNodeCount ]; + + // Compute deformation jacobian, transform basis function gradient, + // and return determinant of deformation jacobian. + float detJ = transform_gradients( m_hex_elem_data.gradients[k] , + node_coord , dpsi ); + + // Check for inverted spatial jacobian + if ( detJ <= 0 ) { error = 1 ; detJ = 0 ; } + + // Integration weight. + const float w = m_hex_elem_data.weights[k] * detJ ; + + // Cubature of function. 
+ for ( int j = 0 ; j < ValueCount ; ++j ) { + accum[j] += val_at_pt[j] * w ; + } + } + + m_elem_error(ielem) = error ; + + + // Element contribution to global integral: + + if ( error ) { update.error = 1 ; } + + for ( int j = 0 ; j < ValueCount ; ++j ) { update.value[j] += accum[j] ; } + + // Element-node quantity for lumping to nodes: + for ( int i = 0 ; i < ElemNodeCount ; ++i ) { + for ( int j = 0 ; j < ValueCount ; ++j ) { + // Save element's integral apportionment to nodes to global memory + m_elem_integral( ielem , i , j ) = accum[j] / ElemNodeCount ; + } + } + + if ( PerformScatterAddWithAtomic ) { + // Option to immediately scatter-add the integrated quantities to nodes. + // This is a race condition as two or more threads could attempt + // concurrent update of nodal values. The atomic_fetch_add (+=) + // function guarantees that the summation will occur correctly; + // however, there can be no guarantee for the order of summation. + // Due to non-associativity of floating point arithmetic the result + // is non-deterministic within bounds of floating point round-off. + + for ( int i = 0 ; i < ElemNodeCount ; ++i ) { + for ( int j = 0 ; j < ValueCount ; ++j ) { + Kokkos::atomic_fetch_add( & m_node_lumped( inode[i] , j ) , + m_elem_integral( ielem , i , j ) ); + } + } + } + } + //-------------------------------------------------------------------------- + + // Initialization of the global reduction value. + KOKKOS_INLINE_FUNCTION + void init( value_type & update ) const + { + for ( int j = 0 ; j < ValueCount ; ++j ) update.value[j] = 0 ; + update.error = 0 ; + } + + // Join two contributions to global reduction value. 
+ KOKKOS_INLINE_FUNCTION + void join( volatile value_type & update , + volatile const value_type & input ) const + { + for ( int j = 0 ; j < ValueCount ; ++j ) update.value[j] += input.value[j] ; + if ( input.error ) update.error = 1 ; + } +}; + +} /* namespace Example */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Example { + +template< class ViewElemNode , + class ViewNodeScan , + class ViewNodeElem > +void map_node_to_elem( const ViewElemNode & elem_node , + const ViewNodeScan & node_scan , + const ViewNodeElem & node_elem ); + +/** \brief Functor to gather-sum elements' per-node quantities + * to element nodes. Gather-sum is thread safe and + * does not require atomic updates. + */ +template< class ViewNodeValue , + class ViewElemValue , + bool AlreadyUsedAtomic > +struct LumpElemToNode { + + typedef typename ViewElemValue::execution_space execution_space ; + + // In this example we know that the ViewElemValue + // array specification is < double*[nNode][nValue] > + +#if KOKKOS_USING_EXP_VIEW + enum { value_count = ViewElemValue::dimension::N2 }; +#else + enum { value_count = ViewElemValue::shape_type::N2 }; +#endif + + ViewNodeValue m_node_value ; ///< Integrated values at nodes + ViewElemValue m_elem_value ; ///< Values apportioned to nodes + View<int*, execution_space> m_node_scan ; ///< Offsets for nodes->element + View<int*[2],execution_space> m_node_elem ; ///< Node->element connectivity + + // Only allocate node->element connectivity if have + // not already used atomic updates for the nodes. + template< class ViewElemNode > + LumpElemToNode( const ViewNodeValue & node_value , + const ViewElemValue & elem_value , + const ViewElemNode & elem_node ) + : m_node_value( node_value ) + , m_elem_value( elem_value ) + , m_node_scan( "node_scan" , + AlreadyUsedAtomic ? 
0 : node_value.dimension_0() + 1 ) + , m_node_elem( "node_elem" , + AlreadyUsedAtomic ? 0 : elem_node.dimension_0() * + elem_node.dimension_1() ) + { + if ( ! AlreadyUsedAtomic ) { + map_node_to_elem( elem_node , m_node_scan , m_node_elem ); + } + } + + //---------------------------------------- + + struct value_type { double value[ value_count ]; }; + + KOKKOS_INLINE_FUNCTION + void operator()( const int inode , value_type & update ) const + { + if ( ! AlreadyUsedAtomic ) { + // Sum element quantities to a local variable. + value_type local ; + for ( int j = 0 ; j < value_count ; ++j ) { local.value[j] = 0 ; } + + { + // nodes' element ids span [i,end) + int i = m_node_scan(inode); + const int end = m_node_scan(inode+1); + + for ( ; i < end ; ++i ) { + // element #ielem , local node #ielem_node is this node: + const int ielem = m_node_elem(i,0); + const int ielem_node = m_node_elem(i,1); + // Sum the vector-values quantity + for ( int j = 0 ; j < value_count ; ++j ) { + local.value[j] += m_elem_value( ielem , ielem_node , j ); + } + } + } + + // Assign nodal quantity (no race condition). + // Sum global value. + for ( int j = 0 ; j < value_count ; ++j ) { + m_node_value( inode , j ) = local.value[j] ; + update.value[j] += local.value[j] ; + } + } + else { + // Already used atomic update of the nodal quantity, + // query and sum the value. 
for ( int j = 0 ; j < value_count ; ++j ) { + update.value[j] += m_node_value( inode , j ); + } + } + } + + KOKKOS_INLINE_FUNCTION + void init( value_type & update ) const + { for ( int j = 0 ; j < value_count ; ++j ) { update.value[j] = 0 ; } } + + KOKKOS_INLINE_FUNCTION + void join( volatile value_type & update , + volatile const value_type & input ) const + { + for ( int j = 0 ; j < value_count ; ++j ) { + update.value[j] += input.value[j] ; + } + } +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +/** \brief Build the node->element connectivity on the host and copy it + * into 'node_scan' and 'node_elem'. + * + * On return the rows [ node_scan(n) , node_scan(n+1) ) of 'node_elem' + * belong to node n; each row holds ( element index , local node index ). + * 'node_scan' must have one more entry than there are nodes. + */ +template< class ViewElemNode , + class ViewNodeScan , + class ViewNodeElem > +void map_node_to_elem( const ViewElemNode & elem_node , + const ViewNodeScan & node_scan , + const ViewNodeElem & node_elem ) +{ + typedef typename ViewElemNode::host_mirror_space host_mirror_space ; + + const typename ViewElemNode::HostMirror host_elem_node = + Kokkos::create_mirror_view(elem_node); + + const typename ViewNodeScan::HostMirror host_node_scan = + Kokkos::create_mirror_view(node_scan); + + const typename ViewNodeElem::HostMirror host_node_elem = + Kokkos::create_mirror_view(node_elem); + + const int elem_count = host_elem_node.dimension_0(); + const int elem_node_count = host_elem_node.dimension_1(); + const int node_count = host_node_scan.dimension_0() - 1 ; + + const View<int*, host_mirror_space > + node_elem_count( "node_elem_count" , node_count ); + + Kokkos::deep_copy( host_elem_node , elem_node ); + + // Pass 1: count how many elements touch each node. + for ( int i = 0 ; i < elem_count ; ++i ) { + for ( int j = 0 ; j < elem_node_count ; ++j ) { + ++node_elem_count( host_elem_node(i,j) ); + } + } + + // Exclusive prefix sum of the per-node counts. Entries are assigned + // (not accumulated) so the offsets do not depend on 'node_scan' + // arriving zero-filled; the counts are reset for reuse as cursors below. + if ( 0 <= node_count ) { host_node_scan(0) = 0 ; } + for ( int i = 0 ; i < node_count ; ++i ) { + host_node_scan(i+1) = host_node_scan(i) + node_elem_count(i); + node_elem_count(i) = 0 ; + } + + // Pass 2: fill each node's rows with ( element , local node ) pairs. + for ( int i = 0 ; i < elem_count ; ++i ) { + for ( int j = 0 ; j < elem_node_count ; ++j ) { + const int inode = host_elem_node(i,j); + const int offset =
host_node_scan(inode) + node_elem_count(inode); + + host_node_elem( offset , 0 ) = i ; + host_node_elem( offset , 1 ) = j ; + + ++node_elem_count(inode); + } + } + + Kokkos::deep_copy( node_scan , host_node_scan ); + Kokkos::deep_copy( node_elem , host_node_elem ); +} + +} /* namespace Example */ +} /* namespace Kokkos */ + +#endif /* #ifndef KOKKOS_EXAMPLE_FEINT_FUNCTORS_HPP */ + diff --git a/lib/kokkos/example/feint/Makefile b/lib/kokkos/example/feint/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..f198a974c1e34d4014323eb34d03e7aa1f7445ba --- /dev/null +++ b/lib/kokkos/example/feint/Makefile @@ -0,0 +1,61 @@ +KOKKOS_PATH = ../.. + +vpath %.cpp ${KOKKOS_PATH}/example/fixture ${KOKKOS_PATH}/example/feint + +EXAMPLE_HEADERS = $(wildcard $(KOKKOS_PATH)/example/common/*.hpp ${KOKKOS_PATH}/example/fixture/*.hpp ${KOKKOS_PATH}/example/feint/*.hpp) + +default: build_all + echo "End Build" + +include $(KOKKOS_PATH)/Makefile.kokkos + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + CXX = $(NVCC_WRAPPER) + CXXFLAGS ?= -O3 + LINK = $(CXX) + LDFLAGS ?= -lpthread +else + CXX ?= g++ + CXXFLAGS ?= -O3 + LINK ?= $(CXX) + LDFLAGS ?= -lpthread +endif + +KOKKOS_CXXFLAGS += \ + -I${KOKKOS_PATH}/example/common \ + -I${KOKKOS_PATH}/example/fixture \ + -I${KOKKOS_PATH}/example/feint + +EXE_EXAMPLE_FEINT = KokkosExample_Feint +OBJ_EXAMPLE_FEINT = BoxElemPart.o main.o + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + OBJ_EXAMPLE_FEINT += feint_cuda.o +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + OBJ_EXAMPLE_FEINT += feint_threads.o +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + OBJ_EXAMPLE_FEINT += feint_openmp.o +endif + +TARGETS = $(EXE_EXAMPLE_FEINT) + +#TEST_TARGETS = + +$(EXE_EXAMPLE_FEINT) : $(OBJ_EXAMPLE_FEINT) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_EXAMPLE_FEINT) $(KOKKOS_LIBS) $(LIB) -o $(EXE_EXAMPLE_FEINT) + +build_all : $(TARGETS) + +test : build_all + +clean: kokkos-clean + rm -f *.o $(TARGETS) + +#
Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(EXAMPLE_HEADERS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/feint/feint.hpp b/lib/kokkos/example/feint/feint.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4b7196c4c713f1bd1c1f077818393edfa5ff506f --- /dev/null +++ b/lib/kokkos/example/feint/feint.hpp @@ -0,0 +1,165 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXAMPLE_FEINT_HPP +#define KOKKOS_EXAMPLE_FEINT_HPP + +#include <iostream> +#include <BoxElemFixture.hpp> +#include <ElemFunctor.hpp> +#include <feint_fwd.hpp> + +namespace Kokkos { +namespace Example { + +/** \brief Vector valued function to numerically integrate. + * + * F(X) = { 1 , x , y , z , x*y , y*z , z*x , x*y*z } + * + * Integrates on a unit cube to: + * { 1 , 1/2 , 1/2 , 1/2 , 1/4 , 1/4 , 1/4 , 1/8 } + */ +struct MyFunctionType { + + enum { value_count = 8 }; + + // Evaluate function at coordinate. 
+ template< typename CoordType , typename ValueType > + KOKKOS_INLINE_FUNCTION + void operator()( const CoordType point[] , ValueType value[] ) const + { + value[0] = 1 ; + value[1] = point[0] ; + value[2] = point[1] ; + value[3] = point[2] ; + value[4] = point[0] * point[1] ; + value[5] = point[1] * point[2] ; + value[6] = point[2] * point[0] ; + value[7] = point[0] * point[1] * point[2] ; + } +}; + +template < class Device , bool UseAtomic > +void feint( + const unsigned global_elem_nx , + const unsigned global_elem_ny , + const unsigned global_elem_nz ) +{ + //---------------------------------------- + // Create the unstructured finite element mesh box fixture on the device: + + typedef Kokkos::Example:: + BoxElemFixture< Device , Kokkos::Example::BoxElemPart::ElemLinear > + // BoxElemFixture< Device , Kokkos::Example::BoxElemPart::ElemQuadratic > + BoxFixtureType ; + + // MPI distributed parallel domain decomposition of the fixture. + // Either by element (DecomposeElem) or by node (DecomposeNode) + // with ghosted elements. + + static const Kokkos::Example::BoxElemPart::Decompose + decompose = Kokkos::Example::BoxElemPart:: DecomposeElem ; + // decompose = Kokkos::Example::BoxElemPart:: DecomposeNode ; + + // Not using MPI in this example. 
+ const unsigned mpi_rank = 0 ; + const unsigned mpi_size = 1 ; + + const BoxFixtureType fixture( decompose , mpi_size , mpi_rank , + global_elem_nx , + global_elem_ny , + global_elem_nz ); + + //---------------------------------------- + // Create and execute the numerical integration functor on the device: + + typedef Kokkos::Example:: + FiniteElementIntegration< BoxFixtureType , MyFunctionType , UseAtomic > + FeintType ; + + const FeintType feint( fixture , MyFunctionType() ); + + typename FeintType::value_type elem_integral ; + + // A reduction for the global integral: + Kokkos::parallel_reduce( fixture.elem_count() , feint , elem_integral ); + + if ( elem_integral.error ) { + std::cout << "An element had a spatial jacobian error" << std::endl ; + return ; + } + + std::cout << "Elem integral =" ; + for ( int i = 0 ; i < MyFunctionType::value_count ; ++i ) { + std::cout << " " << elem_integral.value[i] ; + } + std::cout << std::endl ; + + //---------------------------------------- + // Create and execute the nodal lumped value projection and reduction functor: + + typedef Kokkos::Example:: + LumpElemToNode< typename FeintType::NodeValueType , + typename FeintType::ElemValueType , + UseAtomic > LumpType ; + + const LumpType lump( feint.m_node_lumped , + feint.m_elem_integral , + fixture.elem_node() ); + + typename LumpType ::value_type node_sum ; + + Kokkos::parallel_reduce( fixture.node_count() , lump , node_sum ); + + std::cout << "Node lumped sum =" ; + for ( int i = 0 ; i < MyFunctionType::value_count ; ++i ) { + std::cout << " " << node_sum.value[i] ; + } + std::cout << std::endl ; +} + +} /* namespace Example */ +} /* namespace Kokkos */ + +#endif /* #ifndef KOKKOS_EXAMPLE_FEINT_HPP */ + diff --git a/lib/kokkos/example/feint/feint_cuda.cpp b/lib/kokkos/example/feint/feint_cuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1370822febbc4e2099c68ed691ddfb425d47772a --- /dev/null +++ b/lib/kokkos/example/feint/feint_cuda.cpp @@ -0,0 
+1,67 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> + +#if defined( KOKKOS_HAVE_CUDA ) + +#include <feint.hpp> + +namespace Kokkos { +namespace Example { + +template void feint<Kokkos::Cuda,false>( + const unsigned global_elem_nx , + const unsigned global_elem_ny , + const unsigned global_elem_nz ); + +template void feint<Kokkos::Cuda,true>( + const unsigned global_elem_nx , + const unsigned global_elem_ny , + const unsigned global_elem_nz ); + +} /* namespace Example */ +} /* namespace Kokkos */ + +#endif + diff --git a/lib/kokkos/example/feint/feint_fwd.hpp b/lib/kokkos/example/feint/feint_fwd.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f02e547cfccbb6b89c7bca192a87bcad95a266f0 --- /dev/null +++ b/lib/kokkos/example/feint/feint_fwd.hpp @@ -0,0 +1,60 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXAMPLE_FEINT_FWD_HPP +#define KOKKOS_EXAMPLE_FEINT_FWD_HPP + +namespace Kokkos { +namespace Example { + +template < class Device , bool UseAtomic > +void feint( + const unsigned global_elem_nx = 100 , + const unsigned global_elem_ny = 115 , + const unsigned global_elem_nz = 130 ); + +} /* namespace Example */ +} /* namespace Kokkos */ + +#endif /* #ifndef KOKKOS_EXAMPLE_FEINT_FWD_HPP */ + diff --git a/lib/kokkos/example/feint/feint_openmp.cpp b/lib/kokkos/example/feint/feint_openmp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cf2fdca5ba4b7d0e2a60d74e433fc9b849e79108 --- /dev/null +++ b/lib/kokkos/example/feint/feint_openmp.cpp @@ -0,0 +1,67 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> + +#ifdef KOKKOS_HAVE_OPENMP + +#include <feint.hpp> + +namespace Kokkos { +namespace Example { + +template void feint<Kokkos::OpenMP,false>( + const unsigned global_elem_nx , + const unsigned global_elem_ny , + const unsigned global_elem_nz ); + +template void feint<Kokkos::OpenMP,true>( + const unsigned global_elem_nx , + const unsigned global_elem_ny , + const unsigned global_elem_nz ); + +} /* namespace Example */ +} /* namespace Kokkos */ + +#endif + diff --git a/lib/kokkos/example/feint/feint_threads.cpp b/lib/kokkos/example/feint/feint_threads.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5dcf5654f1e7b5cdc24e8110a866eabfdbcb3350 --- /dev/null +++ b/lib/kokkos/example/feint/feint_threads.cpp @@ -0,0 +1,66 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> + +#if defined( KOKKOS_HAVE_PTHREAD ) + +#include <feint.hpp> + +namespace Kokkos { +namespace Example { + +template void feint< Kokkos::Threads ,false>( + const unsigned global_elem_nx , + const unsigned global_elem_ny , + const unsigned global_elem_nz ); + +template void feint< Kokkos::Threads ,true>( + const unsigned global_elem_nx , + const unsigned global_elem_ny , + const unsigned global_elem_nz ); + +} /* namespace Example */ +} /* namespace Kokkos */ + +#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */ diff --git a/lib/kokkos/example/feint/main.cpp b/lib/kokkos/example/feint/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c45d483e76da818a8f5a3cb6dd7cb76522504467 --- /dev/null +++ b/lib/kokkos/example/feint/main.cpp @@ -0,0 +1,110 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include <utility> +#include <iostream> + +#include <Kokkos_Core.hpp> + +#include <feint_fwd.hpp> + +int main() +{ +#if defined( KOKKOS_HAVE_PTHREAD ) + { + // Use 4 cores per NUMA region, unless fewer available + + const unsigned use_numa_count = Kokkos::hwloc::get_available_numa_count(); + const unsigned use_cores_per_numa = std::min( 4u , Kokkos::hwloc::get_available_cores_per_numa() ); + + Kokkos::Threads::initialize( use_numa_count * use_cores_per_numa ); + + std::cout << "feint< Threads , NotUsingAtomic >" << std::endl ; + Kokkos::Example::feint< Kokkos::Threads , false >(); + + std::cout << "feint< Threads , Usingtomic >" << std::endl ; + Kokkos::Example::feint< Kokkos::Threads , true >(); + + Kokkos::Threads::finalize(); + } +#endif + +#if defined( KOKKOS_HAVE_OPENMP ) + { + // Use 4 cores per NUMA region, unless fewer available + + const unsigned use_numa_count = Kokkos::hwloc::get_available_numa_count(); + const unsigned use_cores_per_numa = std::min( 4u , Kokkos::hwloc::get_available_cores_per_numa() ); + + Kokkos::OpenMP::initialize( use_numa_count * use_cores_per_numa ); + + std::cout << "feint< OpenMP , NotUsingAtomic >" << std::endl ; + Kokkos::Example::feint< Kokkos::OpenMP , false >(); + + std::cout << "feint< OpenMP , Usingtomic >" << std::endl ; + Kokkos::Example::feint< Kokkos::OpenMP , true >(); + + Kokkos::OpenMP::finalize(); + } +#endif + +#if defined( KOKKOS_HAVE_CUDA ) + { + // Initialize Host mirror device + Kokkos::HostSpace::execution_space::initialize(1); + const unsigned device_count = Kokkos::Cuda::detect_device_count(); + + // Use the last device: + Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(device_count-1) ); + + std::cout << "feint< Cuda , NotUsingAtomic >" << std::endl ; + Kokkos::Example::feint< Kokkos::Cuda , false >(); + + std::cout << "feint< Cuda , UsingAtomic >" << std::endl ; + 
Kokkos::Example::feint< Kokkos::Cuda , true >(); + + Kokkos::Cuda::finalize(); + Kokkos::HostSpace::execution_space::finalize(); + + } +#endif +} + diff --git a/lib/kokkos/example/fenl/CGSolve.hpp b/lib/kokkos/example/fenl/CGSolve.hpp new file mode 100644 index 0000000000000000000000000000000000000000..06a0030e09cadb0f9ab82080d8ab244563ae54b0 --- /dev/null +++ b/lib/kokkos/example/fenl/CGSolve.hpp @@ -0,0 +1,296 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXAMPLE_CG_SOLVE +#define KOKKOS_EXAMPLE_CG_SOLVE + +#include <cmath> +#include <limits> +#include <Kokkos_Core.hpp> +#include <impl/Kokkos_Timer.hpp> + +#include <WrapMPI.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Example { + +template< typename ValueType , class Space > +struct CrsMatrix { + typedef Kokkos::StaticCrsGraph< unsigned , Space , void , unsigned > StaticCrsGraphType ; + typedef View< ValueType * , Space > coeff_type ; + + StaticCrsGraphType graph ; + coeff_type coeff ; + + CrsMatrix() : graph(), coeff() {} + + CrsMatrix( const StaticCrsGraphType & arg_graph ) + : graph( arg_graph ) + , coeff( "crs_matrix_coeff" , arg_graph.entries.dimension_0() ) + {} +}; + +template< typename MScalar + , typename VScalar + , class Space > +struct Multiply { + + const Example::CrsMatrix< MScalar , Space > m_A ; + const Kokkos::View< const VScalar * , Space > m_x ; + const Kokkos::View< VScalar * , Space > m_y ; + + KOKKOS_INLINE_FUNCTION + void operator()( const int iRow ) const + { + const int iEntryBegin = m_A.graph.row_map[iRow]; + const int iEntryEnd = m_A.graph.row_map[iRow+1]; + + double sum = 0 ; 
+ + for ( int iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) { + sum += m_A.coeff(iEntry) * m_x( m_A.graph.entries(iEntry) ); + } + + m_y(iRow) = sum ; + } + + Multiply( const View< VScalar * , Space > & y + , const CrsMatrix< MScalar , Space > & A + , const View< const VScalar * , Space > & x + ) + : m_A( A ), m_x( x ), m_y( y ) + {} +}; + +template< typename MScalar + , typename VScalar + , class Space > +inline +void multiply( const int nrow + , const Kokkos::View< VScalar * , Space > & y + , const Example::CrsMatrix< MScalar , Space > & A + , const Kokkos::View< VScalar * , Space > & x + ) +{ + Kokkos::parallel_for( Kokkos::RangePolicy<Space>(0,nrow), Multiply<MScalar,VScalar,Space>( y , A , x ) ); +} + +template< typename ValueType , class Space > +struct WAXPBY { + const Kokkos::View< const ValueType * , Space > m_x ; + const Kokkos::View< const ValueType * , Space > m_y ; + const Kokkos::View< ValueType * , Space > m_w ; + const double m_alpha ; + const double m_beta ; + + KOKKOS_INLINE_FUNCTION + void operator()( const int i ) const + { m_w(i) = m_alpha * m_x(i) + m_beta * m_y(i); } + + WAXPBY( const View< ValueType * , Space > & arg_w + , const double arg_alpha + , const View< ValueType * , Space > & arg_x + , const double arg_beta + , const View< ValueType * , Space > & arg_y + ) + : m_x( arg_x ) + , m_y( arg_y ) + , m_w( arg_w ) + , m_alpha( arg_alpha ) + , m_beta( arg_beta ) + {} +}; + +template< typename VScalar , class Space > +void waxpby( const int n + , const Kokkos::View< VScalar * , Space > & arg_w + , const double arg_alpha + , const Kokkos::View< VScalar * , Space > & arg_x + , const double arg_beta + , const Kokkos::View< VScalar * , Space > & arg_y + ) +{ + Kokkos::parallel_for( Kokkos::RangePolicy<Space>(0,n), WAXPBY<VScalar,Space>(arg_w,arg_alpha,arg_x,arg_beta,arg_y) ); +} + +template< typename VScalar , class Space > +struct Dot { + typedef double value_type ; + + const Kokkos::View< const VScalar * , Space > m_x ; + const 
Kokkos::View< const VScalar * , Space > m_y ; + + KOKKOS_INLINE_FUNCTION + void operator()( const int i , value_type & update ) const + { update += m_x(i) * m_y(i); } + + Dot( const Kokkos::View< VScalar * , Space > & arg_x + , const Kokkos::View< VScalar * , Space > & arg_y + ) + : m_x(arg_x), m_y(arg_y) {} +}; + +template< typename VScalar , class Space > +double dot( const int n + , const Kokkos::View< VScalar * , Space > & arg_x + , const Kokkos::View< VScalar * , Space > & arg_y + ) +{ + double result = 0 ; + Kokkos::parallel_reduce( Kokkos::RangePolicy<Space>(0,n) , Dot<VScalar,Space>( arg_x , arg_y ) , result ); + return result ; +} + +} // namespace Example +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Example { + +struct CGSolveResult { + size_t iteration ; + double iter_time ; + double matvec_time ; + double norm_res ; +}; + +template< class ImportType + , typename MScalar + , typename VScalar + , class Space + > +inline +void cgsolve( const ImportType & import + , const CrsMatrix< MScalar , Space > & A + , const Kokkos::View< VScalar * , Space > & b + , const Kokkos::View< VScalar * , Space > & x + , const size_t maximum_iteration = 200 + , const double tolerance = std::numeric_limits<double>::epsilon() + , CGSolveResult * result = 0 + ) +{ + typedef View< VScalar * , Space > VectorType ; + + const size_t count_owned = import.count_owned ; + const size_t count_total = import.count_owned + import.count_receive; + + size_t iteration = 0 ; + double iter_time = 0 ; + double matvec_time = 0 ; + double norm_res = 0 ; + + // Need input vector to matvec to be owned + received + VectorType pAll ( "cg::p" , count_total ); + + VectorType p = Kokkos::subview( pAll , std::pair<size_t,size_t>(0,count_owned) ); + VectorType r ( "cg::r" , count_owned ); + VectorType Ap( "cg::Ap", count_owned ); + + /* 
r = b - A * x ; */ + + /* p = x */ Kokkos::deep_copy( p , x ); + /* import p */ import( pAll ); + /* Ap = A * p */ multiply( count_owned , Ap , A , pAll ); + /* r = b - Ap */ waxpby( count_owned , r , 1.0 , b , -1.0 , Ap ); + /* p = r */ Kokkos::deep_copy( p , r ); + + double old_rdot = Kokkos::Example::all_reduce( dot( count_owned , r , r ) , import.comm ); + + norm_res = sqrt( old_rdot ); + iteration = 0 ; + + Kokkos::Timer wall_clock ; + Kokkos::Timer timer; + + while ( tolerance < norm_res && iteration < maximum_iteration ) { + + /* pAp_dot = dot( p , Ap = A * p ) */ + + timer.reset(); + /* import p */ import( pAll ); + /* Ap = A * p */ multiply( count_owned , Ap , A , pAll ); + Space::fence(); + matvec_time += timer.seconds(); + + const double pAp_dot = Kokkos::Example::all_reduce( dot( count_owned , p , Ap ) , import.comm ); + const double alpha = old_rdot / pAp_dot ; + + /* x += alpha * p ; */ waxpby( count_owned , x , alpha, p , 1.0 , x ); + /* r += -alpha * Ap ; */ waxpby( count_owned , r , -alpha, Ap , 1.0 , r ); + + const double r_dot = Kokkos::Example::all_reduce( dot( count_owned , r , r ) , import.comm ); + const double beta = r_dot / old_rdot ; + + /* p = r + beta * p ; */ waxpby( count_owned , p , 1.0 , r , beta , p ); + + norm_res = sqrt( old_rdot = r_dot ); + + ++iteration ; + } + + Space::fence(); + iter_time = wall_clock.seconds(); + + if ( 0 != result ) { + result->iteration = iteration ; + result->iter_time = iter_time ; + result->matvec_time = matvec_time ; + result->norm_res = norm_res ; + } +} + +} // namespace Example +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #ifndef KOKKOS_EXAMPLE_CG_SOLVE */ + + diff --git a/lib/kokkos/example/fenl/CMakeLists.txt b/lib/kokkos/example/fenl/CMakeLists.txt new file mode 100644 index 
0000000000000000000000000000000000000000..150656b16e13d4977c6ea975b87a785103cc7d48 --- /dev/null +++ b/lib/kokkos/example/fenl/CMakeLists.txt @@ -0,0 +1,17 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../common) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../fixture) + +SET(SOURCES "") + +FILE( GLOB SOURCES *.cpp ) + +LIST( APPEND SOURCES ../fixture/BoxElemPart.cpp ) + +TRIBITS_ADD_EXECUTABLE( + fenl + SOURCES ${SOURCES} + COMM serial mpi + ) diff --git a/lib/kokkos/example/fenl/Makefile b/lib/kokkos/example/fenl/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..5d8e6fd3034ec7c20044552a5688fc6751e374fb --- /dev/null +++ b/lib/kokkos/example/fenl/Makefile @@ -0,0 +1,54 @@ +KOKKOS_PATH ?= ../.. + +MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) +SRC_DIR := $(dir $(MAKEFILE_PATH)) + +vpath %.cpp ${SRC_DIR}/../fixture ${SRC_DIR} + +EXAMPLE_HEADERS = $(wildcard $(SRC_DIR)/../common/*.hpp ${SRC_DIR}/../fixture/*.hpp ${SRC_DIR}/*.hpp) + +default: build_all + echo "End Build" + +include $(KOKKOS_PATH)/Makefile.kokkos + +# KOKKOS_INTERNAL_USE_CUDA is not exported to installed Makefile.kokkos +# use KOKKOS_DEVICE here +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) + CXX = $(NVCC_WRAPPER) + CXXFLAGS ?= -O3 + LINK = $(CXX) + LDFLAGS ?= -lpthread +else + CXX ?= g++ + CXXFLAGS ?= -O3 + LINK ?= $(CXX) + LDFLAGS ?= -lpthread +endif + +KOKKOS_CXXFLAGS += \ + -I${SRC_DIR}/../common \ + -I${SRC_DIR}/../fixture \ + -I${SRC_DIR} + +EXE_EXAMPLE_FENL = KokkosExample_Fenl +OBJ_EXAMPLE_FENL = BoxElemPart.o main.o fenl.o + +TARGETS = $(EXE_EXAMPLE_FENL) + +#TEST_TARGETS = + +$(EXE_EXAMPLE_FENL) : $(OBJ_EXAMPLE_FENL) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_EXAMPLE_FENL) $(KOKKOS_LIBS) $(LIB) -o $(EXE_EXAMPLE_FENL) + +build_all : $(TARGETS) + +test : build_all + +clean: kokkos-clean + rm -f *.o 
$(TARGETS) + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(EXAMPLE_HEADERS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/fenl/fenl.cpp b/lib/kokkos/example/fenl/fenl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a5cba608f4454d38ebea3904caba3b8f6b6078dc --- /dev/null +++ b/lib/kokkos/example/fenl/fenl.cpp @@ -0,0 +1,117 @@ +/* +// ************************************************************************ +// +// Kokkos: Manycore Performance-Portable Multidimensional Arrays +// Copyright (2012) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +*/ + +#include <HexElement.hpp> +#include <fenl_impl.hpp> + +namespace Kokkos { +namespace Example { +namespace FENL { + +#if defined( KOKKOS_HAVE_PTHREAD ) + +template +Perf fenl< Kokkos::Threads , Kokkos::Example::BoxElemPart::ElemLinear >( + MPI_Comm comm , + const int use_print , + const int use_trials , + const int use_atomic , + const int global_elems[] ); + + +template +Perf fenl< Kokkos::Threads , Kokkos::Example::BoxElemPart::ElemQuadratic >( + MPI_Comm comm , + const int use_print , + const int use_trials , + const int use_atomic , + const int global_elems[] ); + +#endif + + +#if defined (KOKKOS_HAVE_OPENMP) + +template +Perf fenl< Kokkos::OpenMP , Kokkos::Example::BoxElemPart::ElemLinear >( + MPI_Comm comm , + const int use_print , + const int use_trials , + const int use_atomic , + const int global_elems[] ); + + +template +Perf fenl< Kokkos::OpenMP , Kokkos::Example::BoxElemPart::ElemQuadratic >( + MPI_Comm comm , + const int use_print , + const int use_trials , + const int use_atomic , + const int global_elems[] ); + +#endif + +#if defined( KOKKOS_HAVE_CUDA ) + +template +Perf fenl< Kokkos::Cuda , Kokkos::Example::BoxElemPart::ElemLinear >( + MPI_Comm comm , + const int use_print , + const int use_trials , + const int use_atomic , + const int global_elems[] ); + + 
+template +Perf fenl< Kokkos::Cuda , Kokkos::Example::BoxElemPart::ElemQuadratic >( + MPI_Comm comm , + const int use_print , + const int use_trials , + const int use_atomic , + const int global_elems[] ); + +#endif + + +} /* namespace FENL */ +} /* namespace Example */ +} /* namespace Kokkos */ + diff --git a/lib/kokkos/example/fenl/fenl.hpp b/lib/kokkos/example/fenl/fenl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e524a378c0c2eb75223c933c51267ff55852d08b --- /dev/null +++ b/lib/kokkos/example/fenl/fenl.hpp @@ -0,0 +1,89 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_EXAMPLE_FENL_HPP
+#define KOKKOS_EXAMPLE_FENL_HPP
+
+#include <stdlib.h>
+#include <BoxElemPart.hpp>
+#include <WrapMPI.hpp>
+
+namespace Kokkos {
+namespace Example {
+namespace FENL {
+// Record returned by fenl(): four size_t counters followed by double-valued measurements.
+struct Perf {
+  size_t global_elem_count ;
+  size_t global_node_count ;
+  size_t newton_iter_count ;
+  size_t cg_iter_count ;
+  double map_ratio ;
+  double fill_node_set ;
+  double scan_node_count ;
+  double fill_graph_entries ;
+  double sort_graph_entries ;
+  double fill_element_graph ;
+  double create_sparse_matrix ;
+  double fill_time ;
+  double bc_time ;
+  double matvec_time ;
+  double cg_time ;
+  double newton_residual ;
+  double error_max ;
+  // NOTE(review): field meanings are inferred from their names only -- confirm against fenl_impl.hpp.
+};
+// Declaration of the example entry point, templated on Device and element order; it returns a Perf record.
+template < class Device , BoxElemPart::ElemOrder ElemOrder >
+Perf fenl(
+  MPI_Comm comm ,
+  const int use_print ,
+  const int use_trials ,
+  const int use_atomic ,
+  const int global_elems[] );
+
+} /* namespace FENL */
+} /* namespace Example */
+} /* namespace Kokkos */
+
+#endif /* #ifndef KOKKOS_EXAMPLE_FENL_HPP */
+
diff --git a/lib/kokkos/example/fenl/fenl_functors.hpp b/lib/kokkos/example/fenl/fenl_functors.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..3020c99a2f58637c64377a9ae933d0e3549d3c12
--- /dev/null
+++ b/lib/kokkos/example/fenl/fenl_functors.hpp
@@ -0,0 +1,1173 @@
+/*
+//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP +#define KOKKOS_EXAMPLE_FENLFUNCTORS_HPP + +#include <stdio.h> + +#include <iostream> +#include <fstream> +#include <iomanip> +#include <cstdlib> +#include <cmath> +#include <limits> + +#include <Kokkos_Pair.hpp> +#include <Kokkos_UnorderedMap.hpp> + +#include <impl/Kokkos_Timer.hpp> + +#include <BoxElemFixture.hpp> +#include <HexElement.hpp> +#include <CGSolve.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Example { +namespace FENL { + +template< class ElemNodeIdView , class CrsGraphType , unsigned ElemNode > +class NodeNodeGraph { +public: + + typedef typename ElemNodeIdView::execution_space execution_space ; + typedef pair<unsigned,unsigned> key_type ; + + typedef Kokkos::UnorderedMap< key_type, void , execution_space > SetType ; + typedef typename CrsGraphType::row_map_type::non_const_type RowMapType ; + typedef Kokkos::View< unsigned , execution_space > UnsignedValue ; + + // Static dimensions of 0 generate compiler warnings or errors. 
+ typedef Kokkos::View< unsigned*[ElemNode][ElemNode] , execution_space > + ElemGraphType ; + + struct TagFillNodeSet {}; + struct TagScanNodeCount {}; + struct TagFillGraphEntries {}; + struct TagSortGraphEntries {}; + struct TagFillElementGraph {}; + +private: + + enum PhaseType { FILL_NODE_SET , + SCAN_NODE_COUNT , + FILL_GRAPH_ENTRIES , + SORT_GRAPH_ENTRIES , + FILL_ELEMENT_GRAPH }; + + const unsigned node_count ; + const ElemNodeIdView elem_node_id ; + UnsignedValue row_total ; + RowMapType row_count ; + RowMapType row_map ; + SetType node_node_set ; + PhaseType phase ; + +public: + + CrsGraphType graph ; + ElemGraphType elem_graph ; + + struct Times + { + double ratio; + double fill_node_set; + double scan_node_count; + double fill_graph_entries; + double sort_graph_entries; + double fill_element_graph; + }; + + NodeNodeGraph( const ElemNodeIdView & arg_elem_node_id , + const unsigned arg_node_count, + Times & results + ) + : node_count(arg_node_count) + , elem_node_id( arg_elem_node_id ) + , row_total( "row_total" ) + , row_count(Kokkos::ViewAllocateWithoutInitializing("row_count") , node_count ) // will deep_copy to 0 inside loop + , row_map( "graph_row_map" , node_count + 1 ) + , node_node_set() + , phase( FILL_NODE_SET ) + , graph() + , elem_graph() + { + //-------------------------------- + // Guess at capacity required for the map: + + Kokkos::Timer wall_clock ; + + wall_clock.reset(); + phase = FILL_NODE_SET ; + + // upper bound on the capacity + size_t set_capacity = (28ull * node_count) / 2; + unsigned failed_insert_count = 0 ; + + do { + // Zero the row count to restart the fill + Kokkos::deep_copy( row_count , 0u ); + + node_node_set = SetType( ( set_capacity += failed_insert_count ) ); + + // May be larger that requested: + set_capacity = node_node_set.capacity(); + + Kokkos::parallel_reduce( Kokkos::RangePolicy<execution_space,TagFillNodeSet>(0,elem_node_id.dimension_0()) + , *this + , failed_insert_count ); + + } while ( failed_insert_count ); + 
+ execution_space::fence(); + results.ratio = (double)node_node_set.size() / (double)node_node_set.capacity(); + results.fill_node_set = wall_clock.seconds(); + //-------------------------------- + + wall_clock.reset(); + phase = SCAN_NODE_COUNT ; + + // Exclusive scan of row_count into row_map + // including the final total in the last of the 'node_count + 1' positions. + // The 'row_count' values are zeroed by the deep_copy below. + Kokkos::parallel_scan( node_count , *this ); + + // Zero the row count for the fill: + Kokkos::deep_copy( row_count , 0u ); + + unsigned graph_entry_count = 0 ; + + Kokkos::deep_copy( graph_entry_count , row_total ); + + // Assign graph's row_map and allocate graph's entries + graph.row_map = row_map ; + graph.entries = typename CrsGraphType::entries_type( "graph_entries" , graph_entry_count ); + + //-------------------------------- + // Fill graph's entries from the (node,node) set. + + execution_space::fence(); + results.scan_node_count = wall_clock.seconds(); + + wall_clock.reset(); + phase = FILL_GRAPH_ENTRIES ; + Kokkos::parallel_for( node_node_set.capacity() , *this ); + + execution_space::fence(); + results.fill_graph_entries = wall_clock.seconds(); + + //-------------------------------- + // Done with the temporary sets and arrays + wall_clock.reset(); + phase = SORT_GRAPH_ENTRIES ; + + row_total = UnsignedValue(); + row_count = RowMapType(); + row_map = RowMapType(); + node_node_set.clear(); + + //-------------------------------- + + Kokkos::parallel_for( node_count , *this ); + + execution_space::fence(); + results.sort_graph_entries = wall_clock.seconds(); + + //-------------------------------- + // Element-to-graph mapping: + wall_clock.reset(); + phase = FILL_ELEMENT_GRAPH ; + elem_graph = ElemGraphType("elem_graph", elem_node_id.dimension_0() ); + Kokkos::parallel_for( elem_node_id.dimension_0() , *this ); + + execution_space::fence(); + results.fill_element_graph = wall_clock.seconds(); + } + + //------------------------------------ + // parallel_for: create
map and count row length + + KOKKOS_INLINE_FUNCTION + void operator()( const TagFillNodeSet & , unsigned ielem , unsigned & count ) const + { + // Loop over element's (row_local_node,col_local_node) pairs: + for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.dimension_1() ; ++row_local_node ) { + + const unsigned row_node = elem_node_id( ielem , row_local_node ); + + for ( unsigned col_local_node = row_local_node ; col_local_node < elem_node_id.dimension_1() ; ++col_local_node ) { + + const unsigned col_node = elem_node_id( ielem , col_local_node ); + + // If either node is locally owned then insert the pair into the (node,node) set: + + if ( row_node < row_count.dimension_0() || col_node < row_count.dimension_0() ) { + + const key_type key = (row_node < col_node) ? make_pair( row_node, col_node ) : make_pair( col_node, row_node ) ; + + const typename SetType::insert_result result = node_node_set.insert( key ); + + // A successful insert: the first time this pair was added + if ( result.success() ) { + + // If row node is owned then increment count + if ( row_node < row_count.dimension_0() ) { atomic_fetch_add( & row_count( row_node ) , 1 ); } + + // If column node is owned and not equal to row node then increment count + if ( col_node < row_count.dimension_0() && col_node != row_node ) { atomic_fetch_add( & row_count( col_node ) , 1 ); } + } + else if ( result.failed() ) { + ++count ; + } + } + } + } + } + + KOKKOS_INLINE_FUNCTION + void fill_graph_entries( const unsigned iset ) const + { + if ( node_node_set.valid_at(iset) ) { + // Add each entry to the graph entries.
+ + const key_type key = node_node_set.key_at(iset) ; + const unsigned row_node = key.first ; + const unsigned col_node = key.second ; + + if ( row_node < row_count.dimension_0() ) { + const unsigned offset = graph.row_map( row_node ) + atomic_fetch_add( & row_count( row_node ) , 1 ); + graph.entries( offset ) = col_node ; + } + + if ( col_node < row_count.dimension_0() && col_node != row_node ) { + const unsigned offset = graph.row_map( col_node ) + atomic_fetch_add( & row_count( col_node ) , 1 ); + graph.entries( offset ) = row_node ; + } + } + } + + KOKKOS_INLINE_FUNCTION + void sort_graph_entries( const unsigned irow ) const + { + const unsigned row_beg = graph.row_map( irow ); + const unsigned row_end = graph.row_map( irow + 1 ); + for ( unsigned i = row_beg + 1 ; i < row_end ; ++i ) { + const unsigned col = graph.entries(i); + unsigned j = i ; + for ( ; row_beg < j && col < graph.entries(j-1) ; --j ) { + graph.entries(j) = graph.entries(j-1); + } + graph.entries(j) = col ; + } + } + + KOKKOS_INLINE_FUNCTION + void fill_elem_graph_map( const unsigned ielem ) const + { + for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.dimension_1() ; ++row_local_node ) { + + const unsigned row_node = elem_node_id( ielem , row_local_node ); + + for ( unsigned col_local_node = 0 ; col_local_node < elem_node_id.dimension_1() ; ++col_local_node ) { + + const unsigned col_node = elem_node_id( ielem , col_local_node ); + + unsigned entry = ~0u ; + + if ( row_node + 1 < graph.row_map.dimension_0() ) { + + const unsigned entry_end = graph.row_map( row_node + 1 ); + + entry = graph.row_map( row_node ); + + for ( ; entry < entry_end && graph.entries(entry) != col_node ; ++entry ); + + if ( entry == entry_end ) entry = ~0u ; + } + + elem_graph( ielem , row_local_node , col_local_node ) = entry ; + } + } + } + + KOKKOS_INLINE_FUNCTION + void operator()( const unsigned iwork ) const + { +/* + if ( phase == FILL_NODE_SET ) { + operator()( TagFillNodeSet() , iwork ); + } + 
else */ + if ( phase == FILL_GRAPH_ENTRIES ) { + fill_graph_entries( iwork ); + } + else if ( phase == SORT_GRAPH_ENTRIES ) { + sort_graph_entries( iwork ); + } + else if ( phase == FILL_ELEMENT_GRAPH ) { + fill_elem_graph_map( iwork ); + } + } + + //------------------------------------ + // parallel_scan: row offsets + + typedef unsigned value_type ; + + KOKKOS_INLINE_FUNCTION + void operator()( const unsigned irow , unsigned & update , const bool final ) const + { + // exclusive scan: on the final pass row_map( irow ) receives the running sum before row_count( irow ) is added + if ( final ) { row_map( irow ) = update ; } + + update += row_count( irow ); + + if ( final ) { + if ( irow + 1 == row_count.dimension_0() ) { + row_map( irow + 1 ) = update ; + row_total() = update ; + } + } + } + + // For the reduce phase: + KOKKOS_INLINE_FUNCTION + void init( const TagFillNodeSet & , unsigned & update ) const { update = 0 ; } + + KOKKOS_INLINE_FUNCTION + void join( const TagFillNodeSet & + , volatile unsigned & update + , volatile const unsigned & input ) const { update += input ; } + + // For the scan phase: + KOKKOS_INLINE_FUNCTION + void init( unsigned & update ) const { update = 0 ; } + + KOKKOS_INLINE_FUNCTION + void join( volatile unsigned & update + , volatile const unsigned & input ) const { update += input ; } + + //------------------------------------ +}; + +} /* namespace FENL */ +} /* namespace Example */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Example { +namespace FENL { + +template< class ElemCompType > +class NodeElemGatherFill { +public: + + typedef typename ElemCompType::execution_space execution_space ; + typedef typename ElemCompType::vector_type vector_type ; + typedef typename ElemCompType::sparse_matrix_type sparse_matrix_type ; + typedef typename ElemCompType::elem_node_type elem_node_type ; + typedef typename ElemCompType::elem_vectors_type elem_vectors_type ; + typedef typename ElemCompType::elem_matrices_type elem_matrices_type ; +
typedef typename ElemCompType::elem_graph_type elem_graph_type ; + + static const unsigned ElemNodeCount = ElemCompType::ElemNodeCount ; + + //------------------------------------ + +private: + + typedef Kokkos::StaticCrsGraph< unsigned[2] , execution_space > CrsGraphType ; + typedef typename CrsGraphType::row_map_type::non_const_type RowMapType ; + typedef Kokkos::View< unsigned , execution_space > UnsignedValue ; + + enum PhaseType { FILL_NODE_COUNT , + SCAN_NODE_COUNT , + FILL_GRAPH_ENTRIES , + SORT_GRAPH_ENTRIES , + GATHER_FILL }; + + const elem_node_type elem_node_id ; + const elem_graph_type elem_graph ; + UnsignedValue row_total ; + RowMapType row_count ; + RowMapType row_map ; + CrsGraphType graph ; + vector_type residual ; + sparse_matrix_type jacobian ; + elem_vectors_type elem_residual ; + elem_matrices_type elem_jacobian ; + PhaseType phase ; + +public: + + NodeElemGatherFill() + : elem_node_id() + , elem_graph() + , row_total() + , row_count() + , row_map() + , graph() + , residual() + , jacobian() + , elem_residual() + , elem_jacobian() + , phase( FILL_NODE_COUNT ) + {} + + NodeElemGatherFill( const NodeElemGatherFill & rhs ) + : elem_node_id( rhs.elem_node_id ) + , elem_graph( rhs.elem_graph ) + , row_total( rhs.row_total ) + , row_count( rhs.row_count ) + , row_map( rhs.row_map ) + , graph( rhs.graph ) + , residual( rhs.residual ) + , jacobian( rhs.jacobian ) + , elem_residual( rhs.elem_residual ) + , elem_jacobian( rhs.elem_jacobian ) + , phase( rhs.phase ) + {} + + NodeElemGatherFill( const elem_node_type & arg_elem_node_id , + const elem_graph_type & arg_elem_graph , + const vector_type & arg_residual , + const sparse_matrix_type & arg_jacobian , + const elem_vectors_type & arg_elem_residual , + const elem_matrices_type & arg_elem_jacobian ) + : elem_node_id( arg_elem_node_id ) + , elem_graph( arg_elem_graph ) + , row_total( "row_total" ) + , row_count( "row_count" , arg_residual.dimension_0() ) + , row_map( "graph_row_map" , 
arg_residual.dimension_0() + 1 ) + , graph() + , residual( arg_residual ) + , jacobian( arg_jacobian ) + , elem_residual( arg_elem_residual ) + , elem_jacobian( arg_elem_jacobian ) + , phase( FILL_NODE_COUNT ) + { + //-------------------------------- + // Count node->element relations + + phase = FILL_NODE_COUNT ; + + Kokkos::parallel_for( elem_node_id.dimension_0() , *this ); + + //-------------------------------- + + phase = SCAN_NODE_COUNT ; + + // Exclusive scan of row_count into row_map + // including the final total in the last of the 'residual.dimension_0() + 1' positions. + // The 'row_count' values are zeroed by the deep_copy below. + Kokkos::parallel_scan( residual.dimension_0() , *this ); + + // Zero the row count for the fill: + Kokkos::deep_copy( row_count , typename RowMapType::value_type(0) ); + + unsigned graph_entry_count = 0 ; + + Kokkos::deep_copy( graph_entry_count , row_total ); + + // Assign graph's row_map and allocate graph's entries + graph.row_map = row_map ; + + typedef typename CrsGraphType::entries_type graph_entries_type ; + + graph.entries = graph_entries_type( "graph_entries" , graph_entry_count ); + + //-------------------------------- + // Fill graph's entries with each node's (element, local node) pairs.
+ + phase = FILL_GRAPH_ENTRIES ; + + Kokkos::deep_copy( row_count , 0u ); + Kokkos::parallel_for( elem_node_id.dimension_0() , *this ); + + execution_space::fence(); + + //-------------------------------- + // Done with the temporary sets and arrays + + row_total = UnsignedValue(); + row_count = RowMapType(); + row_map = RowMapType(); + + //-------------------------------- + + phase = SORT_GRAPH_ENTRIES ; + Kokkos::parallel_for( residual.dimension_0() , *this ); + + execution_space::fence(); + + phase = GATHER_FILL ; + } + + void apply() const + { + Kokkos::parallel_for( residual.dimension_0() , *this ); + } + + //------------------------------------ + //------------------------------------ + // parallel_for: Count node->element pairs + + KOKKOS_INLINE_FUNCTION + void fill_node_count( const unsigned ielem ) const + { + for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.dimension_1() ; ++row_local_node ) { + + const unsigned row_node = elem_node_id( ielem , row_local_node ); + + if ( row_node < row_count.dimension_0() ) { + atomic_fetch_add( & row_count( row_node ) , 1 ); + } + } + } + + KOKKOS_INLINE_FUNCTION + void fill_graph_entries( const unsigned ielem ) const + { + for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.dimension_1() ; ++row_local_node ) { + + const unsigned row_node = elem_node_id( ielem , row_local_node ); + + if ( row_node < row_count.dimension_0() ) { + + const unsigned offset = graph.row_map( row_node ) + atomic_fetch_add( & row_count( row_node ) , 1 ); + + graph.entries( offset , 0 ) = ielem ; + graph.entries( offset , 1 ) = row_local_node ; + } + } + } + + KOKKOS_INLINE_FUNCTION + void sort_graph_entries( const unsigned irow ) const + { + const unsigned row_beg = graph.row_map( irow ); + const unsigned row_end = graph.row_map( irow + 1 ); + for ( unsigned i = row_beg + 1 ; i < row_end ; ++i ) { + const unsigned elem = graph.entries(i,0); + const unsigned local = graph.entries(i,1); + unsigned j = i ; + for ( ; 
row_beg < j && elem < graph.entries(j-1,0) ; --j ) { + graph.entries(j,0) = graph.entries(j-1,0); + graph.entries(j,1) = graph.entries(j-1,1); + } + graph.entries(j,0) = elem ; + graph.entries(j,1) = local ; + } + } + + //------------------------------------ + + KOKKOS_INLINE_FUNCTION + void gather_fill( const unsigned irow ) const + { + const unsigned node_elem_begin = graph.row_map(irow); + const unsigned node_elem_end = graph.row_map(irow+1); + + // for each element that a node belongs to + + for ( unsigned i = node_elem_begin ; i < node_elem_end ; i++ ) { + + const unsigned elem_id = graph.entries( i, 0); + const unsigned row_index = graph.entries( i, 1); + + residual(irow) += elem_residual(elem_id, row_index); + + // for each node in a particular related element + // gather the contents of the element stiffness + // matrix that belong in irow + + for ( unsigned j = 0 ; j < ElemNodeCount ; ++j ) { + const unsigned A_index = elem_graph( elem_id , row_index , j ); + + jacobian.coeff( A_index ) += elem_jacobian( elem_id, row_index, j ); + } + } + } + + //------------------------------------ + + KOKKOS_INLINE_FUNCTION + void operator()( const unsigned iwork ) const + { + if ( phase == FILL_NODE_COUNT ) { + fill_node_count( iwork ); + } + else if ( phase == FILL_GRAPH_ENTRIES ) { + fill_graph_entries( iwork ); + } + else if ( phase == SORT_GRAPH_ENTRIES ) { + sort_graph_entries( iwork ); + } + else if ( phase == GATHER_FILL ) { + gather_fill( iwork ); + } + } + + //------------------------------------ + // parallel_scan: row offsets + + typedef unsigned value_type ; + + KOKKOS_INLINE_FUNCTION + void operator()( const unsigned irow , unsigned & update , const bool final ) const + { + // exclusive scan + if ( final ) { row_map( irow ) = update ; } + + update += row_count( irow ); + + if ( final ) { + if ( irow + 1 == row_count.dimension_0() ) { + row_map( irow + 1 ) = update ; + row_total() = update ; + } + } + } + + KOKKOS_INLINE_FUNCTION + void init( unsigned & 
update ) const { update = 0 ; } + + KOKKOS_INLINE_FUNCTION + void join( volatile unsigned & update , const volatile unsigned & input ) const { update += input ; } +}; + +} /* namespace FENL */ +} /* namespace Example */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Example { +namespace FENL { + +template< class FiniteElementMeshType , class SparseMatrixType > +class ElementComputation ; + + +template< class ExecSpace , BoxElemPart::ElemOrder Order , class CoordinateMap , typename ScalarType > +class ElementComputation< + Kokkos::Example::BoxElemFixture< ExecSpace , Order , CoordinateMap > , + Kokkos::Example::CrsMatrix< ScalarType , ExecSpace > > +{ +public: + + typedef Kokkos::Example::BoxElemFixture< ExecSpace, Order, CoordinateMap > mesh_type ; + typedef Kokkos::Example::HexElement_Data< mesh_type::ElemNode > element_data_type ; + + typedef Kokkos::Example::CrsMatrix< ScalarType , ExecSpace > sparse_matrix_type ; + typedef typename sparse_matrix_type::StaticCrsGraphType sparse_graph_type ; + + typedef ExecSpace execution_space ; + typedef ScalarType scalar_type ; + + static const unsigned SpatialDim = element_data_type::spatial_dimension ; + static const unsigned TensorDim = SpatialDim * SpatialDim ; + static const unsigned ElemNodeCount = element_data_type::element_node_count ; + static const unsigned FunctionCount = element_data_type::function_count ; + static const unsigned IntegrationCount = element_data_type::integration_count ; + + //------------------------------------ + + typedef typename mesh_type::node_coord_type node_coord_type ; + typedef typename mesh_type::elem_node_type elem_node_type ; + typedef Kokkos::View< scalar_type*[FunctionCount][FunctionCount] , execution_space > elem_matrices_type ; + typedef Kokkos::View< scalar_type*[FunctionCount] , execution_space > elem_vectors_type ; + typedef Kokkos::View< scalar_type* , execution_space > vector_type ; + 
+ typedef typename NodeNodeGraph< elem_node_type , sparse_graph_type , ElemNodeCount >::ElemGraphType elem_graph_type ; + + //------------------------------------ + + + //------------------------------------ + // Computational data: + + const element_data_type elem_data ; + const elem_node_type elem_node_ids ; + const node_coord_type node_coords ; + const elem_graph_type elem_graph ; + const elem_matrices_type elem_jacobians ; + const elem_vectors_type elem_residuals ; + const vector_type solution ; + const vector_type residual ; + const sparse_matrix_type jacobian ; + const scalar_type coeff_K ; + + ElementComputation( const ElementComputation & rhs ) + : elem_data() + , elem_node_ids( rhs.elem_node_ids ) + , node_coords( rhs.node_coords ) + , elem_graph( rhs.elem_graph ) + , elem_jacobians( rhs.elem_jacobians ) + , elem_residuals( rhs.elem_residuals ) + , solution( rhs.solution ) + , residual( rhs.residual ) + , jacobian( rhs.jacobian ) + , coeff_K( rhs.coeff_K ) + {} + + // If the element->sparse_matrix graph is provided then perform atomic updates + // Otherwise fill per-element contributions for subsequent gather-add into a residual and jacobian.
+ ElementComputation( const mesh_type & arg_mesh , + const scalar_type arg_coeff_K , + const vector_type & arg_solution , + const elem_graph_type & arg_elem_graph , + const sparse_matrix_type & arg_jacobian , + const vector_type & arg_residual ) + : elem_data() + , elem_node_ids( arg_mesh.elem_node() ) + , node_coords( arg_mesh.node_coord() ) + , elem_graph( arg_elem_graph ) + , elem_jacobians() + , elem_residuals() + , solution( arg_solution ) + , residual( arg_residual ) + , jacobian( arg_jacobian ) + , coeff_K( arg_coeff_K ) + {} + + ElementComputation( const mesh_type & arg_mesh , + const scalar_type arg_coeff_K , + const vector_type & arg_solution ) + : elem_data() + , elem_node_ids( arg_mesh.elem_node() ) + , node_coords( arg_mesh.node_coord() ) + , elem_graph() + , elem_jacobians( "elem_jacobians" , arg_mesh.elem_count() ) + , elem_residuals( "elem_residuals" , arg_mesh.elem_count() ) + , solution( arg_solution ) + , residual() + , jacobian() + , coeff_K( arg_coeff_K ) + {} + + //------------------------------------ + + void apply() const + { + parallel_for( elem_node_ids.dimension_0() , *this ); + } + + //------------------------------------ + + static const unsigned FLOPS_transform_gradients = + /* Jacobian */ FunctionCount * TensorDim * 2 + + /* Inverse jacobian */ TensorDim * 6 + 6 + + /* Gradient transform */ FunctionCount * 15 ; + + KOKKOS_INLINE_FUNCTION + float transform_gradients( + const float grad[][ FunctionCount ] , // Gradient of bases master element + const double x[] , + const double y[] , + const double z[] , + float dpsidx[] , + float dpsidy[] , + float dpsidz[] ) const + { + enum { j11 = 0 , j12 = 1 , j13 = 2 , + j21 = 3 , j22 = 4 , j23 = 5 , + j31 = 6 , j32 = 7 , j33 = 8 }; + + // Jacobian accumulation: + + double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + for( unsigned i = 0; i < FunctionCount ; ++i ) { + const double x1 = x[i] ; + const double x2 = y[i] ; + const double x3 = z[i] ; + + const float g1 = grad[0][i] ; + const 
float g2 = grad[1][i] ; + const float g3 = grad[2][i] ; + + J[j11] += g1 * x1 ; + J[j12] += g1 * x2 ; + J[j13] += g1 * x3 ; + + J[j21] += g2 * x1 ; + J[j22] += g2 * x2 ; + J[j23] += g2 * x3 ; + + J[j31] += g3 * x1 ; + J[j32] += g3 * x2 ; + J[j33] += g3 * x3 ; + } + + // Inverse jacobian: + + float invJ[ TensorDim ] = { + static_cast<float>( J[j22] * J[j33] - J[j23] * J[j32] ) , + static_cast<float>( J[j13] * J[j32] - J[j12] * J[j33] ) , + static_cast<float>( J[j12] * J[j23] - J[j13] * J[j22] ) , + + static_cast<float>( J[j23] * J[j31] - J[j21] * J[j33] ) , + static_cast<float>( J[j11] * J[j33] - J[j13] * J[j31] ) , + static_cast<float>( J[j13] * J[j21] - J[j11] * J[j23] ) , + + static_cast<float>( J[j21] * J[j32] - J[j22] * J[j31] ) , + static_cast<float>( J[j12] * J[j31] - J[j11] * J[j32] ) , + static_cast<float>( J[j11] * J[j22] - J[j12] * J[j21] ) }; + + const float detJ = J[j11] * invJ[j11] + + J[j21] * invJ[j12] + + J[j31] * invJ[j13] ; + + const float detJinv = 1.0 / detJ ; + + for ( unsigned i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; } + + // Transform gradients: + + for( unsigned i = 0; i < FunctionCount ; ++i ) { + const float g0 = grad[0][i]; + const float g1 = grad[1][i]; + const float g2 = grad[2][i]; + + dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13]; + dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23]; + dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33]; + } + + return detJ ; + } + + KOKKOS_INLINE_FUNCTION + void contributeResidualJacobian( + const float coeff_k , + const double dof_values[] , + const float dpsidx[] , + const float dpsidy[] , + const float dpsidz[] , + const float detJ , + const float integ_weight , + const float bases_vals[] , + double elem_res[] , + double elem_mat[][ FunctionCount ] ) const + { + double value_at_pt = 0 ; + double gradx_at_pt = 0 ; + double grady_at_pt = 0 ; + double gradz_at_pt = 0 ; + + for ( unsigned m = 0 ; m < FunctionCount ; m++ ) { + value_at_pt += dof_values[m] * 
bases_vals[m] ; + gradx_at_pt += dof_values[m] * dpsidx[m] ; + grady_at_pt += dof_values[m] * dpsidy[m] ; + gradz_at_pt += dof_values[m] * dpsidz[m] ; + } + + const scalar_type k_detJ_weight = coeff_k * detJ * integ_weight ; + const double res_val = value_at_pt * value_at_pt * detJ * integ_weight ; + const double mat_val = 2.0 * value_at_pt * detJ * integ_weight ; + + // $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$ + // $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$ + + for ( unsigned m = 0; m < FunctionCount; ++m) { + double * const mat = elem_mat[m] ; + const float bases_val_m = bases_vals[m]; + const float dpsidx_m = dpsidx[m] ; + const float dpsidy_m = dpsidy[m] ; + const float dpsidz_m = dpsidz[m] ; + + elem_res[m] += k_detJ_weight * ( dpsidx_m * gradx_at_pt + + dpsidy_m * grady_at_pt + + dpsidz_m * gradz_at_pt ) + + res_val * bases_val_m ; + + for( unsigned n = 0; n < FunctionCount; n++) { + + mat[n] += k_detJ_weight * ( dpsidx_m * dpsidx[n] + + dpsidy_m * dpsidy[n] + + dpsidz_m * dpsidz[n] ) + + mat_val * bases_val_m * bases_vals[n]; + } + } + } + + KOKKOS_INLINE_FUNCTION + void operator()( const unsigned ielem ) const + { + // Gather nodal coordinates and solution vector: + + double x[ FunctionCount ] ; + double y[ FunctionCount ] ; + double z[ FunctionCount ] ; + double val[ FunctionCount ] ; + unsigned node_index[ ElemNodeCount ]; + + for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) { + const unsigned ni = elem_node_ids( ielem , i ); + + node_index[i] = ni ; + + x[i] = node_coords( ni , 0 ); + y[i] = node_coords( ni , 1 ); + z[i] = node_coords( ni , 2 ); + + val[i] = solution( ni ); + } + + + double elem_vec[ FunctionCount ] ; + double elem_mat[ FunctionCount ][ FunctionCount ] ; + + for( unsigned i = 0; i < FunctionCount ; i++ ) { + elem_vec[i] = 0 ; + for( unsigned j = 0; j < FunctionCount ; j++){ + elem_mat[i][j] = 0 ; + } + } + + + 
for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) { + float dpsidx[ FunctionCount ] ; + float dpsidy[ FunctionCount ] ; + float dpsidz[ FunctionCount ] ; + + const float detJ = + transform_gradients( elem_data.gradients[i] , x , y , z , + dpsidx , dpsidy , dpsidz ); + + contributeResidualJacobian( coeff_K , + val , dpsidx , dpsidy , dpsidz , + detJ , + elem_data.weights[i] , + elem_data.values[i] , + elem_vec , elem_mat ); + } + +#if 0 + +if ( 1 == ielem ) { + printf("ElemResidual { %f %f %f %f %f %f %f %f }\n", + elem_vec[0], elem_vec[1], elem_vec[2], elem_vec[3], + elem_vec[4], elem_vec[5], elem_vec[6], elem_vec[7]); + + printf("ElemJacobian {\n"); + + for ( unsigned j = 0 ; j < FunctionCount ; ++j ) { + printf(" { %f %f %f %f %f %f %f %f }\n", + elem_mat[j][0], elem_mat[j][1], elem_mat[j][2], elem_mat[j][3], + elem_mat[j][4], elem_mat[j][5], elem_mat[j][6], elem_mat[j][7]); + } + printf("}\n"); +} + +#endif + + if ( ! residual.dimension_0() ) { + for( unsigned i = 0; i < FunctionCount ; i++){ + elem_residuals(ielem, i) = elem_vec[i] ; + for( unsigned j = 0; j < FunctionCount ; j++){ + elem_jacobians(ielem, i, j) = elem_mat[i][j] ; + } + } + } + else { + for( unsigned i = 0 ; i < FunctionCount ; i++ ) { + const unsigned row = node_index[i] ; + if ( row < residual.dimension_0() ) { + atomic_fetch_add( & residual( row ) , elem_vec[i] ); + + for( unsigned j = 0 ; j < FunctionCount ; j++ ) { + const unsigned entry = elem_graph( ielem , i , j ); + if ( entry != ~0u ) { + atomic_fetch_add( & jacobian.coeff( entry ) , elem_mat[i][j] ); + } + } + } + } + } + } +}; /* ElementComputation */ + +//---------------------------------------------------------------------------- + +template< class FixtureType , class SparseMatrixType > +class DirichletComputation ; + +template< class ExecSpace , BoxElemPart::ElemOrder Order , class CoordinateMap , typename ScalarType > +class DirichletComputation< + Kokkos::Example::BoxElemFixture< ExecSpace , Order , CoordinateMap > , + 
Kokkos::Example::CrsMatrix< ScalarType , ExecSpace > > +{ +public: + + typedef Kokkos::Example::BoxElemFixture< ExecSpace, Order, CoordinateMap > mesh_type ; + typedef typename mesh_type::node_coord_type node_coord_type ; + typedef typename node_coord_type::value_type scalar_coord_type ; + + typedef Kokkos::Example::CrsMatrix< ScalarType , ExecSpace > sparse_matrix_type ; + typedef typename sparse_matrix_type::StaticCrsGraphType sparse_graph_type ; + + typedef ExecSpace execution_space ; + typedef ScalarType scalar_type ; + + //------------------------------------ + + typedef Kokkos::View< scalar_type* , execution_space > vector_type ; + + //------------------------------------ + // Computational data: + + const node_coord_type node_coords ; + const vector_type solution ; + const sparse_matrix_type jacobian ; + const vector_type residual ; + const scalar_type bc_lower_value ; + const scalar_type bc_upper_value ; + const scalar_coord_type bc_lower_limit ; + const scalar_coord_type bc_upper_limit ; + const unsigned bc_plane ; + const unsigned node_count ; + bool init ; + + + DirichletComputation( const mesh_type & arg_mesh , + const vector_type & arg_solution , + const sparse_matrix_type & arg_jacobian , + const vector_type & arg_residual , + const unsigned arg_bc_plane , + const scalar_type arg_bc_lower_value , + const scalar_type arg_bc_upper_value ) + : node_coords( arg_mesh.node_coord() ) + , solution( arg_solution ) + , jacobian( arg_jacobian ) + , residual( arg_residual ) + , bc_lower_value( arg_bc_lower_value ) + , bc_upper_value( arg_bc_upper_value ) + , bc_lower_limit( std::numeric_limits<scalar_coord_type>::epsilon() ) + , bc_upper_limit( scalar_coord_type(1) - std::numeric_limits<scalar_coord_type>::epsilon() ) + , bc_plane( arg_bc_plane ) + , node_count( arg_mesh.node_count_owned() ) + , init( false ) + { + parallel_for( node_count , *this ); + init = true ; + } + + void apply() const + { + parallel_for( node_count , *this ); + } + + 
//------------------------------------ + + KOKKOS_INLINE_FUNCTION + void operator()( const unsigned inode ) const + { + // Apply dirichlet boundary condition on the Solution and Residual vectors. + // To maintain the symmetry of the original global stiffness matrix, + // zero out the columns that correspond to boundary conditions, and + // update the residual vector accordingly + + const unsigned iBeg = jacobian.graph.row_map[inode]; + const unsigned iEnd = jacobian.graph.row_map[inode+1]; + + const scalar_coord_type c = node_coords(inode,bc_plane); + const bool bc_lower = c <= bc_lower_limit ; + const bool bc_upper = bc_upper_limit <= c ; + + if ( ! init ) { + solution(inode) = bc_lower ? bc_lower_value : ( + bc_upper ? bc_upper_value : 0 ); + } + else { + if ( bc_lower || bc_upper ) { + + residual(inode) = 0 ; + + // zero each value on the row, and leave a one + // on the diagonal + + for( unsigned i = iBeg ; i < iEnd ; ++i ) { + jacobian.coeff(i) = int(inode) == int(jacobian.graph.entries(i)) ? 1 : 0 ; + } + } + else { + + // Find any columns that are boundary conditions. + // Clear them and adjust the residual vector + + for( unsigned i = iBeg ; i < iEnd ; ++i ) { + const unsigned cnode = jacobian.graph.entries(i) ; + const scalar_coord_type cc = node_coords(cnode,bc_plane); + + if ( ( cc <= bc_lower_limit ) || ( bc_upper_limit <= cc ) ) { + jacobian.coeff(i) = 0 ; + } + } + } + } + } +}; + +} /* namespace FENL */ +} /* namespace Example */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- + +/* A Cuda-specific specialization for the element computation functor. 
*/ +#if defined( __CUDACC__ ) +// #include <NonlinearElement_Cuda.hpp> +#endif + +//---------------------------------------------------------------------------- + +#endif /* #ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP */ + diff --git a/lib/kokkos/example/fenl/fenl_impl.hpp b/lib/kokkos/example/fenl/fenl_impl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..64070ce55fdc1cf7b94d631a0f29b32eecfab357 --- /dev/null +++ b/lib/kokkos/example/fenl/fenl_impl.hpp @@ -0,0 +1,598 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXAMPLE_FENL_IMPL_HPP +#define KOKKOS_EXAMPLE_FENL_IMPL_HPP + +#include <math.h> + +// Kokkos libraries' headers: + +#include <Kokkos_UnorderedMap.hpp> +#include <Kokkos_StaticCrsGraph.hpp> +#include <impl/Kokkos_Timer.hpp> + +// Examples headers: + +#include <BoxElemFixture.hpp> +#include <VectorImport.hpp> +#include <CGSolve.hpp> + +#include <fenl.hpp> +#include <fenl_functors.hpp> + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Example { +namespace FENL { + +inline +double maximum( MPI_Comm comm , double local ) +{ + double global = local ; +#if defined( KOKKOS_HAVE_MPI ) + MPI_Allreduce( & local , & global , 1 , MPI_DOUBLE , MPI_MAX , comm ); +#endif + return global ; +} + +} /* namespace FENL */ +} /* namespace Example */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Example { +namespace FENL { + +class ManufacturedSolution { +public: + + // Manufactured solution for one dimensional nonlinear PDE + // + // -K T_zz + T^2 = 0 ; T(zmin) = T_zmin ; T(zmax) = T_zmax + // + // Has an analytic solution of the form: + // + // T(z) = ( a ( z - zmin ) + b )^(-2) where K = 1 / ( 6 a^2 ) + // + // 
Given T_0 and T_L compute K for this analytic solution. + // + // Two analytic solutions: + // + // Solution with singularity: + // , a( ( 1.0 / sqrt(T_zmax) + 1.0 / sqrt(T_zmin) ) / ( zmax - zmin ) ) + // , b( -1.0 / sqrt(T_zmin) ) + // + // Solution without singularity: + // , a( ( 1.0 / sqrt(T_zmax) - 1.0 / sqrt(T_zmin) ) / ( zmax - zmin ) ) + // , b( 1.0 / sqrt(T_zmin) ) + + const double zmin ; + const double zmax ; + const double T_zmin ; + const double T_zmax ; + const double a ; + const double b ; + const double K ; + + ManufacturedSolution( const double arg_zmin , + const double arg_zmax , + const double arg_T_zmin , + const double arg_T_zmax ) + : zmin( arg_zmin ) + , zmax( arg_zmax ) + , T_zmin( arg_T_zmin ) + , T_zmax( arg_T_zmax ) + , a( ( 1.0 / sqrt(T_zmax) - 1.0 / sqrt(T_zmin) ) / ( zmax - zmin ) ) + , b( 1.0 / sqrt(T_zmin) ) + , K( 1.0 / ( 6.0 * a * a ) ) + {} + + double operator()( const double z ) const + { + const double tmp = a * ( z - zmin ) + b ; + return 1.0 / ( tmp * tmp ); + } +}; + +} /* namespace FENL */ +} /* namespace Example */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Example { +namespace FENL { + +template < class Space , BoxElemPart::ElemOrder ElemOrder > +Perf fenl( + MPI_Comm comm , + const int use_print , + const int use_trials , + const int use_atomic , + const int use_elems[] ) +{ + typedef Kokkos::Example::BoxElemFixture< Space , ElemOrder > FixtureType ; + + typedef Kokkos::Example::CrsMatrix< double , Space > + SparseMatrixType ; + + typedef typename SparseMatrixType::StaticCrsGraphType + SparseGraphType ; + + typedef Kokkos::Example::FENL::NodeNodeGraph< typename FixtureType::elem_node_type , SparseGraphType , FixtureType::ElemNode > + NodeNodeGraphType ; + + typedef Kokkos::Example::FENL::ElementComputation< FixtureType , SparseMatrixType > + ElementComputationType ; + + typedef Kokkos::Example::FENL::DirichletComputation< 
FixtureType , SparseMatrixType > + DirichletComputationType ; + + typedef NodeElemGatherFill< ElementComputationType > + NodeElemGatherFillType ; + + typedef typename ElementComputationType::vector_type VectorType ; + + typedef Kokkos::Example::VectorImport< + typename FixtureType::comm_list_type , + typename FixtureType::send_nodeid_type , + VectorType > ImportType ; + + //------------------------------------ + + const unsigned newton_iteration_limit = 10 ; + const double newton_iteration_tolerance = 1e-7 ; + const unsigned cg_iteration_limit = 200 ; + const double cg_iteration_tolerance = 1e-7 ; + + //------------------------------------ + + const int print_flag = use_print && Kokkos::Impl::is_same< Kokkos::HostSpace , typename Space::memory_space >::value ; + + int comm_rank ; + int comm_size ; + + MPI_Comm_rank( comm , & comm_rank ); + MPI_Comm_size( comm , & comm_size ); + + // Decompose by node to avoid mpi-communication for assembly + + const float bubble_x = 1.0 ; + const float bubble_y = 1.0 ; + const float bubble_z = 1.0 ; + + const FixtureType fixture( BoxElemPart::DecomposeNode , comm_size , comm_rank , + use_elems[0] , use_elems[1] , use_elems[2] , + bubble_x , bubble_y , bubble_z ); + + + { + int global_error = ! 
fixture.ok(); + +#if defined( KOKKOS_HAVE_MPI ) + int local_error = global_error ; + global_error = 0 ; + MPI_Allreduce( & local_error , & global_error , 1 , MPI_INT , MPI_SUM , comm ); +#endif + + if ( global_error ) { + throw std::runtime_error(std::string("Error generating finite element fixture")); + } + } + + //------------------------------------ + + const ImportType comm_nodal_import( + comm , + fixture.recv_node() , + fixture.send_node() , + fixture.send_nodeid() , + fixture.node_count_owned() , + fixture.node_count() - fixture.node_count_owned() ); + + //------------------------------------ + + const double bc_lower_value = 1 ; + const double bc_upper_value = 2 ; + + const Kokkos::Example::FENL::ManufacturedSolution + manufactured_solution( 0 , 1 , bc_lower_value , bc_upper_value ); + + //------------------------------------ + + for ( int k = 0 ; k < comm_size && use_print ; ++k ) { + if ( k == comm_rank ) { + typename FixtureType::node_grid_type::HostMirror + h_node_grid = Kokkos::create_mirror_view( fixture.node_grid() ); + + typename FixtureType::node_coord_type::HostMirror + h_node_coord = Kokkos::create_mirror_view( fixture.node_coord() ); + + typename FixtureType::elem_node_type::HostMirror + h_elem_node = Kokkos::create_mirror_view( fixture.elem_node() ); + + Kokkos::deep_copy( h_node_grid , fixture.node_grid() ); + Kokkos::deep_copy( h_node_coord , fixture.node_coord() ); + Kokkos::deep_copy( h_elem_node , fixture.elem_node() ); + + std::cout << "MPI[" << comm_rank << "]" << std::endl ; + std::cout << "Node grid {" ; + for ( unsigned inode = 0 ; inode < fixture.node_count() ; ++inode ) { + std::cout << " (" << h_node_grid(inode,0) + << "," << h_node_grid(inode,1) + << "," << h_node_grid(inode,2) + << ")" ; + } + std::cout << " }" << std::endl ; + + std::cout << "Node coord {" ; + for ( unsigned inode = 0 ; inode < fixture.node_count() ; ++inode ) { + std::cout << " (" << h_node_coord(inode,0) + << "," << h_node_coord(inode,1) + << "," << 
h_node_coord(inode,2) + << ")" ; + } + std::cout << " }" << std::endl ; + + std::cout << "Manufactured solution" + << " a[" << manufactured_solution.a << "]" + << " b[" << manufactured_solution.b << "]" + << " K[" << manufactured_solution.K << "]" + << " {" ; + for ( unsigned inode = 0 ; inode < fixture.node_count() ; ++inode ) { + std::cout << " " << manufactured_solution( h_node_coord( inode , 2 ) ); + } + std::cout << " }" << std::endl ; + + std::cout << "ElemNode {" << std::endl ; + for ( unsigned ielem = 0 ; ielem < fixture.elem_count() ; ++ielem ) { + std::cout << " elem[" << ielem << "]{" ; + for ( unsigned inode = 0 ; inode < FixtureType::ElemNode ; ++inode ) { + std::cout << " " << h_elem_node(ielem,inode); + } + std::cout << " }{" ; + for ( unsigned inode = 0 ; inode < FixtureType::ElemNode ; ++inode ) { + std::cout << " (" << h_node_grid(h_elem_node(ielem,inode),0) + << "," << h_node_grid(h_elem_node(ielem,inode),1) + << "," << h_node_grid(h_elem_node(ielem,inode),2) + << ")" ; + } + std::cout << " }" << std::endl ; + } + std::cout << "}" << std::endl ; + } + std::cout.flush(); + MPI_Barrier( comm ); + } + + //------------------------------------ + + Kokkos::Timer wall_clock ; + + Perf perf_stats = Perf() ; + + for ( int itrial = 0 ; itrial < use_trials ; ++itrial ) { + + Perf perf = Perf() ; + + perf.global_elem_count = fixture.elem_count_global(); + perf.global_node_count = fixture.node_count_global(); + + //---------------------------------- + // Create the sparse matrix graph and element-to-graph map + // from the element->to->node identifier array. + // The graph only has rows for the owned nodes. 
+ + typename NodeNodeGraphType::Times graph_times; + + const NodeNodeGraphType + mesh_to_graph( fixture.elem_node() , fixture.node_count_owned(), graph_times ); + + perf.map_ratio = maximum(comm, graph_times.ratio); + perf.fill_node_set = maximum(comm, graph_times.fill_node_set); + perf.scan_node_count = maximum(comm, graph_times.scan_node_count); + perf.fill_graph_entries = maximum(comm, graph_times.fill_graph_entries); + perf.sort_graph_entries = maximum(comm, graph_times.sort_graph_entries); + perf.fill_element_graph = maximum(comm, graph_times.fill_element_graph); + + wall_clock.reset(); + // Create the sparse matrix from the graph: + + SparseMatrixType jacobian( mesh_to_graph.graph ); + + Space::fence(); + + perf.create_sparse_matrix = maximum( comm , wall_clock.seconds() ); + + //---------------------------------- + + for ( int k = 0 ; k < comm_size && print_flag ; ++k ) { + if ( k == comm_rank ) { + const unsigned nrow = jacobian.graph.numRows(); + std::cout << "MPI[" << comm_rank << "]" << std::endl ; + std::cout << "JacobianGraph {" << std::endl ; + for ( unsigned irow = 0 ; irow < nrow ; ++irow ) { + std::cout << " row[" << irow << "]{" ; + const unsigned entry_end = jacobian.graph.row_map(irow+1); + for ( unsigned entry = jacobian.graph.row_map(irow) ; entry < entry_end ; ++entry ) { + std::cout << " " << jacobian.graph.entries(entry); + } + std::cout << " }" << std::endl ; + } + std::cout << "}" << std::endl ; + + std::cout << "ElemGraph {" << std::endl ; + for ( unsigned ielem = 0 ; ielem < mesh_to_graph.elem_graph.dimension_0() ; ++ielem ) { + std::cout << " elem[" << ielem << "]{" ; + for ( unsigned irow = 0 ; irow < mesh_to_graph.elem_graph.dimension_1() ; ++irow ) { + std::cout << " {" ; + for ( unsigned icol = 0 ; icol < mesh_to_graph.elem_graph.dimension_2() ; ++icol ) { + std::cout << " " << mesh_to_graph.elem_graph(ielem,irow,icol); + } + std::cout << " }" ; + } + std::cout << " }" << std::endl ; + } + std::cout << "}" << std::endl ; + } + 
std::cout.flush(); + MPI_Barrier( comm ); + } + + //---------------------------------- + + // Allocate solution vector for each node in the mesh and residual vector for each owned node + const VectorType nodal_solution( "nodal_solution" , fixture.node_count() ); + const VectorType nodal_residual( "nodal_residual" , fixture.node_count_owned() ); + const VectorType nodal_delta( "nodal_delta" , fixture.node_count_owned() ); + + // Create element computation functor + const ElementComputationType elemcomp( + use_atomic ? ElementComputationType( fixture , manufactured_solution.K , nodal_solution , + mesh_to_graph.elem_graph , jacobian , nodal_residual ) + : ElementComputationType( fixture , manufactured_solution.K , nodal_solution ) ); + + const NodeElemGatherFillType gatherfill( + use_atomic ? NodeElemGatherFillType() + : NodeElemGatherFillType( fixture.elem_node() , + mesh_to_graph.elem_graph , + nodal_residual , + jacobian , + elemcomp.elem_residuals , + elemcomp.elem_jacobians ) ); + + // Create boundary condition functor + const DirichletComputationType dirichlet( + fixture , nodal_solution , jacobian , nodal_residual , + 2 /* apply at 'z' ends */ , + manufactured_solution.T_zmin , + manufactured_solution.T_zmax ); + + //---------------------------------- + // Nonlinear Newton iteration: + + double residual_norm_init = 0 ; + + for ( perf.newton_iter_count = 0 ; + perf.newton_iter_count < newton_iteration_limit ; + ++perf.newton_iter_count ) { + + //-------------------------------- + + comm_nodal_import( nodal_solution ); + + //-------------------------------- + // Element contributions to residual and jacobian + + wall_clock.reset(); + + Kokkos::deep_copy( nodal_residual , double(0) ); + Kokkos::deep_copy( jacobian.coeff , double(0) ); + + elemcomp.apply(); + + if ( ! 
use_atomic ) { + gatherfill.apply(); + } + + Space::fence(); + perf.fill_time = maximum( comm , wall_clock.seconds() ); + + //-------------------------------- + // Apply boundary conditions + + wall_clock.reset(); + + dirichlet.apply(); + + Space::fence(); + perf.bc_time = maximum( comm , wall_clock.seconds() ); + + //-------------------------------- + // Evaluate convergence + + const double residual_norm = + std::sqrt( + Kokkos::Example::all_reduce( + Kokkos::Example::dot( fixture.node_count_owned() , nodal_residual, nodal_residual ) , comm ) ); + + perf.newton_residual = residual_norm ; + + if ( 0 == perf.newton_iter_count ) { residual_norm_init = residual_norm ; } + + if ( residual_norm < residual_norm_init * newton_iteration_tolerance ) { break ; } + + //-------------------------------- + // Solve for nonlinear update + + CGSolveResult cg_result ; + + Kokkos::Example::cgsolve( comm_nodal_import + , jacobian + , nodal_residual + , nodal_delta + , cg_iteration_limit + , cg_iteration_tolerance + , & cg_result + ); + + // Update solution vector + + Kokkos::Example::waxpby( fixture.node_count_owned() , nodal_solution , -1.0 , nodal_delta , 1.0 , nodal_solution ); + + perf.cg_iter_count += cg_result.iteration ; + perf.matvec_time += cg_result.matvec_time ; + perf.cg_time += cg_result.iter_time ; + + //-------------------------------- + + if ( print_flag ) { + const double delta_norm = + std::sqrt( + Kokkos::Example::all_reduce( + Kokkos::Example::dot( fixture.node_count_owned() , nodal_delta, nodal_delta ) , comm ) ); + + if ( 0 == comm_rank ) { + std::cout << "Newton iteration[" << perf.newton_iter_count << "]" + << " residual[" << perf.newton_residual << "]" + << " update[" << delta_norm << "]" + << " cg_iteration[" << cg_result.iteration << "]" + << " cg_residual[" << cg_result.norm_res << "]" + << std::endl ; + } + + for ( int k = 0 ; k < comm_size ; ++k ) { + if ( k == comm_rank ) { + const unsigned nrow = jacobian.graph.numRows(); + + std::cout << "MPI[" << 
comm_rank << "]" << std::endl ; + std::cout << "Residual {" ; + for ( unsigned irow = 0 ; irow < nrow ; ++irow ) { + std::cout << " " << nodal_residual(irow); + } + std::cout << " }" << std::endl ; + + std::cout << "Delta {" ; + for ( unsigned irow = 0 ; irow < nrow ; ++irow ) { + std::cout << " " << nodal_delta(irow); + } + std::cout << " }" << std::endl ; + + std::cout << "Solution {" ; + for ( unsigned irow = 0 ; irow < nrow ; ++irow ) { + std::cout << " " << nodal_solution(irow); + } + std::cout << " }" << std::endl ; + + std::cout << "Jacobian[ " + << jacobian.graph.numRows() << " x " << Kokkos::maximum_entry( jacobian.graph ) + << " ] {" << std::endl ; + for ( unsigned irow = 0 ; irow < nrow ; ++irow ) { + std::cout << " {" ; + const unsigned entry_end = jacobian.graph.row_map(irow+1); + for ( unsigned entry = jacobian.graph.row_map(irow) ; entry < entry_end ; ++entry ) { + std::cout << " (" << jacobian.graph.entries(entry) + << "," << jacobian.coeff(entry) + << ")" ; + } + std::cout << " }" << std::endl ; + } + std::cout << "}" << std::endl ; + } + std::cout.flush(); + MPI_Barrier( comm ); + } + } + //-------------------------------- + } + + // Evaluate solution error + + if ( 0 == itrial ) { + const typename FixtureType::node_coord_type::HostMirror + h_node_coord = Kokkos::create_mirror_view( fixture.node_coord() ); + + const typename VectorType::HostMirror + h_nodal_solution = Kokkos::create_mirror_view( nodal_solution ); + + Kokkos::deep_copy( h_node_coord , fixture.node_coord() ); + Kokkos::deep_copy( h_nodal_solution , nodal_solution ); + + double error_max = 0 ; + for ( unsigned inode = 0 ; inode < fixture.node_count_owned() ; ++inode ) { + const double answer = manufactured_solution( h_node_coord( inode , 2 ) ); + const double error = ( h_nodal_solution(inode) - answer ) / answer ; + if ( error_max < fabs( error ) ) { error_max = fabs( error ); } + } + + perf.error_max = std::sqrt( Kokkos::Example::all_reduce_max( error_max , comm ) ); + + perf_stats 
= perf ; + } + else { + perf_stats.fill_node_set = std::min( perf_stats.fill_node_set , perf.fill_node_set ); + perf_stats.scan_node_count = std::min( perf_stats.scan_node_count , perf.scan_node_count ); + perf_stats.fill_graph_entries = std::min( perf_stats.fill_graph_entries , perf.fill_graph_entries ); + perf_stats.sort_graph_entries = std::min( perf_stats.sort_graph_entries , perf.sort_graph_entries ); + perf_stats.fill_element_graph = std::min( perf_stats.fill_element_graph , perf.fill_element_graph ); + perf_stats.create_sparse_matrix = std::min( perf_stats.create_sparse_matrix , perf.create_sparse_matrix ); + perf_stats.fill_time = std::min( perf_stats.fill_time , perf.fill_time ); + perf_stats.bc_time = std::min( perf_stats.bc_time , perf.bc_time ); + perf_stats.cg_time = std::min( perf_stats.cg_time , perf.cg_time ); + } + } + + return perf_stats ; +} + +} /* namespace FENL */ +} /* namespace Example */ +} /* namespace Kokkos */ + +#endif /* #ifndef KOKKOS_EXAMPLE_FENL_IMPL_HPP */ + diff --git a/lib/kokkos/example/fenl/main.cpp b/lib/kokkos/example/fenl/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..06005d97cb17d20fcf8759f08f76823a936ee558 --- /dev/null +++ b/lib/kokkos/example/fenl/main.cpp @@ -0,0 +1,422 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> + +#include <utility> +#include <string> +#include <vector> +#include <sstream> +#include <iostream> +#include <iomanip> + +#include <Kokkos_Core.hpp> + +#include <WrapMPI.hpp> +#include <fenl.hpp> + +// For vtune +#include <sys/types.h> +#include <unistd.h> + +//---------------------------------------------------------------------------- + +enum { CMD_USE_THREADS = 0 + , CMD_USE_NUMA + , CMD_USE_CORE_PER_NUMA + , CMD_USE_CUDA + , CMD_USE_OPENMP + , CMD_USE_CUDA_DEV + , CMD_USE_FIXTURE_X + , CMD_USE_FIXTURE_Y + , CMD_USE_FIXTURE_Z + , CMD_USE_FIXTURE_BEGIN + , CMD_USE_FIXTURE_END + , CMD_USE_FIXTURE_QUADRATIC + , CMD_USE_ATOMIC + , CMD_USE_TRIALS + , CMD_VTUNE + , CMD_PRINT + , CMD_ECHO + , CMD_ERROR + , CMD_COUNT }; + +void print_cmdline( std::ostream & s , const int cmd[] ) +{ + if ( cmd[ CMD_USE_THREADS ] ) { + s << " Threads(" << cmd[ CMD_USE_THREADS ] + << ") NUMA(" << cmd[ CMD_USE_NUMA ] + << ") CORE_PER_NUMA(" << cmd[ CMD_USE_CORE_PER_NUMA ] + << ")" ; + } + if ( cmd[ CMD_USE_OPENMP ] ) { + s << " OpenMP(" << cmd[ CMD_USE_OPENMP ] + << ") NUMA(" << cmd[ CMD_USE_NUMA ] + << ") CORE_PER_NUMA(" << cmd[ CMD_USE_CORE_PER_NUMA ] + << ")" ; + } + if ( cmd[ CMD_USE_FIXTURE_X ] ) { + s << " Fixture(" << cmd[ CMD_USE_FIXTURE_X ] + << "x" << cmd[ CMD_USE_FIXTURE_Y ] + << "x" << cmd[ CMD_USE_FIXTURE_Z ] + << ")" ; + } + if ( cmd[ CMD_USE_FIXTURE_BEGIN ] ) { + s << " Fixture( " << cmd[ CMD_USE_FIXTURE_BEGIN ] + << " .. 
" << cmd[ CMD_USE_FIXTURE_END ] + << " )" ; + } + if ( cmd[ CMD_USE_FIXTURE_QUADRATIC ] ) { + s << " Quadratic-Element" ; + } + if ( cmd[ CMD_USE_CUDA ] ) { + s << " CUDA(" << cmd[ CMD_USE_CUDA_DEV ] << ")" ; + } + if ( cmd[ CMD_USE_ATOMIC ] ) { + s << " ATOMIC" ; + } + if ( cmd[ CMD_USE_TRIALS ] ) { + s << " TRIALS(" << cmd[ CMD_USE_TRIALS ] << ")" ; + } + if ( cmd[ CMD_VTUNE ] ) { + s << " VTUNE" ; + } + if ( cmd[ CMD_PRINT ] ) { + s << " PRINT" ; + } + s << std::endl ; +} + +void print_perf_value( std::ostream & s , const std::vector<size_t> & widths, const Kokkos::Example::FENL::Perf & perf ) +{ + int i=0; + s << std::setw(widths[i++]) << perf.global_elem_count << " ,"; + s << std::setw(widths[i++]) << perf.global_node_count << " ,"; + s << std::setw(widths[i++]) << perf.newton_iter_count << " ,"; + s << std::setw(widths[i++]) << perf.cg_iter_count << " ,"; + s << std::setw(widths[i++]) << perf.map_ratio << " ,"; + s << std::setw(widths[i++]) << ( perf.fill_node_set * 1000.0 ) / perf.global_node_count << " ,"; + s << std::setw(widths[i++]) << ( perf.scan_node_count * 1000.0 ) / perf.global_node_count << " ,"; + s << std::setw(widths[i++]) << ( perf.fill_graph_entries * 1000.0 ) / perf.global_node_count << " ,"; + s << std::setw(widths[i++]) << ( perf.sort_graph_entries * 1000.0 ) / perf.global_node_count << " ,"; + s << std::setw(widths[i++]) << ( perf.fill_element_graph * 1000.0 ) / perf.global_node_count << " ,"; + s << std::setw(widths[i++]) << ( perf.create_sparse_matrix * 1000.0 ) / perf.global_node_count << " ,"; + s << std::setw(widths[i++]) << ( perf.fill_time * 1000.0 ) / perf.global_node_count << " ,"; + s << std::setw(widths[i++]) << ( perf.bc_time * 1000.0 ) / perf.global_node_count << " ,"; + s << std::setw(widths[i++]) << ( ( perf.matvec_time * 1000.0 ) / perf.cg_iter_count ) / perf.global_node_count << " ,"; + s << std::setw(widths[i++]) << ( ( perf.cg_time * 1000.0 ) / perf.cg_iter_count ) / perf.global_node_count << " ,"; + s << 
std::setw(widths[i]) << perf.error_max; + s << std::endl ; +} + +template< class Device , Kokkos::Example::BoxElemPart::ElemOrder ElemOrder > +void run( MPI_Comm comm , const int cmd[] ) +{ + int comm_rank = 0 ; + +#if defined( KOKKOS_HAVE_MPI ) + MPI_Comm_rank( comm , & comm_rank ); +#else + comm = 0 ; +#endif + + + if ( 0 == comm_rank ) { + if ( cmd[ CMD_USE_THREADS ] ) { std::cout << "THREADS , " << cmd[ CMD_USE_THREADS ] ; } + else if ( cmd[ CMD_USE_OPENMP ] ) { std::cout << "OPENMP , " << cmd[ CMD_USE_OPENMP ] ; } + else if ( cmd[ CMD_USE_CUDA ] ) { std::cout << "CUDA" ; } + + if ( cmd[ CMD_USE_FIXTURE_QUADRATIC ] ) { std::cout << " , QUADRATIC-ELEMENT" ; } + else { std::cout << " , LINEAR-ELEMENT" ; } + + if ( cmd[ CMD_USE_ATOMIC ] ) { std::cout << " , USING ATOMICS" ; } + } + + std::vector< std::pair<std::string,std::string> > headers; + + + headers.push_back(std::make_pair("ELEMS","count")); + headers.push_back(std::make_pair("NODES","count")); + headers.push_back(std::make_pair("NEWTON","iter")); + headers.push_back(std::make_pair("CG","iter")); + headers.push_back(std::make_pair("MAP_RATIO","ratio")); + headers.push_back(std::make_pair("SET_FILL/NODE","millisec")); + headers.push_back(std::make_pair("SCAN/NODE","millisec")); + headers.push_back(std::make_pair("GRAPH_FILL/NODE","millisec")); + headers.push_back(std::make_pair("SORT/NODE","millisec")); + headers.push_back(std::make_pair("ELEM_GRAPH_FILL/NODE","millisec")); + headers.push_back(std::make_pair("MATRIX_CREATE/NODE","millisec")); + headers.push_back(std::make_pair("MATRIX_FILL/NODE","millisec")); + headers.push_back(std::make_pair("BOUNDARY/NODE","millisec")); + headers.push_back(std::make_pair("MAT_VEC/ITER/ROW","millisec")); + headers.push_back(std::make_pair("CG/ITER/ROW","millisec")); + headers.push_back(std::make_pair("ERROR","ratio")); + + // find print widths + size_t min_width = 10; + std::vector< size_t > widths(headers.size()); + for (size_t i=0, ie=headers.size(); i<ie; ++i) + 
widths[i] = std::max(min_width, headers[i].first.size()+1); + + // print column headers + if ( 0 == comm_rank ) { + std::cout << std::endl ; + for (size_t i=0; i<headers.size(); ++i) + std::cout << std::setw(widths[i]) << headers[i].first << " ,"; + std::cout << "\b\b " << std::endl; + for (size_t i=0; i<headers.size(); ++i) + std::cout << std::setw(widths[i]) << headers[i].second << " ,"; + std::cout << "\b\b " << std::endl; + + std::cout << std::scientific; + std::cout.precision(3); + } + + if ( cmd[ CMD_USE_FIXTURE_BEGIN ] ) { + for ( int i = cmd[CMD_USE_FIXTURE_BEGIN] ; i < cmd[CMD_USE_FIXTURE_END] * 2 ; i *= 2 ) { + int nelem[3] ; + nelem[0] = std::max( 1 , (int) cbrt( ((double) i) / 2.0 ) ); + nelem[1] = 1 + nelem[0] ; + nelem[2] = 2 * nelem[0] ; + + const Kokkos::Example::FENL::Perf perf = + cmd[ CMD_USE_FIXTURE_QUADRATIC ] + ? Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemQuadratic > + ( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem ) + : Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemLinear > + ( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem ) + ; + + if ( 0 == comm_rank ) print_perf_value( std::cout , widths, perf ); + } + } + else { + int nelem[3] = { cmd[ CMD_USE_FIXTURE_X ] , + cmd[ CMD_USE_FIXTURE_Y ] , + cmd[ CMD_USE_FIXTURE_Z ] }; + + const Kokkos::Example::FENL::Perf perf = + cmd[ CMD_USE_FIXTURE_QUADRATIC ] + ? 
Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemQuadratic > + ( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem ) + : Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemLinear > + ( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem ) + ; + + if ( 0 == comm_rank ) print_perf_value( std::cout , widths, perf ); + } +} + +//---------------------------------------------------------------------------- + +int main( int argc , char ** argv ) +{ + int comm_rank = 0 ; + +#if defined( KOKKOS_HAVE_MPI ) + MPI_Init( & argc , & argv ); + MPI_Comm comm = MPI_COMM_WORLD ; + MPI_Comm_rank( comm , & comm_rank ); +#else + MPI_Comm comm = 0 ; + (void) comm ; // suppress warning +#endif + + int cmdline[ CMD_COUNT ] ; + + for ( int i = 0 ; i < CMD_COUNT ; ++i ) cmdline[i] = 0 ; + + if ( 0 == comm_rank ) { + for ( int i = 1 ; i < argc ; ++i ) { + if ( 0 == strcasecmp( argv[i] , "threads" ) ) { + cmdline[ CMD_USE_THREADS ] = atoi( argv[++i] ); + } + else if ( 0 == strcasecmp( argv[i] , "openmp" ) ) { + cmdline[ CMD_USE_OPENMP ] = atoi( argv[++i] ); + } + else if ( 0 == strcasecmp( argv[i] , "cores" ) ) { + sscanf( argv[++i] , "%dx%d" , + cmdline + CMD_USE_NUMA , + cmdline + CMD_USE_CORE_PER_NUMA ); + } + else if ( 0 == strcasecmp( argv[i] , "cuda" ) ) { + cmdline[ CMD_USE_CUDA ] = 1 ; + } + else if ( 0 == strcasecmp( argv[i] , "cuda-dev" ) ) { + cmdline[ CMD_USE_CUDA ] = 1 ; + cmdline[ CMD_USE_CUDA_DEV ] = atoi( argv[++i] ) ; + } + else if ( 0 == strcasecmp( argv[i] , "fixture" ) ) { + sscanf( argv[++i] , "%dx%dx%d" , + cmdline + CMD_USE_FIXTURE_X , + cmdline + CMD_USE_FIXTURE_Y , + cmdline + CMD_USE_FIXTURE_Z ); + } + else if ( 0 == strcasecmp( argv[i] , "fixture-range" ) ) { + sscanf( argv[++i] , "%d..%d" , + cmdline + CMD_USE_FIXTURE_BEGIN , + cmdline + CMD_USE_FIXTURE_END ); + } + else if ( 0 == strcasecmp( argv[i] , "fixture-quadratic" ) ) { + cmdline[ CMD_USE_FIXTURE_QUADRATIC ] = 1 ; + } + else 
if ( 0 == strcasecmp( argv[i] , "atomic" ) ) { + cmdline[ CMD_USE_ATOMIC ] = 1 ; + } + else if ( 0 == strcasecmp( argv[i] , "trials" ) ) { + cmdline[ CMD_USE_TRIALS ] = atoi( argv[++i] ) ; + } + else if ( 0 == strcasecmp( argv[i] , "vtune" ) ) { + cmdline[ CMD_VTUNE ] = 1 ; + } + else if ( 0 == strcasecmp( argv[i] , "print" ) ) { + cmdline[ CMD_PRINT ] = 1 ; + } + else if ( 0 == strcasecmp( argv[i] , "echo" ) ) { + cmdline[ CMD_ECHO ] = 1 ; + } + else { + cmdline[ CMD_ERROR ] = 1 ; + + std::cerr << "Unrecognized command line argument #" << i << ": " << argv[i] << std::endl ; + } + } + + if ( cmdline[ CMD_ECHO ] && 0 == comm_rank ) { print_cmdline( std::cout , cmdline ); } + } + +#if defined( KOKKOS_HAVE_MPI ) + MPI_Bcast( cmdline , CMD_COUNT , MPI_INT , 0 , comm ); +#endif + + if ( cmdline[ CMD_VTUNE ] ) { + std::stringstream cmd; + pid_t my_os_pid=getpid(); + const std::string vtune_loc = + "/usr/local/intel/vtune_amplifier_xe_2013/bin64/amplxe-cl"; + const std::string output_dir = "./vtune/vtune."; + const int p_rank = comm_rank; + cmd << vtune_loc + << " -collect hotspots -result-dir " << output_dir << p_rank + << " -target-pid " << my_os_pid << " &"; + if (p_rank == 0) + std::cout << cmd.str() << std::endl; + system(cmd.str().c_str()); + system("sleep 10"); + } + + if ( ! cmdline[ CMD_ERROR ] && ! cmdline[ CMD_ECHO ] ) { + + if ( ! cmdline[ CMD_USE_TRIALS ] ) { cmdline[ CMD_USE_TRIALS ] = 1 ; } + + if ( ! cmdline[ CMD_USE_FIXTURE_X ] && ! 
cmdline[ CMD_USE_FIXTURE_BEGIN ] ) { + cmdline[ CMD_USE_FIXTURE_X ] = 2 ; + cmdline[ CMD_USE_FIXTURE_Y ] = 2 ; + cmdline[ CMD_USE_FIXTURE_Z ] = 2 ; + } + +#if defined( KOKKOS_HAVE_PTHREAD ) + + if ( cmdline[ CMD_USE_THREADS ] ) { + + if ( cmdline[ CMD_USE_NUMA ] && cmdline[ CMD_USE_CORE_PER_NUMA ] ) { + Kokkos::Threads::initialize( cmdline[ CMD_USE_THREADS ] , + cmdline[ CMD_USE_NUMA ] , + cmdline[ CMD_USE_CORE_PER_NUMA ] ); + } + else { + Kokkos::Threads::initialize( cmdline[ CMD_USE_THREADS ] ); + } + + run< Kokkos::Threads , Kokkos::Example::BoxElemPart::ElemLinear >( comm , cmdline ); + + Kokkos::Threads::finalize(); + } + +#endif + +#if defined( KOKKOS_HAVE_OPENMP ) + + if ( cmdline[ CMD_USE_OPENMP ] ) { + + if ( cmdline[ CMD_USE_NUMA ] && cmdline[ CMD_USE_CORE_PER_NUMA ] ) { + Kokkos::OpenMP::initialize( cmdline[ CMD_USE_OPENMP ] , + cmdline[ CMD_USE_NUMA ] , + cmdline[ CMD_USE_CORE_PER_NUMA ] ); + } + else { + Kokkos::OpenMP::initialize( cmdline[ CMD_USE_OPENMP ] ); + } + + run< Kokkos::OpenMP , Kokkos::Example::BoxElemPart::ElemLinear >( comm , cmdline ); + + Kokkos::OpenMP::finalize(); + } + +#endif + +#if defined( KOKKOS_HAVE_CUDA ) + if ( cmdline[ CMD_USE_CUDA ] ) { + // Use the last device: + + Kokkos::HostSpace::execution_space::initialize(); + Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( cmdline[ CMD_USE_CUDA_DEV ] ) ); + + run< Kokkos::Cuda , Kokkos::Example::BoxElemPart::ElemLinear >( comm , cmdline ); + + Kokkos::Cuda::finalize(); + Kokkos::HostSpace::execution_space::finalize(); + } + +#endif + + } + +#if defined( KOKKOS_HAVE_MPI ) + MPI_Finalize(); +#endif + + return cmdline[ CMD_ERROR ] ? 
-1 : 0 ; +} + diff --git a/lib/kokkos/example/fixture/BoxElemFixture.hpp b/lib/kokkos/example/fixture/BoxElemFixture.hpp new file mode 100644 index 0000000000000000000000000000000000000000..66d6e741afcc40d6e7b838bb0712ab5c1652ffe8 --- /dev/null +++ b/lib/kokkos/example/fixture/BoxElemFixture.hpp @@ -0,0 +1,355 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXAMPLE_BOXELEMFIXTURE_HPP +#define KOKKOS_EXAMPLE_BOXELEMFIXTURE_HPP + +#include <stdio.h> +#include <utility> + +#include <Kokkos_Core.hpp> + +#include <HexElement.hpp> +#include <BoxElemPart.hpp> + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Example { + +/** \brief Map a grid onto a unit cube with smooth nonlinear grading + * of the map. 
+ */ +struct MapGridUnitCube { + + const float m_a ; + const float m_b ; + const float m_c ; + const size_t m_max_x ; + const size_t m_max_y ; + const size_t m_max_z ; + + MapGridUnitCube( const size_t grid_max_x , + const size_t grid_max_y , + const size_t grid_max_z , + const float bubble_x , + const float bubble_y , + const float bubble_z ) + : m_a( bubble_x ) + , m_b( bubble_y ) + , m_c( bubble_z ) + , m_max_x( grid_max_x ) + , m_max_y( grid_max_y ) + , m_max_z( grid_max_z ) + {} + + template< typename Scalar > + KOKKOS_INLINE_FUNCTION + void operator()( int grid_x , + int grid_y , + int grid_z , + Scalar & coord_x , + Scalar & coord_y , + Scalar & coord_z ) const + { + // Map to a unit cube [0,1]^3 + + const double x = double(grid_x) / double(m_max_x); + const double y = double(grid_y) / double(m_max_y); + const double z = double(grid_z) / double(m_max_z); + + coord_x = x + x * x * ( x - 1 ) * ( x - 1 ) * m_a ; + coord_y = y + y * y * ( y - 1 ) * ( y - 1 ) * m_b ; + coord_z = z + z * z * ( z - 1 ) * ( z - 1 ) * m_c ; + } +}; + +} // namespace Example +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Example { + +/** \brief Generate a distributed unstructured finite element mesh + * from a partitioned NX*NY*NZ box of elements. + * + * Order owned nodes first followed by off-process nodes + * grouped by owning process. + */ +template< class Device , + BoxElemPart::ElemOrder Order , + class CoordinateMap = MapGridUnitCube > +class BoxElemFixture { +public: + + typedef Device execution_space ; + + enum { SpaceDim = 3 }; + enum { ElemNode = Order == BoxElemPart::ElemLinear ? 8 : + Order == BoxElemPart::ElemQuadratic ? 
27 : 0 }; + +private: + + typedef Kokkos::Example::HexElement_TensorData< ElemNode > hex_data ; + + Kokkos::Example::BoxElemPart m_box_part ; + CoordinateMap m_coord_map ; + + Kokkos::View< double *[SpaceDim] , Device > m_node_coord ; + Kokkos::View< size_t *[SpaceDim] , Device > m_node_grid ; + Kokkos::View< size_t *[ElemNode] , Device > m_elem_node ; + Kokkos::View< size_t *[2] , Device > m_recv_node ; + Kokkos::View< size_t *[2] , Device > m_send_node ; + Kokkos::View< size_t * , Device > m_send_node_id ; + + unsigned char m_elem_node_local[ ElemNode ][4] ; + +public: + + typedef Kokkos::View< const size_t * [ElemNode], Device > elem_node_type ; + typedef Kokkos::View< const double * [SpaceDim], Device > node_coord_type ; + typedef Kokkos::View< const size_t * [SpaceDim], Device > node_grid_type ; + typedef Kokkos::View< const size_t * [2] , Device > comm_list_type ; + typedef Kokkos::View< const size_t * , Device > send_nodeid_type ; + + inline bool ok() const { return m_box_part.ok(); } + + KOKKOS_INLINE_FUNCTION + size_t node_count() const { return m_node_grid.dimension_0(); } + + KOKKOS_INLINE_FUNCTION + size_t node_count_owned() const { return m_box_part.owns_node_count(); } + + KOKKOS_INLINE_FUNCTION + size_t node_count_global() const { return m_box_part.global_node_count(); } + + KOKKOS_INLINE_FUNCTION + size_t elem_count() const { return m_elem_node.dimension_0(); } + + KOKKOS_INLINE_FUNCTION + size_t elem_count_global() const { return m_box_part.global_elem_count(); } + + KOKKOS_INLINE_FUNCTION + size_t elem_node_local( size_t inode , int k ) const + { return m_elem_node_local[inode][k] ; } + + KOKKOS_INLINE_FUNCTION + size_t node_grid( size_t inode , int iaxis ) const + { return m_node_grid(inode,iaxis); } + + KOKKOS_INLINE_FUNCTION + size_t node_global_index( size_t local ) const + { + const size_t tmp_node_grid[SpaceDim] = + { m_node_grid(local,0) , m_node_grid(local,1) , m_node_grid(local,2) }; + return m_box_part.global_node_id( tmp_node_grid ); + 
} + + KOKKOS_INLINE_FUNCTION + double node_coord( size_t inode , int iaxis ) const + { return m_node_coord(inode,iaxis); } + + KOKKOS_INLINE_FUNCTION + size_t node_grid_max( int iaxis ) const + { return m_box_part.global_coord_max(iaxis); } + + KOKKOS_INLINE_FUNCTION + size_t elem_node( size_t ielem , size_t inode ) const + { return m_elem_node(ielem,inode); } + + elem_node_type elem_node() const { return m_elem_node ; } + node_coord_type node_coord() const { return m_node_coord ; } + node_grid_type node_grid() const { return m_node_grid ; } + comm_list_type recv_node() const { return m_recv_node ; } + comm_list_type send_node() const { return m_send_node ; } + send_nodeid_type send_nodeid() const { return m_send_node_id ; } + + KOKKOS_INLINE_FUNCTION + BoxElemFixture( const BoxElemFixture & rhs ) + : m_box_part( rhs.m_box_part ) + , m_coord_map( rhs.m_coord_map ) + , m_node_coord( rhs.m_node_coord ) + , m_node_grid( rhs.m_node_grid ) + , m_elem_node( rhs.m_elem_node ) + , m_recv_node( rhs.m_recv_node ) + , m_send_node( rhs.m_send_node ) + , m_send_node_id( rhs.m_send_node_id ) + { + for ( int i = 0 ; i < ElemNode ; ++i ) { + m_elem_node_local[i][0] = rhs.m_elem_node_local[i][0] ; + m_elem_node_local[i][1] = rhs.m_elem_node_local[i][1] ; + m_elem_node_local[i][2] = rhs.m_elem_node_local[i][2] ; + m_elem_node_local[i][3] = 0 ; + } + } + + BoxElemFixture & operator = ( const BoxElemFixture & rhs ) + { + m_box_part = rhs.m_box_part ; + m_coord_map = rhs.m_coord_map ; + m_node_coord = rhs.m_node_coord ; + m_node_grid = rhs.m_node_grid ; + m_elem_node = rhs.m_elem_node ; + m_recv_node = rhs.m_recv_node ; + m_send_node = rhs.m_send_node ; + m_send_node_id = rhs.m_send_node_id ; + + for ( int i = 0 ; i < ElemNode ; ++i ) { + m_elem_node_local[i][0] = rhs.m_elem_node_local[i][0] ; + m_elem_node_local[i][1] = rhs.m_elem_node_local[i][1] ; + m_elem_node_local[i][2] = rhs.m_elem_node_local[i][2] ; + m_elem_node_local[i][3] = 0 ; + } + return *this ; + } + + BoxElemFixture( 
const BoxElemPart::Decompose decompose , + const size_t global_size , + const size_t global_rank , + const size_t elem_nx , + const size_t elem_ny , + const size_t elem_nz , + const float bubble_x = 1.1f , + const float bubble_y = 1.2f , + const float bubble_z = 1.3f ) + : m_box_part( Order , decompose , global_size , global_rank , elem_nx , elem_ny , elem_nz ) + , m_coord_map( m_box_part.global_coord_max(0) , + m_box_part.global_coord_max(1) , + m_box_part.global_coord_max(2) , + bubble_x , + bubble_y , + bubble_z ) + , m_node_coord( "fixture_node_coord" , m_box_part.uses_node_count() ) + , m_node_grid( "fixture_node_grid" , m_box_part.uses_node_count() ) + , m_elem_node( "fixture_elem_node" , m_box_part.uses_elem_count() ) + , m_recv_node( "fixture_recv_node" , m_box_part.recv_node_msg_count() ) + , m_send_node( "fixture_send_node" , m_box_part.send_node_msg_count() ) + , m_send_node_id( "fixture_send_node_id" , m_box_part.send_node_id_count() ) + { + { + const hex_data elem_data ; + + for ( int i = 0 ; i < ElemNode ; ++i ) { + m_elem_node_local[i][0] = elem_data.eval_map[i][0] ; + m_elem_node_local[i][1] = elem_data.eval_map[i][1] ; + m_elem_node_local[i][2] = elem_data.eval_map[i][2] ; + m_elem_node_local[i][3] = 0 ; + } + } + + const size_t nwork = + std::max( m_recv_node.dimension_0() , + std::max( m_send_node.dimension_0() , + std::max( m_send_node_id.dimension_0() , + std::max( m_node_grid.dimension_0() , + m_elem_node.dimension_0() * m_elem_node.dimension_1() )))); + + Kokkos::parallel_for( nwork , *this ); + } + + + // Initialization: + + KOKKOS_INLINE_FUNCTION + void operator()( size_t i ) const + { + if ( i < m_elem_node.dimension_0() * m_elem_node.dimension_1() ) { + + const size_t ielem = i / ElemNode ; + const size_t inode = i % ElemNode ; + + size_t elem_grid[SpaceDim] ; + size_t tmp_node_grid[SpaceDim] ; + + m_box_part.uses_elem_coord( ielem , elem_grid ); + + enum { elem_node_scale = Order == BoxElemPart::ElemLinear ? 
1 : + Order == BoxElemPart::ElemQuadratic ? 2 : 0 }; + + tmp_node_grid[0] = elem_node_scale * elem_grid[0] + m_elem_node_local[inode][0] ; + tmp_node_grid[1] = elem_node_scale * elem_grid[1] + m_elem_node_local[inode][1] ; + tmp_node_grid[2] = elem_node_scale * elem_grid[2] + m_elem_node_local[inode][2] ; + + m_elem_node(ielem,inode) = m_box_part.local_node_id( tmp_node_grid ); + } + + if ( i < m_node_grid.dimension_0() ) { + size_t tmp_node_grid[SpaceDim] ; + m_box_part.local_node_coord( i , tmp_node_grid ); + m_node_grid(i,0) = tmp_node_grid[0] ; + m_node_grid(i,1) = tmp_node_grid[1] ; + m_node_grid(i,2) = tmp_node_grid[2] ; + + m_coord_map( tmp_node_grid[0] , + tmp_node_grid[1] , + tmp_node_grid[2] , + m_node_coord(i,0) , + m_node_coord(i,1) , + m_node_coord(i,2) ); + } + + if ( i < m_recv_node.dimension_0() ) { + m_recv_node(i,0) = m_box_part.recv_node_rank(i); + m_recv_node(i,1) = m_box_part.recv_node_count(i); + } + + if ( i < m_send_node.dimension_0() ) { + m_send_node(i,0) = m_box_part.send_node_rank(i); + m_send_node(i,1) = m_box_part.send_node_count(i); + } + + if ( i < m_send_node_id.dimension_0() ) { + m_send_node_id(i) = m_box_part.send_node_id(i); + } + } +}; + +} // namespace Example +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +#endif /* #ifndef KOKKOS_EXAMPLE_BOXELEMFIXTURE_HPP */ + diff --git a/lib/kokkos/example/fixture/BoxElemPart.cpp b/lib/kokkos/example/fixture/BoxElemPart.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fe89246689ea41c1157035231e34c9f3a94dfceb --- /dev/null +++ b/lib/kokkos/example/fixture/BoxElemPart.cpp @@ -0,0 +1,413 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <utility> +#include <iostream> +#include <sstream> +#include <stdexcept> +#include <limits> +#include <BoxElemPart.hpp> + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Example { + +void box_partition( const size_t global_size , + const size_t global_rank , + const size_t global_box[][2] , + size_t box[][2] ) +{ + box[0][0] = global_box[0][0] ; box[0][1] = global_box[0][1] ; + box[1][0] = global_box[1][0] ; box[1][1] = global_box[1][1] ; + box[2][0] = global_box[2][0] ; box[2][1] = global_box[2][1] ; + + size_t ip = 0 ; + size_t np = global_size ; + + while ( 1 < np ) { + + // P = [ ip + j * portion , ip + ( j + 1 ) * portion ) + + size_t jip , jup ; + + { + const size_t part = ( 0 == ( np % 5 ) ) ? 5 : ( + ( 0 == ( np % 3 ) ) ? 3 : 2 ); + + const size_t portion = np / part ; + + if ( 2 < part || global_rank < ip + portion ) { + jip = portion * size_t( double( global_rank - ip ) / double(portion) ); + jup = jip + portion ; + } + else { + jip = portion ; + jup = np ; + } + } + + // Choose axis with largest count: + + const size_t nb[3] = { + box[0][1] - box[0][0] , + box[1][1] - box[1][0] , + box[2][1] - box[2][0] }; + + const int axis = nb[2] > nb[1] ? ( nb[2] > nb[0] ? 2 : 0 ) + : ( nb[1] > nb[0] ? 
1 : 0 ); + + box[ axis ][1] = box[ axis ][0] + size_t( double(nb[axis]) * ( double(jup) / double(np) )); + box[ axis ][0] = box[ axis ][0] + size_t( double(nb[axis]) * ( double(jip) / double(np) )); + + np = jup - jip ; + ip = ip + jip ; + } +} + +} /* namespace Example */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Example { + +void BoxElemPart::local( const size_t rank , + size_t uses_elem[][2] , + size_t owns_node[][2] , + size_t uses_node[][2] ) const +{ + if ( BoxElemPart::DecomposeElem == m_decompose ) { + + Kokkos::Example::box_partition( m_global_size , rank , m_global_elem_box , uses_elem ); + + for ( int i = 0 ; i < 3 ; ++i ) { + owns_node[i][0] = uses_elem[i][0] ; + owns_node[i][1] = uses_elem[i][1] + ( m_global_elem_box[i][1] == uses_elem[i][1] ? 1 : 0 ); + } + } + else { + + const size_t global_vert[3][2] = + { { 0 , m_global_elem_box[0][1] + 1 }, + { 0 , m_global_elem_box[1][1] + 1 }, + { 0 , m_global_elem_box[2][1] + 1 } }; + + Kokkos::Example::box_partition( m_global_size , rank , global_vert , owns_node ); + + for ( int i = 0 ; i < 3 ; ++i ) { + uses_elem[i][0] = global_vert[i][0] == owns_node[i][0] ? owns_node[i][0] : owns_node[i][0] - 1 ; + uses_elem[i][1] = global_vert[i][1] == owns_node[i][1] ? 
owns_node[i][1] - 1 : owns_node[i][1] ; + } + } + + for ( int i = 0 ; i < 3 ; ++i ) { + uses_node[i][0] = uses_elem[i][0] ; + uses_node[i][1] = uses_elem[i][1] + 1 ; + } + + if ( BoxElemPart::ElemQuadratic == m_elem_order ) { + for ( int i = 0 ; i < 3 ; ++i ) { + owns_node[i][0] = 2 * owns_node[i][0] ; + uses_node[i][0] = 2 * uses_node[i][0] ; + owns_node[i][1] = 2 * owns_node[i][1] - 1 ; + uses_node[i][1] = 2 * uses_node[i][1] - 1 ; + } + } +} + +BoxElemPart::BoxElemPart( + const BoxElemPart::ElemOrder elem_order , + const BoxElemPart::Decompose decompose , + const size_t global_size , + const size_t global_rank , + const size_t elem_nx , + const size_t elem_ny , + const size_t elem_nz ) +{ + m_global_size = global_size ; + m_global_rank = global_rank ; + m_decompose = decompose ; + m_elem_order = elem_order ; + + m_global_elem_box[0][0] = 0 ; m_global_elem_box[0][1] = elem_nx ; + m_global_elem_box[1][0] = 0 ; m_global_elem_box[1][1] = elem_ny ; + m_global_elem_box[2][0] = 0 ; m_global_elem_box[2][1] = elem_nz ; + + m_global_node_box[0][0] = 0 ; m_global_node_box[0][1] = 0 ; + m_global_node_box[1][0] = 0 ; m_global_node_box[1][1] = 0 ; + m_global_node_box[2][0] = 0 ; m_global_node_box[2][1] = 0 ; + + m_owns_node_count = 0 ; + m_send_node_count = 0 ; + + m_ok = true ; + + //---------------------------------------- + + if ( ElemLinear == elem_order ) { + m_global_node_box[0][1] = elem_nx + 1 ; + m_global_node_box[1][1] = elem_ny + 1 ; + m_global_node_box[2][1] = elem_nz + 1 ; + } + else if ( ElemQuadratic == elem_order ) { + m_global_node_box[0][1] = 2 * elem_nx + 1 ; + m_global_node_box[1][1] = 2 * elem_ny + 1 ; + m_global_node_box[2][1] = 2 * elem_nz + 1 ; + } + + //---------------------------------------- + + local( m_global_rank , m_uses_elem_box , m_owns_node_box[0] , m_uses_node_box ); + + const size_t global_node_count_ = Kokkos::Example::box_count( m_global_node_box ); + const size_t global_elem_count_ = Kokkos::Example::box_count( m_global_elem_box ); + + 
//---------------------------------------- + + size_t elem_count = Kokkos::Example::box_count( m_uses_elem_box ); + size_t node_count = Kokkos::Example::box_count( m_owns_node_box[0] ); + + m_owns_node[0][0] = global_rank ; + m_owns_node[0][1] = node_count ; + m_owns_node_count = 1 ; + m_send_node_count = 0 ; + + for ( size_t rr = 1 ; rr < m_global_size && m_ok ; ++rr ) { + + const size_t rank = ( m_global_rank + rr ) % m_global_size ; + + size_t elem_box[3][2] , o_node_box[3][2] , u_node_box[3][2] ; + + // Boxes for process 'rank' + local( rank , elem_box , o_node_box , u_node_box ); + + // Box that this process uses but is owned by process 'rank' + Kokkos::Example::box_intersect( m_owns_node_box[ m_owns_node_count ] , m_uses_node_box , o_node_box ); + + m_owns_node[ m_owns_node_count ][1] = Kokkos::Example::box_count( m_owns_node_box[ m_owns_node_count ] ); + + if ( m_owns_node[ m_owns_node_count ][1] ) { + + if ( ( PROC_NEIGH_MAX - 1 ) <= m_owns_node_count ) { + std::cout << "BoxElemPart exceeded maximum neighbor count" << std::endl ; + m_ok = false ; + break ; + } + + m_owns_node[ m_owns_node_count ][0] = rank ; + + ++m_owns_node_count ; + } + + // Box that this process owns and is used by process 'rank' + Kokkos::Example::box_intersect( m_send_node_box[ m_send_node_count ] , m_owns_node_box[0] , u_node_box ); + + m_send_node[ m_send_node_count ][1] = Kokkos::Example::box_count( m_send_node_box[ m_send_node_count ] ); + + if ( m_send_node[ m_send_node_count ][1] ) { + + if ( ( PROC_NEIGH_MAX - 1 ) <= m_send_node_count ) { + std::cout << "BoxElemPart exceeded maximum neighbor count" << std::endl ; + m_ok = false ; + break ; + } + + m_send_node[ m_send_node_count ][0] = rank ; + ++m_send_node_count ; + } + + // Error checking: + + size_t test_box[3][2] ; + + elem_count += Kokkos::Example::box_count( elem_box ); + node_count += Kokkos::Example::box_count( o_node_box ); + + { + Kokkos::Example::box_intersect( test_box , m_owns_node_box[0] , o_node_box ); + + if ( 
Kokkos::Example::box_count( test_box ) ) { + std::cout << "Box partitioning error" << std::endl ; + std::cout << "owns_node[" << m_global_rank << "]{" + << " [" << m_owns_node_box[0][0][0] << "," << m_owns_node_box[0][0][1] << ")" + << " [" << m_owns_node_box[0][1][0] << "," << m_owns_node_box[0][1][1] << ")" + << " [" << m_owns_node_box[0][2][0] << "," << m_owns_node_box[0][2][1] << ")" + << "} intersects" + << " owns_node[" << rank << "]{" + << " [" << o_node_box[0][0] << "," << o_node_box[0][1] << ")" + << " [" << o_node_box[1][0] << "," << o_node_box[1][1] << ")" + << " [" << o_node_box[2][0] << "," << o_node_box[2][1] << ")" + << "}" << std::endl ; + m_ok = false ; + break ; + } + } + + if ( DecomposeElem == decompose ) { + + Kokkos::Example::box_intersect( test_box , m_uses_elem_box , elem_box ); + + if ( Kokkos::Example::box_count( test_box ) ) { + std::cout << "Box partitioning error" << std::endl ; + std::cout << "ElemBox[" << m_global_rank << "]{" + << " [" << m_uses_elem_box[0][0] << "," << m_uses_elem_box[0][1] << ")" + << " [" << m_uses_elem_box[1][0] << "," << m_uses_elem_box[1][1] << ")" + << " [" << m_uses_elem_box[2][0] << "," << m_uses_elem_box[2][1] << ")" + << "} intersects" + << " ElemBox[" << rank << "]{" + << " [" << elem_box[0][0] << "," << elem_box[0][1] << ")" + << " [" << elem_box[1][0] << "," << elem_box[1][1] << ")" + << " [" << elem_box[2][0] << "," << elem_box[2][1] << ")" + << "}" << std::endl ; + m_ok = false ; + break ; + } + } + } + + // Sentinal values at the end of the owns and send lists: + + m_owns_node[ m_owns_node_count ][0] = ~0u ; + m_owns_node[ m_owns_node_count ][1] = ~0u ; + m_owns_node_box[ m_owns_node_count ][0][0] = 0u ; m_owns_node_box[ m_owns_node_count ][0][0] = ~0u ; + m_owns_node_box[ m_owns_node_count ][1][0] = 0u ; m_owns_node_box[ m_owns_node_count ][1][0] = ~0u ; + m_owns_node_box[ m_owns_node_count ][2][0] = 0u ; m_owns_node_box[ m_owns_node_count ][2][0] = ~0u ; + + m_send_node[ m_send_node_count ][0] = 
~0u ; + m_send_node[ m_send_node_count ][1] = ~0u ; + m_send_node_box[ m_send_node_count ][0][0] = 0u ; m_send_node_box[ m_send_node_count ][0][0] = ~0u ; + m_send_node_box[ m_send_node_count ][1][0] = 0u ; m_send_node_box[ m_send_node_count ][1][0] = ~0u ; + m_send_node_box[ m_send_node_count ][2][0] = 0u ; m_send_node_box[ m_send_node_count ][2][0] = ~0u ; + + { + size_t count = 0 ; + for ( size_t i = 0 ; i < m_owns_node_count ; ++i ) { + count += m_owns_node[i][1] ; + } + if ( count != Kokkos::Example::box_count( m_uses_node_box ) ) { + std::cout << "Node uses count = " << Kokkos::Example::box_count( m_uses_node_box ) + << " error count = " << count << std::endl ; + m_ok = false ; + } + } + + if ( global_node_count_ != node_count ) { + std::cout << "Node count = " << global_node_count_ << " overlap error count = " << node_count << std::endl ; + m_ok = false ; + } + + if ( DecomposeElem == decompose && global_elem_count_ != elem_count ) { + std::cout << "Elem count = " << global_elem_count_ << " overlap error count = " << elem_count << std::endl ; + m_ok = false ; + } + + if ( ! 
m_ok ) { + for ( int i = 0 ; i < 3 ; ++i ) { for ( int j = 0 ; j < 2 ; ++j ) { + m_global_elem_box[i][j] = 0 ; + m_global_node_box[i][j] = 0 ; + m_uses_elem_box[i][j] = 0 ; + m_uses_node_box[i][j] = 0 ; + }} + m_owns_node_count = 0 ; + m_send_node_count = 0 ; + } +} + +void BoxElemPart::print( std::ostream & s ) const +{ + s << "BoxElemPart P[" << m_global_rank << ":" << m_global_size << "]" + << std::endl + << " elem_box {" + << " [" << m_uses_elem_box[0][0] << "," << m_uses_elem_box[0][1] << ")" + << " [" << m_uses_elem_box[1][0] << "," << m_uses_elem_box[1][1] << ")" + << " [" << m_uses_elem_box[2][0] << "," << m_uses_elem_box[2][1] << ")" + << " } / {" + << " [" << m_global_elem_box[0][0] << "," << m_global_elem_box[0][1] << ")" + << " [" << m_global_elem_box[1][0] << "," << m_global_elem_box[1][1] << ")" + << " [" << m_global_elem_box[2][0] << "," << m_global_elem_box[2][1] << ")" + << " }" + << std::endl + << " node_box {" + << " [" << m_owns_node_box[0][0][0] << "," << m_owns_node_box[0][0][1] << ")" + << " [" << m_owns_node_box[0][1][0] << "," << m_owns_node_box[0][1][1] << ")" + << " [" << m_owns_node_box[0][2][0] << "," << m_owns_node_box[0][2][1] << ")" + << " } / {" + << " [" << m_uses_node_box[0][0] << "," << m_uses_node_box[0][1] << ")" + << " [" << m_uses_node_box[1][0] << "," << m_uses_node_box[1][1] << ")" + << " [" << m_uses_node_box[2][0] << "," << m_uses_node_box[2][1] << ")" + << " } / {" + << " [" << m_global_node_box[0][0] << "," << m_global_node_box[0][1] << ")" + << " [" << m_global_node_box[1][0] << "," << m_global_node_box[1][1] << ")" + << " [" << m_global_node_box[2][0] << "," << m_global_node_box[2][1] << ")" + << " }" + << std::endl ; + + for ( size_t i = 1 ; i < m_owns_node_count ; ++i ) { + s << " P[" << m_owns_node[i][0] << "]" + << " recv node_box {" + << " [" << m_owns_node_box[i][0][0] << "," << m_owns_node_box[i][0][1] << ")" + << " [" << m_owns_node_box[i][1][0] << "," << m_owns_node_box[i][1][1] << ")" + << " [" << 
m_owns_node_box[i][2][0] << "," << m_owns_node_box[i][2][1] << ")" + << " }" + << std::endl ; + } + + for ( size_t i = 0 ; i < m_send_node_count ; ++i ) { + s << " P[" << m_send_node[i][0] << "]" + << " send node_box {" + << " [" << m_send_node_box[i][0][0] << "," << m_send_node_box[i][0][1] << ")" + << " [" << m_send_node_box[i][1][0] << "," << m_send_node_box[i][1][1] << ")" + << " [" << m_send_node_box[i][2][0] << "," << m_send_node_box[i][2][1] << ")" + << " }" + << std::endl ; + } +} + +} /* namespace Example */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- + + diff --git a/lib/kokkos/example/fixture/BoxElemPart.hpp b/lib/kokkos/example/fixture/BoxElemPart.hpp new file mode 100644 index 0000000000000000000000000000000000000000..98f44e7d9da92a9b3c04a2df936cc850d9b1d632 --- /dev/null +++ b/lib/kokkos/example/fixture/BoxElemPart.hpp @@ -0,0 +1,320 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_BOXELEMPART_HPP +#define KOKKOS_BOXELEMPART_HPP + +#include <utility> +#include <ostream> +#include <Kokkos_Macros.hpp> + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Example { + +KOKKOS_INLINE_FUNCTION +void box_intersect( size_t box[][2] , + const size_t boxA[][2] , + const size_t boxB[][2] ) +{ + for ( int i = 0 ; i < 3 ; ++i ) { + box[i][0] = boxA[i][0] > boxB[i][0] ? boxA[i][0] : boxB[i][0] ; + box[i][1] = boxA[i][1] < boxB[i][1] ? 
boxA[i][1] : boxB[i][1] ; + if ( box[i][0] > box[i][1] ) box[i][1] = box[i][0] ; + } +} + +KOKKOS_INLINE_FUNCTION +size_t box_count( const size_t box[][2] ) +{ + return size_t( box[0][1] - box[0][0] ) * + size_t( box[1][1] - box[1][0] ) * + size_t( box[2][1] - box[2][0] ); +} + +KOKKOS_INLINE_FUNCTION +void box_ghost_layer( const size_t global_box[][2] , + const size_t local_box[][2] , + const size_t ghost_layer , + size_t ghost_box[][2] ) +{ + for ( int i = 0 ; i < 3 ; ++i ) { + ghost_box[i][0] = global_box[i][0] + ghost_layer > local_box[i][0] ? global_box[i][0] : local_box[i][0] - ghost_layer ; + ghost_box[i][1] = global_box[i][1] < local_box[i][1] + ghost_layer ? global_box[i][1] : local_box[i][1] + ghost_layer ; + } +} + +void box_partition( const size_t global_size , + const size_t global_rank , + const size_t global_box[][2] , + size_t box[][2] ); + +} // namespace Example +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Example { + +/** \brief Partition a box of hexahedral elements among subdomains. + * + * Nodes are ordered locally as follows: + * { owned_by[ this_process ] , + * owned_by[ neighbor_process[0] ] , + * owned_by[ neighbor_process[1] ] , + * owned_by[ neighbor_process[2] ] , + * ... 
}; + */ +class BoxElemPart { +public: + + enum Decompose { DecomposeNode , DecomposeElem }; + enum ElemOrder { ElemLinear , ElemQuadratic }; + + bool ok() const { return m_ok ; } + + BoxElemPart( const ElemOrder elem_order , + const Decompose decompose , + const size_t global_size , + const size_t global_rank , + const size_t elem_nx , + const size_t elem_ny , + const size_t elem_nz ); + + KOKKOS_INLINE_FUNCTION + size_t global_elem_count() const + { return Kokkos::Example::box_count( m_global_elem_box ); } + + KOKKOS_INLINE_FUNCTION + size_t global_node_count() const + { return Kokkos::Example::box_count( m_global_node_box ); } + + KOKKOS_INLINE_FUNCTION + size_t uses_elem_count() const + { return Kokkos::Example::box_count( m_uses_elem_box ); } + + KOKKOS_INLINE_FUNCTION + size_t owns_node_count() const + { return Kokkos::Example::box_count( m_owns_node_box[0] ); } + + KOKKOS_INLINE_FUNCTION + size_t uses_node_count() const + { return Kokkos::Example::box_count( m_uses_node_box ); } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + size_t uses_elem_offset( const size_t ix , + const size_t iy , + const size_t iz ) const + { + return size_t( ix - m_uses_elem_box[0][0] ) + size_t( m_uses_elem_box[0][1] - m_uses_elem_box[0][0] ) * ( + size_t( iy - m_uses_elem_box[1][0] ) + size_t( m_uses_elem_box[1][1] - m_uses_elem_box[1][0] ) * ( + size_t( iz - m_uses_elem_box[2][0] ) ) ); + } + + KOKKOS_INLINE_FUNCTION + void uses_elem_coord( size_t lid , size_t c[] ) const + { + const size_t nx = m_uses_elem_box[0][1] - m_uses_elem_box[0][0] ; + const size_t ny = m_uses_elem_box[1][1] - m_uses_elem_box[1][0] ; + + c[0] = m_uses_elem_box[0][0] + lid % nx ; lid /= nx ; + c[1] = m_uses_elem_box[1][0] + lid % ny ; lid /= ny ; + c[2] = m_uses_elem_box[2][0] + lid ; + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + size_t global_coord_max( size_t axis ) const + { return m_global_node_box[axis][1] - 1 ; } + + 
//---------------------------------------- + + KOKKOS_INLINE_FUNCTION + void local_node_coord( size_t lid , size_t coord[] ) const + { + // Local id within an 'owns' block (scan has no bound on j: relies on the sentinel) + size_t j = 0 ; + while ( m_owns_node[j][1] <= lid ) { lid -= m_owns_node[j][1] ; ++j ; } + + // Map to global coordinates: + const size_t nx = m_owns_node_box[j][0][1] - m_owns_node_box[j][0][0] ; + const size_t ny = m_owns_node_box[j][1][1] - m_owns_node_box[j][1][0] ; + + coord[0] = m_owns_node_box[j][0][0] + lid % nx ; lid /= nx ; + coord[1] = m_owns_node_box[j][1][0] + lid % ny ; lid /= ny ; + coord[2] = m_owns_node_box[j][2][0] + lid ; + } + + KOKKOS_INLINE_FUNCTION + size_t local_node_id( const size_t c[] ) const + { + // Find which 'owns' block and accumulate the offset of this block: + size_t lid = 0 ; + size_t j = 0 ; + while ( ! ( m_owns_node_box[j][0][0] <= c[0] && c[0] < m_owns_node_box[j][0][1] && + m_owns_node_box[j][1][0] <= c[1] && c[1] < m_owns_node_box[j][1][1] && + m_owns_node_box[j][2][0] <= c[2] && c[2] < m_owns_node_box[j][2][1] ) ) { + + lid += m_owns_node[j][1] ; + ++j ; + } + + // Map offset to the block plus offset within the block: + return lid + + size_t( c[0] - m_owns_node_box[j][0][0] ) + size_t( m_owns_node_box[j][0][1] - m_owns_node_box[j][0][0] ) * ( + size_t( c[1] - m_owns_node_box[j][1][0] ) + size_t( m_owns_node_box[j][1][1] - m_owns_node_box[j][1][0] ) * ( + size_t( c[2] - m_owns_node_box[j][2][0] ) ) ); + } + + KOKKOS_INLINE_FUNCTION + size_t global_node_id( const size_t c[] ) const + { + return size_t( c[0] - m_global_node_box[0][0] ) + size_t( m_global_node_box[0][1] - m_global_node_box[0][0] ) * ( + size_t( c[1] - m_global_node_box[1][0] ) + size_t( m_global_node_box[1][1] - m_global_node_box[1][0] ) * ( + size_t( c[2] - m_global_node_box[2][0] ) ) ); + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + size_t recv_node_msg_count() const { return m_owns_node_count - 1 ; } + + KOKKOS_INLINE_FUNCTION + size_t 
recv_node_rank( size_t msg ) const { return m_owns_node[msg+1][0] ; } + + KOKKOS_INLINE_FUNCTION + size_t recv_node_count( size_t msg ) const { return m_owns_node[msg+1][1] ; } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + size_t send_node_msg_count() const { return m_send_node_count ; } + + KOKKOS_INLINE_FUNCTION + size_t send_node_rank( size_t msg ) const { return m_send_node[msg][0] ; } + + KOKKOS_INLINE_FUNCTION + size_t send_node_count( size_t msg ) const { return m_send_node[msg][1] ; } + + KOKKOS_INLINE_FUNCTION + size_t send_node_id_count() const + { + size_t count = 0 ; + for ( size_t i = 0 ; i < m_send_node_count ; ++i ) { + count += m_send_node[i][1] ; + } + return count ; + } + + KOKKOS_INLINE_FUNCTION + size_t send_node_id( size_t item ) const + { + // Find which send list this send item is in: + size_t j = 0 ; + while ( m_send_node[j][1] <= item ) { item -= m_send_node[j][1] ; ++j ; } + + // Map to global coordinate: + const size_t nx = m_send_node_box[j][0][1] - m_send_node_box[j][0][0] ; + const size_t ny = m_send_node_box[j][1][1] - m_send_node_box[j][1][0] ; + + size_t c[3] ; + + c[0] = m_send_node_box[j][0][0] + item % nx ; item /= nx ; + c[1] = m_send_node_box[j][1][0] + item % ny ; item /= ny ; + c[2] = m_send_node_box[j][2][0] + item ; + + // Map to local id: + return size_t( c[0] - m_owns_node_box[0][0][0] ) + size_t( m_owns_node_box[0][0][1] - m_owns_node_box[0][0][0] ) * ( + size_t( c[1] - m_owns_node_box[0][1][0] ) + size_t( m_owns_node_box[0][1][1] - m_owns_node_box[0][1][0] ) * ( + size_t( c[2] - m_owns_node_box[0][2][0] ) ) ); + } + + //---------------------------------------- + + void print( std::ostream & s ) const ; + +private: + + // Maximum number of processes in a neighborhood, including this process + enum { PROC_NEIGH_MAX = 64 }; + + void local( const size_t rank , + size_t uses_elem[][2] , + size_t owns_node[][2] , + size_t uses_node[][2] ) const ; + + size_t m_global_size ; + size_t m_global_rank 
; + + Decompose m_decompose ; + ElemOrder m_elem_order ; + + size_t m_global_elem_box[3][2] ; + size_t m_global_node_box[3][2] ; + size_t m_uses_elem_box[3][2] ; + size_t m_uses_node_box[3][2] ; + + // [ processor rank , count ] + size_t m_owns_node_box[ PROC_NEIGH_MAX ][3][2] ; + size_t m_owns_node[ PROC_NEIGH_MAX ][2] ; + size_t m_owns_node_count ; + + size_t m_send_node_box[ PROC_NEIGH_MAX ][3][2] ; + size_t m_send_node[ PROC_NEIGH_MAX ][2] ; + size_t m_send_node_count ; + + bool m_ok ; +}; + +} // namespace Example +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +#endif /* #ifndef KOKKOS_BOXELEMPART_HPP */ + diff --git a/lib/kokkos/example/fixture/CMakeLists.txt b/lib/kokkos/example/fixture/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..298c54c5bb3e00bf5ecaf5ad18e53de2ba405272 --- /dev/null +++ b/lib/kokkos/example/fixture/CMakeLists.txt @@ -0,0 +1,13 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../common) + +SET(SOURCES_TEST Main.cpp TestFixture.cpp BoxElemPart.cpp ) + +# Automatically picks up 'kokkosexample_fixture' +TRIBITS_ADD_EXECUTABLE_AND_TEST( + TestFixture + SOURCES ${SOURCES_TEST} + ) + diff --git a/lib/kokkos/example/fixture/HexElement.hpp b/lib/kokkos/example/fixture/HexElement.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cb39358f9d23a6e01a45f9ca6f277613bb321301 --- /dev/null +++ b/lib/kokkos/example/fixture/HexElement.hpp @@ -0,0 +1,270 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HEXELEMENT_HPP +#define KOKKOS_HEXELEMENT_HPP + +namespace Kokkos { +namespace Example { + +template< unsigned NodeCount > +class HexElement_TensorData ; + +template< unsigned NodeCount , class Device > +class HexElement_TensorEval ; + +//---------------------------------------------------------------------------- +/** \brief Evaluate Hex element on interval [-1,1]^3 */ +template<> +class HexElement_TensorData< 8 > { +public: + + static const unsigned element_node_count = 8 ; + static const unsigned spatial_dimension = 3 ; + static const unsigned integration_count_1d = 2 ; + static const unsigned function_count_1d = 2 ; + + float values_1d [ integration_count_1d ][ function_count_1d ]; // indexed [point][function] + float derivs_1d [ integration_count_1d ][ function_count_1d ]; // indexed [point][function] + float weights_1d[ integration_count_1d ]; + + unsigned char eval_map[ element_node_count ][4] ; + + static float eval_value_1d( const unsigned jf , const float x ) + { + return 0 == jf ? 0.5 * ( 1.0 - x ) : ( + 1 == jf ? 0.5 * ( 1.0 + x ) : 0 ); + } + + static float eval_deriv_1d( const unsigned jf , const float ) + { + return 0 == jf ? -0.5 : ( + 1 == jf ? 
0.5 : 0 ); + } + + HexElement_TensorData() + { + const unsigned char tmp_map[ element_node_count ][ spatial_dimension ] = + { { 0 , 0 , 0 }, + { 1 , 0 , 0 }, + { 1 , 1 , 0 }, + { 0 , 1 , 0 }, + { 0 , 0 , 1 }, + { 1 , 0 , 1 }, + { 1 , 1 , 1 }, + { 0 , 1 , 1 } }; + + weights_1d[0] = 1 ; + weights_1d[1] = 1 ; + + const float points_1d[ integration_count_1d ] = + { -0.577350269 , 0.577350269 }; + + for ( unsigned i = 0 ; i < element_node_count ; ++i ) { + eval_map[i][0] = tmp_map[i][0]; + eval_map[i][1] = tmp_map[i][1]; + eval_map[i][2] = tmp_map[i][2]; + } + + for ( unsigned xp = 0 ; xp < integration_count_1d ; ++xp ) { + for ( unsigned xf = 0 ; xf < function_count_1d ; ++xf ) { + values_1d[xp][xf] = eval_value_1d( xf , points_1d[xp] ); + derivs_1d[xp][xf] = eval_deriv_1d( xf , points_1d[xp] ); + }} + } +}; + +//---------------------------------------------------------------------------- + +template<> +class HexElement_TensorData< 27 > { +public: + + static const unsigned element_node_count = 27 ; + static const unsigned spatial_dimension = 3 ; + static const unsigned integration_count_1d = 3 ; + static const unsigned function_count_1d = 3 ; + + float values_1d [ integration_count_1d ][ function_count_1d ]; // indexed [point][function] + float derivs_1d [ integration_count_1d ][ function_count_1d ]; // indexed [point][function] + float weights_1d[ integration_count_1d ]; + + unsigned char eval_map[ element_node_count ][4] ; + + // sizeof(HexElement_TensorData<27>) = 192 bytes (assuming 4-byte float) = + // sizeof(float) * 9 + + // sizeof(float) * 9 + + // sizeof(float) * 3 + + // sizeof(char) * 27 * 4 + + static float eval_value_1d( const unsigned jf , const float p ) + { + return 0 == jf ? 0.5 * p * ( p - 1 ) : ( + 1 == jf ? 1.0 - p * p : ( + 2 == jf ? 0.5 * p * ( p + 1 ) : 0 )); + } + + static float eval_deriv_1d( const unsigned jf , const float p ) + { + return 0 == jf ? p - 0.5 : ( + 1 == jf ? -2.0 * p : ( + 2 == jf ? 
p + 0.5 : 0 )); + } + + HexElement_TensorData() + { + const unsigned char tmp_map[ element_node_count ][ spatial_dimension ] = + { { 0 , 0 , 0 }, + { 2 , 0 , 0 }, + { 2 , 2 , 0 }, + { 0 , 2 , 0 }, + { 0 , 0 , 2 }, + { 2 , 0 , 2 }, + { 2 , 2 , 2 }, + { 0 , 2 , 2 }, + { 1 , 0 , 0 }, + { 2 , 1 , 0 }, + { 1 , 2 , 0 }, + { 0 , 1 , 0 }, + { 0 , 0 , 1 }, + { 2 , 0 , 1 }, + { 2 , 2 , 1 }, + { 0 , 2 , 1 }, + { 1 , 0 , 2 }, + { 2 , 1 , 2 }, + { 1 , 2 , 2 }, + { 0 , 1 , 2 }, + { 1 , 1 , 1 }, + { 1 , 1 , 0 }, + { 1 , 1 , 2 }, + { 0 , 1 , 1 }, + { 2 , 1 , 1 }, + { 1 , 0 , 1 }, + { 1 , 2 , 1 } }; + + // Interval [-1,1] + + weights_1d[0] = 0.555555556 ; + weights_1d[1] = 0.888888889 ; + weights_1d[2] = 0.555555556 ; + + const float points_1d[3] = { -0.774596669 , + 0.000000000 , + 0.774596669 }; + + for ( unsigned i = 0 ; i < element_node_count ; ++i ) { + eval_map[i][0] = tmp_map[i][0]; + eval_map[i][1] = tmp_map[i][1]; + eval_map[i][2] = tmp_map[i][2]; + } + + for ( unsigned xp = 0 ; xp < integration_count_1d ; ++xp ) { + for ( unsigned xf = 0 ; xf < function_count_1d ; ++xf ) { + values_1d[xp][xf] = eval_value_1d( xf , points_1d[xp] ); + derivs_1d[xp][xf] = eval_deriv_1d( xf , points_1d[xp] ); + }} + } +}; + +//---------------------------------------------------------------------------- + +template< unsigned NodeCount > +class HexElement_Data { +public: + static const unsigned spatial_dimension = 3 ; + static const unsigned element_node_count = NodeCount ; + static const unsigned integration_count = NodeCount ; + static const unsigned function_count = NodeCount ; + + float weights[ integration_count ] ; + float values[ integration_count ][ function_count ]; + float gradients[ integration_count ][ spatial_dimension ][ function_count ]; + + HexElement_Data() + { + HexElement_TensorData< NodeCount > tensor_data ; + + for ( unsigned ip = 0 ; ip < integration_count ; ++ip ) { + + const unsigned ipx = tensor_data.eval_map[ip][0] ; + const unsigned ipy = tensor_data.eval_map[ip][1] ; 
+ const unsigned ipz = tensor_data.eval_map[ip][2] ; + + weights[ip] = tensor_data.weights_1d[ ipx ] * + tensor_data.weights_1d[ ipy ] * + tensor_data.weights_1d[ ipz ] ; + + for ( unsigned jf = 0 ; jf < function_count ; ++jf ) { + + const unsigned jfx = tensor_data.eval_map[jf][0] ; + const unsigned jfy = tensor_data.eval_map[jf][1] ; + const unsigned jfz = tensor_data.eval_map[jf][2] ; + + values[ip][jf] = tensor_data.values_1d[ ipx ][ jfx ] * + tensor_data.values_1d[ ipy ][ jfy ] * + tensor_data.values_1d[ ipz ][ jfz ] ; + + gradients[ip][0][jf] = tensor_data.derivs_1d[ ipx ][ jfx ] * + tensor_data.values_1d[ ipy ][ jfy ] * + tensor_data.values_1d[ ipz ][ jfz ] ; + + gradients[ip][1][jf] = tensor_data.values_1d[ ipx ][ jfx ] * + tensor_data.derivs_1d[ ipy ][ jfy ] * + tensor_data.values_1d[ ipz ][ jfz ] ; + + gradients[ip][2][jf] = tensor_data.values_1d[ ipx ][ jfx ] * + tensor_data.values_1d[ ipy ][ jfy ] * + tensor_data.derivs_1d[ ipz ][ jfz ] ; + } + } + } +}; + +//---------------------------------------------------------------------------- + +} /* namespace Example */ +} /* namespace Kokkos */ + +#endif /* #ifndef KOKKOS_HEXELEMENT_HPP */ + + diff --git a/lib/kokkos/example/fixture/Main.cpp b/lib/kokkos/example/fixture/Main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d80828ca521c40cebba1d951e46a04ef067c2745 --- /dev/null +++ b/lib/kokkos/example/fixture/Main.cpp @@ -0,0 +1,304 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include <utility> +#include <iostream> + +#include <Kokkos_Core.hpp> + +#include <BoxElemPart.hpp> + +namespace Kokkos { +namespace Example { +template< class > void test_fixture(); +} +} + +int test_box( const size_t global_size + , const size_t global_box[][2] + , const bool print_verbose ) +{ + size_t global_count = 0 ; + size_t global_max = 0 ; + size_t global_min = Kokkos::Example::box_count( global_box ); + size_t global_box_max[3][2] = { { 0 , 0 } , { 0 , 0 } , { 0 , 0 } }; + size_t global_box_min[3][2] = { { 0 , global_box[0][1] } , { 0 , global_box[1][1] } , { 0 , global_box[2][1] } }; + size_t intersect_error = 0 ; + size_t neighbor_max = 0 ; + + for ( size_t global_rank = 0 ; global_rank < global_size ; ++global_rank ) { + size_t box[3][2] = { { 0 , global_box[0][1] } , { 0 , global_box[1][1] } , { 0 , global_box[2][1] } }; + size_t ghost_box[3][2] ; + size_t neighbor_count = 0 ; + + Kokkos::Example::box_partition( global_size , global_rank , global_box , box ); + + Kokkos::Example::box_ghost_layer( global_box , box , 1 , ghost_box ); + + { + const size_t n = Kokkos::Example::box_count( box ); + + for ( int i = 0 ; i < 3 ; ++i ) { + if ( ( box[i][1] - box[i][0] ) < ( global_box_min[i][1] - global_box_min[i][0] ) ) { + global_box_min[i][0] = box[i][0] ; + global_box_min[i][1] = box[i][1] ; + } + if ( ( box[i][1] - box[i][0] ) > ( global_box_max[i][1] - global_box_max[i][0] ) ) { + global_box_max[i][0] = box[i][0] ; + global_box_max[i][1] = box[i][1] ; + } + } + + global_max = std::max( global_max , n ); + global_min = std::min( global_min , n ); + global_count += n ; + } + + for ( size_t other_rank = 0 ; other_rank < global_size ; ++other_rank ) { + + if ( other_rank == global_rank ) continue ; + + size_t other_box[3][2] = { { 0 , global_box[0][1] } , { 0 , global_box[1][1] } , { 0 , global_box[2][1] } }; + size_t 
intersect_box[3][2] ; + + Kokkos::Example::box_partition( global_size , other_rank , global_box , other_box ); + + Kokkos::Example::box_intersect( intersect_box , box , other_box ); + + const size_t n = Kokkos::Example::box_count( intersect_box ); + + intersect_error += n ; + + Kokkos::Example::box_intersect( intersect_box , ghost_box , other_box ); + + neighbor_count += Kokkos::Example::box_count( intersect_box ) ? 1 : 0 ; + + if ( n ) { + std::cout << "box partition intersection error" << std::endl ; + std::cout << "box = {" + << " [ " << box[0][0] << " , " << box[0][1] << " )" + << " [ " << box[1][0] << " , " << box[1][1] << " )" + << " [ " << box[2][0] << " , " << box[2][1] << " )" + << " }" << std::endl ; + std::cout << "other_box = {" + << " [ " << other_box[0][0] << " , " << other_box[0][1] << " )" + << " [ " << other_box[1][0] << " , " << other_box[1][1] << " )" + << " [ " << other_box[2][0] << " , " << other_box[2][1] << " )" + << " }" << std::endl ; + return 0 ; + } + } + + neighbor_max = std::max( neighbor_max , neighbor_count ); + } + + if ( print_verbose ) { + + std::cout << "global_part = " << global_size << std::endl ; + std::cout << "global_box = { " + << " [ " << global_box[0][0] << " .. " << global_box[0][1] << " ) X" + << " [ " << global_box[1][0] << " .. " << global_box[1][1] << " ) X" + << " [ " << global_box[2][0] << " .. " << global_box[2][1] << " )" + << " }" << std::endl ; + std::cout << "count( global_box ) = " << Kokkos::Example::box_count( global_box ) << std::endl ; + std::cout << "sum partition( global_box ) = " << global_count << std::endl ; + std::cout << "avg partition( global_box ) = " << size_t( double(global_count) / double(global_size)) << std::endl ; + std::cout << "min partition( global_box ) = " << global_min << std::endl ; + std::cout << "min part X ( global_box ) = [ " << global_box_min[0][0] << " .. 
" << global_box_min[0][1] << " )" << std::endl ; + std::cout << "min part Y ( global_box ) = [ " << global_box_min[1][0] << " .. " << global_box_min[1][1] << " )" << std::endl ; + std::cout << "min part Z ( global_box ) = [ " << global_box_min[2][0] << " .. " << global_box_min[2][1] << " )" << std::endl ; + std::cout << "max partition( global_box ) = " << global_max << std::endl ; + std::cout << "max part X ( global_box ) = [ " << global_box_max[0][0] << " .. " << global_box_max[0][1] << " )" << std::endl ; + std::cout << "max part Y ( global_box ) = [ " << global_box_max[1][0] << " .. " << global_box_max[1][1] << " )" << std::endl ; + std::cout << "max part Z ( global_box ) = [ " << global_box_max[2][0] << " .. " << global_box_max[2][1] << " )" << std::endl ; + std::cout << "sum intersect( global_box ) = " << intersect_error << std::endl ; + std::cout << "max neighbor = " << neighbor_max << std::endl ; + } + + return neighbor_max ; +} + +void test_elem() +{ + const Kokkos::Example::BoxElemPart::Decompose + decompose = Kokkos::Example::BoxElemPart:: DecomposeElem ; // DecomposeElem | DecomposeNode ; + const size_t global_size = 256 ; + const size_t global_nx = 100 ; + const size_t global_ny = 120 ; + const size_t global_nz = 140 ; + + double node_count_avg = 0 ; + size_t node_count_max = 0 ; + size_t node_count_min = ( global_nx + 1 ) * ( global_ny + 1 ) * ( global_nz + 1 ); + double elem_count_avg = 0 ; + size_t elem_count_max = 0 ; + size_t elem_count_min = global_nx * global_ny * global_nz ; + double recv_count_avg = 0 ; + size_t recv_count_max = 0 ; + size_t recv_count_min = global_size ; + double send_count_avg = 0 ; + size_t send_count_max = 0 ; + size_t send_count_min = global_size ; + + for ( size_t r = 0 ; r < global_size ; ++r ) { + const Kokkos::Example::BoxElemPart + fixture( Kokkos::Example::BoxElemPart::ElemLinear , + decompose , global_size , r , global_nx , global_ny , global_nz ); + + // Print a sample: + + // if ( r == global_size * 2 / 3 ) 
fixture.print( std::cout ); + + // Verify recv/send alignment: + + { + size_t recv_lid = fixture.owns_node_count(); + + for ( size_t i = 0 ; i < fixture.recv_node_msg_count() ; ++i ) { + const size_t recv_rank = fixture.recv_node_rank( i ); + const size_t recv_count = fixture.recv_node_count( i ); + + const Kokkos::Example::BoxElemPart other_fixture( + Kokkos::Example::BoxElemPart::ElemLinear , + decompose , global_size , recv_rank , global_nx , global_ny , global_nz ); + + size_t send_item = 0 ; + + size_t j = 0 ; + while ( j < other_fixture.send_node_msg_count() && other_fixture.send_node_rank(j) != r ) { + send_item += other_fixture.send_node_count( j ); + ++j ; + } + + if ( recv_count != other_fixture.send_node_count(j) ) { + std::cout << "Error P[" << r << "].recv(" << recv_count << ") != " + << "P[" << recv_rank << "].send(" << other_fixture.send_node_count(j) << ")" + << std::endl ; + } + else { + + for ( size_t k = 0 ; k < recv_count ; ++k , ++send_item , ++recv_lid ) { + + const size_t send_lid = other_fixture.send_node_id( send_item ); + + size_t recv_coord[3] , send_coord[3] ; + + fixture.local_node_coord( recv_lid , recv_coord ); + + other_fixture.local_node_coord( send_lid , send_coord ); + + if ( recv_coord[0] != send_coord[0] || + recv_coord[1] != send_coord[1] || + recv_coord[2] != send_coord[2] ) { + std::cout << "Error P[" << r << "].recv[" << recv_lid << "]{ " + << recv_coord[0] << " , " + << recv_coord[1] << " , " + << recv_coord[2] << " } != " + << "P[" << recv_rank << "].send[" << send_lid << "]{ " + << send_coord[0] << " , " + << send_coord[1] << " , " + << send_coord[2] << " }" + << std::endl ; + } + } + } + } + } + + node_count_avg += fixture.owns_node_count(); + elem_count_avg += fixture.uses_elem_count(); + recv_count_avg += fixture.recv_node_msg_count(); + send_count_avg += fixture.send_node_msg_count(); + + elem_count_min = std::min( (size_t) fixture.uses_elem_count() , elem_count_min ); + elem_count_max = std::max( (size_t) 
fixture.uses_elem_count() , elem_count_max ); + node_count_min = std::min( (size_t) fixture.owns_node_count() , node_count_min ); + node_count_max = std::max( (size_t) fixture.owns_node_count() , node_count_max ); + + recv_count_max = std::max( (size_t) fixture.recv_node_msg_count() , recv_count_max ); + recv_count_min = std::min( (size_t) fixture.recv_node_msg_count() , recv_count_min ); + send_count_max = std::max( (size_t) fixture.send_node_msg_count() , send_count_max ); + send_count_min = std::min( (size_t) fixture.send_node_msg_count() , send_count_min ); + } + + node_count_avg /= double(global_size); + elem_count_avg /= double(global_size); + recv_count_avg /= double(global_size); + send_count_avg /= double(global_size); + + std::cout << "Elem min(" << elem_count_min << ") avg(" << elem_count_avg << ") max(" << elem_count_max << ") " << std::endl + << "Node min(" << node_count_min << ") avg(" << node_count_avg << ") max(" << node_count_max << ") " << std::endl + << "Recv min(" << recv_count_min << ") avg(" << recv_count_avg << ") max(" << recv_count_max << ") " << std::endl + << "Send min(" << send_count_min << ") avg(" << send_count_avg << ") max(" << send_count_max << ") " << std::endl + ; +} + +int main() +{ + for ( int i = 1 ; i <= 32 ; ++i ) { + const size_t global_size = 16 * i ; + const size_t global_box[3][2] = { { 0 , 65 } , { 0 , 65 } , { 0 , 65 } }; + if ( 30 < test_box( global_size , global_box , false ) ) { + test_box( global_size , global_box , true ); + } + } + +// test_elem(); + + { + std::cout << "test_fixture< Host >" << std::endl ; + Kokkos::HostSpace::execution_space::initialize( 1 ); + Kokkos::Example::test_fixture< Kokkos::HostSpace::execution_space >(); + Kokkos::HostSpace::execution_space::finalize(); + } + +#if defined( KOKKOS_HAVE_CUDA ) + { + std::cout << "test_fixture< Cuda >" << std::endl ; + Kokkos::HostSpace::execution_space::initialize(); + Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) ); + 
Kokkos::Example::test_fixture< Kokkos::Cuda >(); + Kokkos::Cuda::finalize(); + Kokkos::HostSpace::execution_space::finalize(); + } +#endif +} + diff --git a/lib/kokkos/example/fixture/Makefile b/lib/kokkos/example/fixture/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..990f4f18e7d420f2cb7c991ba2d9732f50ef1c56 --- /dev/null +++ b/lib/kokkos/example/fixture/Makefile @@ -0,0 +1,48 @@ +KOKKOS_PATH = ../.. + +vpath %.cpp ${KOKKOS_PATH}/example/fixture + +EXAMPLE_HEADERS = $(wildcard $(KOKKOS_PATH)/example/common/*.hpp ${KOKKOS_PATH}/example/fixture/*.hpp ) + +default: build_all + echo "End Build" + +include $(KOKKOS_PATH)/Makefile.kokkos + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + CXX = $(NVCC_WRAPPER) + CXXFLAGS ?= -O3 + LINK = $(CXX) + LDFLAGS ?= -lpthread +else + CXX ?= g++ + CXXFLAGS ?= -O3 + LINK ?= $(CXX) + LDFLAGS ?= -lpthread +endif + +KOKKOS_CXXFLAGS += \ + -I${KOKKOS_PATH}/example/common \ + -I${KOKKOS_PATH}/example/fixture + +EXE_EXAMPLE_FIXTURE = KokkosExample_Fixture +OBJ_EXAMPLE_FIXTURE = Main.o TestFixture.o BoxElemPart.o + +TARGETS = $(EXE_EXAMPLE_FIXTURE) + +#TEST_TARGETS = + +$(EXE_EXAMPLE_FIXTURE) : $(OBJ_EXAMPLE_FIXTURE) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_EXAMPLE_FIXTURE) $(KOKKOS_LIBS) $(LIB) -o $(EXE_EXAMPLE_FIXTURE) + +build_all : $(TARGETS) + +test : build_all + +clean: kokkos-clean + rm -f *.o $(TARGETS) + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(EXAMPLE_HEADERS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/fixture/TestFixture.cpp b/lib/kokkos/example/fixture/TestFixture.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9cf2f07322c9f8df004ac1ae6f13eb8ad066181f --- /dev/null +++ b/lib/kokkos/example/fixture/TestFixture.cpp @@ -0,0 +1,58 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <TestFixture.hpp> + +namespace Kokkos { +namespace Example { + +template void test_fixture< Kokkos::HostSpace::execution_space >(); + +#if defined( KOKKOS_HAVE_CUDA ) +template void test_fixture<Kokkos::Cuda>(); +#endif + +} /* namespace Example */ +} /* namespace Kokkos */ + diff --git a/lib/kokkos/example/fixture/TestFixture.hpp b/lib/kokkos/example/fixture/TestFixture.hpp new file mode 100644 index 0000000000000000000000000000000000000000..dbf5ca56f512a0a3757b1084e294c016fb038154 --- /dev/null +++ b/lib/kokkos/example/fixture/TestFixture.hpp @@ -0,0 +1,156 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXAMPLE_TESTFIXTURE_HPP +#define KOKKOS_EXAMPLE_TESTFIXTURE_HPP + +#include <utility> +#include <iostream> + +#include <Kokkos_Core.hpp> + +#include <BoxElemPart.hpp> +#include <BoxElemFixture.hpp> + +namespace Kokkos { +namespace Example { + +template< class Device > +struct FixtureVerifyElemNodeCoord +{ + typedef Device execution_space ; + + typedef struct { size_t success , error ; } value_type ; + + typedef Kokkos::Example::BoxElemFixture< Device , Kokkos::Example::BoxElemPart::ElemLinear > FixtureType ; + + FixtureType m_fixture ; + + KOKKOS_INLINE_FUNCTION + void init( value_type & update ) const { update.success = update.error = 0 ; } + + KOKKOS_INLINE_FUNCTION + void join( volatile value_type & update , + volatile const value_type & input ) const + { + update.success += input.success ; + update.error += input.error ; + } + + + KOKKOS_INLINE_FUNCTION + void operator()( size_t ielem , value_type & update ) const + { + unsigned node_coord[ FixtureType::ElemNode ][3] ; + + for ( unsigned i = 0 ; i < 
FixtureType::ElemNode ; ++i ) { + const unsigned node_id = m_fixture.elem_node(ielem,i); + node_coord[i][0] = m_fixture.node_grid(node_id,0); + node_coord[i][1] = m_fixture.node_grid(node_id,1); + node_coord[i][2] = m_fixture.node_grid(node_id,2); + } + + int error = 0 ; + for ( unsigned i = 1 ; i < FixtureType::ElemNode ; ++i ) { + if ( node_coord[0][0] + m_fixture.elem_node_local(i,0) != node_coord[i][0] || + node_coord[0][1] + m_fixture.elem_node_local(i,1) != node_coord[i][1] || + node_coord[0][2] + m_fixture.elem_node_local(i,2) != node_coord[i][2] ) { + error = 1 ; + } + } + + if ( error ) { + ++update.error ; + } + else { + ++update.success ; + } + } + + FixtureVerifyElemNodeCoord( const FixtureType & f ) : m_fixture(f) {} +}; + + +template< class Device > +void test_fixture() +{ + typedef Kokkos::Example::BoxElemFixture< Device , Kokkos::Example::BoxElemPart::ElemLinear > FixtureType ; + + const Kokkos::Example::BoxElemPart::Decompose + decompose = Kokkos::Example::BoxElemPart:: DecomposeElem ; // DecomposeElem | DecomposeNode ; + + const unsigned global_size = 256 ; + const unsigned global_nx = 400 ; + const unsigned global_ny = 400 ; + const unsigned global_nz = 400 ; + + for ( unsigned my_rank = 0 ; my_rank < global_size ; ++my_rank ) { + + const FixtureType fixture( decompose , global_size , my_rank , global_nx , global_ny , global_nz ); + + // Verify grid coordinates of element's nodes + + typename FixtureVerifyElemNodeCoord<Device>::value_type result = { 0 , 0 }; + + Kokkos::parallel_reduce( fixture.elem_node().dimension_0() , FixtureVerifyElemNodeCoord<Device>( fixture ) , result ); + + if ( result.error ) { + std::cout << "P[" << my_rank << ":" << global_size + << "] Fixture elem_node_coord" + << " success(" << result.success << ")" + << " error(" << result.error << ")" + << std::endl ; + } + + // Check send/recv alignment + + + } +} + + +} /* namespace Example */ +} /* namespace Kokkos */ + +#endif /* #ifndef KOKKOS_EXAMPLE_TESTFIXTURE_HPP */ + 
diff --git a/lib/kokkos/example/global_2_local_ids/CMakeLists.txt b/lib/kokkos/example/global_2_local_ids/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..9f32fe580246233f0a5358b5d505abfdeebd0d14 --- /dev/null +++ b/lib/kokkos/example/global_2_local_ids/CMakeLists.txt @@ -0,0 +1,17 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +SET(SOURCES "") + +SET(SOURCES + G2L_Main.cpp + ) + +TRIBITS_ADD_EXECUTABLE( + global_2_local_ids + SOURCES ${SOURCES} + COMM serial mpi + ) + + diff --git a/lib/kokkos/example/global_2_local_ids/G2L.hpp b/lib/kokkos/example/global_2_local_ids/G2L.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9023ae04267835ec38f0fd20b9dcd3caf798ad04 --- /dev/null +++ b/lib/kokkos/example/global_2_local_ids/G2L.hpp @@ -0,0 +1,266 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_GLOBAL_TO_LOCAL_IDS_HPP +#define KOKKOS_GLOBAL_TO_LOCAL_IDS_HPP + +#include <Kokkos_Core.hpp> + +#include <Kokkos_UnorderedMap.hpp> + +#include <vector> +#include <algorithm> +#include <iomanip> + +#include <impl/Kokkos_Timer.hpp> + +// This test will simulate global ids + +namespace G2L { + +static const unsigned begin_id_size = 256u; +static const unsigned end_id_size = 1u << 25; +static const unsigned id_step = 2u; + +//use to help generate global ids +union helper +{ + uint32_t word; + uint8_t byte[4]; +}; + + +//generate a unique global id from the local id +template <typename Device> +struct generate_ids +{ + typedef Device execution_space; + typedef typename execution_space::size_type size_type; + typedef Kokkos::View<uint32_t*,execution_space> local_id_view; + + local_id_view local_2_global; + + generate_ids( local_id_view & ids) + : local_2_global(ids) + { + Kokkos::parallel_for(local_2_global.size(), *this); + } + + + KOKKOS_INLINE_FUNCTION + void operator()(size_type i) const + { + + helper x = 
{static_cast<uint32_t>(i)}; + + // shuffle the bytes of i to create a unique, semi-random global_id + x.word = ~x.word; + + uint8_t tmp = x.byte[3]; + x.byte[3] = x.byte[1]; + x.byte[1] = tmp; + + tmp = x.byte[2]; + x.byte[2] = x.byte[0]; + x.byte[0] = tmp; + + local_2_global[i] = x.word; + } + +}; + +// fill a map of global_id -> local_id +template <typename Device> +struct fill_map +{ + typedef Device execution_space; + typedef typename execution_space::size_type size_type; + typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view; + typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view; + + global_id_view global_2_local; + local_id_view local_2_global; + + fill_map( global_id_view gIds, local_id_view lIds) + : global_2_local(gIds) , local_2_global(lIds) + { + Kokkos::parallel_for(local_2_global.size(), *this); + } + + KOKKOS_INLINE_FUNCTION + void operator()(size_type i) const + { + global_2_local.insert( local_2_global[i], i); + } + +}; + +// check that the global id is found and that it maps to the local id +template <typename Device> +struct find_test +{ + typedef Device execution_space; + typedef typename execution_space::size_type size_type; + typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view; + typedef Kokkos::UnorderedMap<const uint32_t, const size_type,execution_space> global_id_view; + + global_id_view global_2_local; + local_id_view local_2_global; + + typedef size_t value_type; + + find_test( global_id_view gIds, local_id_view lIds, value_type & num_errors) + : global_2_local(gIds) , local_2_global(lIds) + { + Kokkos::parallel_reduce(local_2_global.size(), *this, num_errors); + } + + KOKKOS_INLINE_FUNCTION + void init(value_type & v) const + { v = 0; } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type & dst, volatile value_type const & src) const + { dst += src; } + + KOKKOS_INLINE_FUNCTION + void operator()(size_type i, value_type 
& num_errors) const + { + uint32_t index = global_2_local.find( local_2_global[i] ); + + if ( !global_2_local.valid_at(index) + || global_2_local.key_at(index) != local_2_global[i] + || global_2_local.value_at(index) != i) + ++num_errors; + } + +}; + +// run test +template <typename Device> +size_t test_global_to_local_ids(unsigned num_ids, unsigned capacity, unsigned num_find_iterations) +{ + + typedef Device execution_space; + typedef typename execution_space::size_type size_type; + + typedef Kokkos::View<uint32_t*,execution_space> local_id_view; + typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view; + + double elasped_time = 0; + Kokkos::Timer timer; + + local_id_view local_2_global("local_ids", num_ids); + global_id_view global_2_local(capacity); + + int shiftw = 15; + + //create + elasped_time = timer.seconds(); + std::cout << std::setw(shiftw) << "allocate: " << elasped_time << std::endl; + timer.reset(); + + // generate unique ids + { + generate_ids<Device> gen(local_2_global); + } + + // generate + elasped_time = timer.seconds(); + std::cout << std::setw(shiftw) << "generate: " << elasped_time << std::endl; + timer.reset(); + + { + fill_map<Device> fill(global_2_local, local_2_global); + } + + // fill + elasped_time = timer.seconds(); + std::cout << std::setw(shiftw) << "fill: " << elasped_time << std::endl; + timer.reset(); + + + size_t num_errors = global_2_local.failed_insert(); + + if (num_errors == 0u) { + for (unsigned i=0; i<num_find_iterations; ++i) + { + find_test<Device> find(global_2_local, local_2_global,num_errors); + } + + // find + elasped_time = timer.seconds(); + std::cout << std::setw(shiftw) << "lookup: " << elasped_time << std::endl; + } + else { + std::cout << " !!! Fill Failed !!!" 
<< std::endl; + } + + return num_errors; +} + +template <typename Device> +size_t run_test(unsigned num_ids, unsigned num_find_iterations) +{ + // expect to fail + unsigned capacity = (num_ids*2u)/3u; + std::cout << " 66% of needed capacity (should fail)" << std::endl; + test_global_to_local_ids<Device>(num_ids, capacity, num_find_iterations); + + //should not fail + std::cout << " 100% of needed capacity" << std::endl; + capacity = num_ids; + size_t num_errors = test_global_to_local_ids<Device>(num_ids, capacity, num_find_iterations); + + //should not fail + std::cout << " 150% of needed capacity" << std::endl; + capacity = (num_ids*3u)/2u; + num_errors += test_global_to_local_ids<Device>(num_ids, capacity, num_find_iterations); + + return num_errors; +} + + +} // namespace G2L + + +#endif //KOKKOS_GLOBAL_TO_LOCAL_IDS_HPP + diff --git a/lib/kokkos/example/global_2_local_ids/G2L_Main.cpp b/lib/kokkos/example/global_2_local_ids/G2L_Main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..171ed4b5f6d05bd7ddacd14f915fe44d4e8913a2 --- /dev/null +++ b/lib/kokkos/example/global_2_local_ids/G2L_Main.cpp @@ -0,0 +1,149 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> + +#include <G2L.hpp> + +namespace G2L { + +size_t run_serial(unsigned num_ids, unsigned num_find_iterations) +{ +#ifdef KOKKOS_HAVE_SERIAL + std::cout << "Serial" << std::endl; + return run_test<Kokkos::Serial>(num_ids,num_find_iterations); +#else + return 0; +#endif // KOKKOS_HAVE_SERIAL +} + +size_t run_threads(unsigned num_ids, unsigned num_find_iterations) +{ +#ifdef KOKKOS_HAVE_PTHREAD + std::cout << "Threads" << std::endl; + return run_test<Kokkos::Threads>(num_ids,num_find_iterations); +#else + return 0; +#endif +} + +size_t run_openmp(unsigned num_ids, unsigned num_find_iterations) +{ +#ifdef KOKKOS_HAVE_OPENMP + std::cout << "OpenMP" << std::endl; + return run_test<Kokkos::OpenMP>(num_ids,num_find_iterations); +#else + return 0; +#endif +} + +size_t run_cuda(unsigned num_ids, unsigned num_find_iterations) +{ +#ifdef KOKKOS_HAVE_CUDA + std::cout << "Cuda" << std::endl; + return run_test<Kokkos::Cuda>(num_ids,num_find_iterations); +#else + return 0; +#endif +} + +} // namespace G2L + + +int main(int argc, char *argv[]) +{ + unsigned num_ids = 100000; + unsigned num_find_iterations = 1000; + + if (argc == 3) { + num_ids = atoi(argv[1]); + num_find_iterations = atoi(argv[2]); + } + else if (argc != 1) { + std::cout << argv[0] << " num_ids num_find_iterations" << std::endl; + return 0; + } + + + // query the topology of the host + unsigned threads_count = 4 ; + + if (Kokkos::hwloc::available()) { + threads_count = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); + + } + + std::cout << "Threads: " << threads_count << std::endl; + std::cout << "Number of ids: " << num_ids << std::endl; + std::cout << "Number of find iterations: " << num_find_iterations << std::endl; + + size_t num_errors = 0; + + num_errors 
+= G2L::run_serial(num_ids,num_find_iterations); + +#ifdef KOKKOS_HAVE_CUDA + Kokkos::HostSpace::execution_space::initialize(threads_count); + Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) ); + num_errors += G2L::run_cuda(num_ids,num_find_iterations); + Kokkos::Cuda::finalize(); + Kokkos::HostSpace::execution_space::finalize(); +#endif + +#ifdef KOKKOS_HAVE_PTHREAD + Kokkos::Threads::initialize( threads_count ); + num_errors += G2L::run_threads(num_ids,num_find_iterations); + Kokkos::Threads::finalize(); +#endif + +#ifdef KOKKOS_HAVE_OPENMP + Kokkos::OpenMP::initialize( threads_count ); + num_errors += G2L::run_openmp(num_ids,num_find_iterations); + Kokkos::OpenMP::finalize(); +#endif + + + return num_errors; +} + diff --git a/lib/kokkos/example/global_2_local_ids/Makefile b/lib/kokkos/example/global_2_local_ids/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..bf8fbea3e09a5d71f900de85ff2100cf41bd5738 --- /dev/null +++ b/lib/kokkos/example/global_2_local_ids/Makefile @@ -0,0 +1,53 @@ +KOKKOS_PATH ?= ../.. 
+ +MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) +SRC_DIR := $(dir $(MAKEFILE_PATH)) + +SRC = $(wildcard $(SRC_DIR)/*.cpp) +OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o) + +#SRC = $(wildcard *.cpp) +#OBJ = $(SRC:%.cpp=%.o) + +default: build + echo "Start Build" + +# use installed Makefile.kokkos +include $(KOKKOS_PATH)/Makefile.kokkos + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = $(NVCC_WRAPPER) +CXXFLAGS = -I$(SRC_DIR) -O3 +LINK = $(CXX) +LINKFLAGS = +EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR))) +#KOKKOS_DEVICES = "Cuda,OpenMP" +#KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -I$(SRC_DIR) -O3 +LINK = $(CXX) +LINKFLAGS = +EXE = $(addsuffix .host, $(shell basename $(SRC_DIR))) +#KOKKOS_DEVICES = "OpenMP" +#KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +LIB = + + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: + rm -f *.a *.o *.cuda *.host + +# Compilation rules + +%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + diff --git a/lib/kokkos/example/grow_array/CMakeLists.txt b/lib/kokkos/example/grow_array/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..d9ff17049290af181d4f693cf9936627b28d087e --- /dev/null +++ b/lib/kokkos/example/grow_array/CMakeLists.txt @@ -0,0 +1,14 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +SET(SOURCES "") + +FILE(GLOB SOURCES *.cpp) + +TRIBITS_ADD_EXECUTABLE( + grow_array + SOURCES ${SOURCES} + COMM serial mpi + ) + diff --git a/lib/kokkos/example/grow_array/Makefile b/lib/kokkos/example/grow_array/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..bf8fbea3e09a5d71f900de85ff2100cf41bd5738 --- /dev/null +++ b/lib/kokkos/example/grow_array/Makefile @@ -0,0 +1,53 @@ +KOKKOS_PATH ?= ../.. 
+ +MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) +SRC_DIR := $(dir $(MAKEFILE_PATH)) + +SRC = $(wildcard $(SRC_DIR)/*.cpp) +OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o) + +#SRC = $(wildcard *.cpp) +#OBJ = $(SRC:%.cpp=%.o) + +default: build + echo "Start Build" + +# use installed Makefile.kokkos +include $(KOKKOS_PATH)/Makefile.kokkos + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = $(NVCC_WRAPPER) +CXXFLAGS = -I$(SRC_DIR) -O3 +LINK = $(CXX) +LINKFLAGS = +EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR))) +#KOKKOS_DEVICES = "Cuda,OpenMP" +#KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -I$(SRC_DIR) -O3 +LINK = $(CXX) +LINKFLAGS = +EXE = $(addsuffix .host, $(shell basename $(SRC_DIR))) +#KOKKOS_DEVICES = "OpenMP" +#KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +LIB = + + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: + rm -f *.a *.o *.cuda *.host + +# Compilation rules + +%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + diff --git a/lib/kokkos/example/grow_array/grow_array.hpp b/lib/kokkos/example/grow_array/grow_array.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9daef1a4b084564e84559de634cd5e6ee5bb9425 --- /dev/null +++ b/lib/kokkos/example/grow_array/grow_array.hpp @@ -0,0 +1,257 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef EXAMPLE_GROW_ARRAY +#define EXAMPLE_GROW_ARRAY + +#include <stdlib.h> + +#include <Kokkos_Core.hpp> + +#include <algorithm> + +#if defined(KOKKOS_HAVE_CUDA) +#include <thrust/device_ptr.h> +#include <thrust/sort.h> +#endif + +namespace Example { + +//---------------------------------------------------------------------------- + +template< class ExecSpace > +struct SortView { + + template< typename ValueType > + SortView( const Kokkos::View<ValueType*,ExecSpace> v , int begin , int end ) + { + std::sort( v.ptr_on_device() + begin , v.ptr_on_device() + end ); + } +}; + +#if defined(KOKKOS_HAVE_CUDA) +template<> +struct SortView< Kokkos::Cuda > { + template< typename ValueType > + SortView( const Kokkos::View<ValueType*,Kokkos::Cuda> v , int begin , int end ) + { + thrust::sort( thrust::device_ptr<ValueType>( v.ptr_on_device() + begin ) + , thrust::device_ptr<ValueType>( v.ptr_on_device() + end ) ); + } +}; +#endif + + + +//---------------------------------------------------------------------------- + +template< class ExecSpace > +struct GrowArrayFunctor { + + typedef ExecSpace execution_space ; + + enum { SHIFT = sizeof(int) == 8 ? 
6 : 5 }; // 8 or 4 byte int + enum { MASK = ( 1 << SHIFT ) - 1 }; + + const Kokkos::View<int*,ExecSpace> m_search_flags ; // bit flags for values to append + const Kokkos::View<int*,ExecSpace> m_search_array ; // array to append values + const Kokkos::View<int,ExecSpace> m_search_count ; // offset + const int m_search_total ; + const int m_search_team_chunk ; + + GrowArrayFunctor( int array_length , int search_length , int print = 1 ) + : m_search_flags( "flags" , ( search_length + MASK ) >> SHIFT ) // One bit per search entry + , m_search_array( "array" , array_length ) + , m_search_count( "count" ) + , m_search_total( search_length ) + , m_search_team_chunk( 2048 ) + {} + + KOKKOS_INLINE_FUNCTION + bool flag_is_set( const int index ) const + { + // 64 or 32 bit integer: + + const int j = index >> SHIFT ; // which integer flag + const int k = 1 << ( index & MASK ); // which bit in that integer + const int s = ( j < int(m_search_flags.dimension_0()) ) && ( 0 != ( m_search_flags(j) & k ) ); + + return s ; + } + + typedef typename Kokkos::TeamPolicy<ExecSpace>::member_type team_member ; + + KOKKOS_INLINE_FUNCTION + void operator()( const team_member & member ) const + { + enum { LOCAL_BUFFER_LENGTH = 16 }; + + int local_buffer[ LOCAL_BUFFER_LENGTH ] ; + int local_count = 0 ; + + // Each team searches 'm_search_team_chunk' indices. + // The threads of a team must iterate together because all + // threads in the team must call 'team_scan' to prevent deadlock in the team. + + int search_team_begin = member.league_rank() * m_search_team_chunk ; + const int search_team_end = search_team_begin + m_search_team_chunk ; + + int k = 0 ; + + while ( search_team_begin < search_team_end ) { + + // This iteration searches [ search_team_begin .. search_team_begin + member.team_size() ] + const int thread_search_index = search_team_begin + member.team_rank(); + + // If this thread's search index is in the range + // and the flag is set, push into this thread's local buffer. 
+ if ( thread_search_index < m_search_total && flag_is_set(thread_search_index) ) { + local_buffer[ local_count ] = thread_search_index ; + ++local_count ; + } + + // Move the team's search range forward + search_team_begin += member.team_size(); // Striding team by team size + + // Count number of times a thread's buffer might have grown: + ++k ; + + // Write buffer if end of search or a thread might have filled its buffer. + if ( k == LOCAL_BUFFER_LENGTH /* A thread in my team might have filled its buffer */ || + ! ( search_team_begin < search_team_end ) /* Team is at the end of its search */ ) { + + // Team's exclusive scan of threads' contributions, with global offset. + // This thread writes its buffer into [ team_offset .. team_offset + local_count ) + const int team_offset = member.team_scan( local_count , & *m_search_count ); + + // Copy locally buffered entries into global array: + for ( int i = 0 ; i < local_count ; ++i ) { + m_search_array( team_offset + i ) = local_buffer[i] ; + } + + k = 0 ; + local_count = 0 ; + } + } + } +}; + + +template< class ExecSpace > +void grow_array( int array_length , int search_length , int print = 1 ) +{ + typedef GrowArrayFunctor< ExecSpace > FunctorType ; + + FunctorType functor( array_length , search_length , print ); + + typename Kokkos::View<int,ExecSpace>::HostMirror count = Kokkos::create_mirror_view( functor.m_search_count ); + typename Kokkos::View<int*,ExecSpace>::HostMirror flags = Kokkos::create_mirror_view( functor.m_search_flags ); + + // Set at most 'array_length' random bits over the search length. 
+ for ( int i = 0 ; i < array_length ; ++i ) { + // 'lrand48()' generates random number between [0..2^31] + // index = ( lrand48() * search_length ) / ( 2^31 ) + const long int index = ( lrand48() * search_length ) >> 31 ; + // set the bit within the flags: + flags( index >> FunctorType::SHIFT ) |= ( 1 << ( index & FunctorType::MASK ) ); + } + + Kokkos::deep_copy( functor.m_search_flags , flags ); + + // Each team works on 'functor.m_search_team_chunk' span of the search_length + Kokkos::TeamPolicy< ExecSpace > + work( /* #teams */ ( search_length + functor.m_search_team_chunk - 1 ) / functor.m_search_team_chunk + , /* threads/team */ Kokkos::TeamPolicy< ExecSpace >::team_size_max( functor ) ); + + // Fill array: + Kokkos::parallel_for( work , functor ); + + // How much was filled: + Kokkos::deep_copy( count , functor.m_search_count ); + + // Sort array: + SortView< ExecSpace >( functor.m_search_array , 0 , *count ); + + // Mirror the results: + typename Kokkos::View<int*,ExecSpace>::HostMirror results = Kokkos::create_mirror_view( functor.m_search_array ); + Kokkos::deep_copy( results , functor.m_search_array ); + + // Verify results: + int result_error_count = 0 ; + int flags_error_count = 0 ; + for ( int i = 0 ; i < *count ; ++i ) { + const int index = results(i); + const int entry = index >> FunctorType::SHIFT ; + const int bit = 1 << ( index & FunctorType::MASK ); + const bool flag = 0 != ( flags( entry ) & bit ); + if ( ! 
flag ) { + if ( print ) std::cerr << "result( " << i << " : " << index << " )"; + ++result_error_count ; + } + flags( entry ) &= ~bit ; // Clear that verified bit + } + + for ( int i = 0 ; i < int(flags.dimension_0()) ; ++i ) { + // If any uncleared bits then an error + if ( flags(i) ) { + if ( print ) std::cerr << "flags( " << i << " : " << flags(i) << " )" ; + ++flags_error_count ; + } + } + + if ( result_error_count || flags_error_count ) { + std::cerr << std::endl << "Example::GrowArrayFunctor( " << array_length + << " , " << search_length + << " ) result_error_count( " << result_error_count << " )" + << " ) flags_error_count( " << flags_error_count << " )" + << std::endl ; + } +} + + +} // namespace Example + +//---------------------------------------------------------------------------- + +#endif /* #ifndef EXAMPLE_GROW_ARRAY */ + diff --git a/lib/kokkos/example/grow_array/main.cpp b/lib/kokkos/example/grow_array/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4693aa3af47676957ffb11468550b2dadc7fa748 --- /dev/null +++ b/lib/kokkos/example/grow_array/main.cpp @@ -0,0 +1,110 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <iostream> +#include <sstream> + +#include <Kokkos_Core.hpp> + +#include <grow_array.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +int main( int argc , char ** argv ) +{ + int num_threads = 4 ; + int use_numa = 1 ; + int use_core = 1 ; + int length_array = 1000000 ; + int span_values = 100000000 ; + + + if ( Kokkos::hwloc::available() ) { + use_numa = Kokkos::hwloc::get_available_numa_count(); + use_core = Kokkos::hwloc::get_available_cores_per_numa() - 1 ; + num_threads = use_numa * use_core * Kokkos::hwloc::get_available_threads_per_core(); + } + +#if defined( KOKKOS_HAVE_SERIAL ) + { + std::cout << "Kokkos::Serial" << std::endl ; + // The Serial device accepts these arguments, though it may ignore them. 
+ Kokkos::Serial::initialize( num_threads , use_numa , use_core ); + Example::grow_array< Kokkos::Serial >( length_array , span_values ); + Kokkos::Serial::finalize (); + } +#endif // defined( KOKKOS_HAVE_SERIAL ) + +#if defined( KOKKOS_HAVE_PTHREAD ) + { + std::cout << "Kokkos::Threads" << std::endl ; + Kokkos::Threads::initialize( num_threads , use_numa , use_core ); + Example::grow_array< Kokkos::Threads >( length_array , span_values ); + Kokkos::Threads::finalize(); + } +#endif + +#if defined( KOKKOS_HAVE_OPENMP ) + { + std::cout << "Kokkos::OpenMP" << std::endl ; + Kokkos::OpenMP::initialize( num_threads , use_numa , use_core ); + Example::grow_array< Kokkos::OpenMP >( length_array , span_values ); + Kokkos::OpenMP::finalize(); + } +#endif + +#if defined( KOKKOS_HAVE_CUDA ) + { + std::cout << "Kokkos::Cuda" << std::endl ; + Kokkos::HostSpace::execution_space::initialize(1); + Kokkos::Cuda::initialize(); + Example::grow_array< Kokkos::Cuda >( length_array , span_values ); + Kokkos::Cuda::finalize(); + Kokkos::HostSpace::execution_space::finalize(); + } +#endif + + return 0 ; +} + diff --git a/lib/kokkos/example/ichol/Makefile b/lib/kokkos/example/ichol/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..57e972f042d94c337e8d6b73fffcec2e0d40ad90 --- /dev/null +++ b/lib/kokkos/example/ichol/Makefile @@ -0,0 +1,63 @@ +SCOTCH_PATH = /home/hcedwar/scotch/6.0.0 +KOKKOS_PATH = ../.. 
+ +vpath %.cpp ${KOKKOS_PATH}/example/ichol/src ${KOKKOS_PATH}/example/ichol/example + +EXAMPLE_HEADERS = $(wildcard $(KOKKOS_PATH)/example/ichol/src/*.hpp ${KOKKOS_PATH}/example/ichol/example/*.hpp ) + +default: build_all + echo "End Build" + +include $(KOKKOS_PATH)/Makefile.kokkos + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + CXX = $(NVCC_WRAPPER) + CXXFLAGS ?= -O3 + LINK = $(CXX) + LDFLAGS ?= -lpthread +else + CXX ?= g++ + CXXFLAGS ?= -O3 + LINK ?= $(CXX) + LDFLAGS ?= -lpthread +endif + +KOKKOS_CXXFLAGS += \ + -I${KOKKOS_PATH}/example/ichol/src \ + -I${KOKKOS_PATH}/example/ichol/example \ + -I${SCOTCH_PATH}/include + +EXE_EXAMPLE_ICHOL_THREADS = KokkosExample_ichol_threads +OBJ_EXAMPLE_ICHOL_THREADS = example_chol_performance_device_pthread.o + +EXE_EXAMPLE_ICHOL_CUDA = KokkosExample_ichol_cuda +OBJ_EXAMPLE_ICHOL_CUDA = example_chol_performance_device_cuda.o + +TARGETS = $(EXE_EXAMPLE_ICHOL_THREADS) $(EXE_EXAMPLE_ICHOL_CUDA) + +#TEST_TARGETS = + +$(EXE_EXAMPLE_ICHOL_THREADS) : $(OBJ_EXAMPLE_ICHOL_THREADS) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) \ + $(OBJ_EXAMPLE_ICHOL_THREADS) $(KOKKOS_LIBS) $(LIB) \ + -L${SCOTCH_PATH}/lib -lscotch -lscotcherr -lscotcherrexit \ + -o $(EXE_EXAMPLE_ICHOL_THREADS) + +$(EXE_EXAMPLE_ICHOL_CUDA) : $(OBJ_EXAMPLE_ICHOL_CUDA) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) \ + $(OBJ_EXAMPLE_ICHOL_CUDA) $(KOKKOS_LIBS) $(LIB) \ + -L${SCOTCH_PATH}/lib -lscotch -lscotcherr -lscotcherrexit \ + -o $(EXE_EXAMPLE_ICHOL_CUDA) + +build_all : $(TARGETS) + +test : build_all + +clean: kokkos-clean + rm -f *.o $(TARGETS) + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(EXAMPLE_HEADERS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + diff --git a/lib/kokkos/example/ichol/example/example_chol_performance_device.hpp b/lib/kokkos/example/ichol/example/example_chol_performance_device.hpp new file mode 100644 index 
0000000000000000000000000000000000000000..ca819e4f97028eb0782c7e6c5638945d40f7597b --- /dev/null +++ b/lib/kokkos/example/ichol/example/example_chol_performance_device.hpp @@ -0,0 +1,240 @@ +#pragma once +#ifndef __EXAMPLE_CHOL_PERFORMANCE_DEVICE_HPP__ +#define __EXAMPLE_CHOL_PERFORMANCE_DEVICE_HPP__ + +#include <Kokkos_Core.hpp> +#include <impl/Kokkos_Timer.hpp> + +#include "util.hpp" + +#include "crs_matrix_base.hpp" +#include "crs_matrix_view.hpp" +#include "crs_row_view.hpp" + +#include "graph_helper_scotch.hpp" +#include "symbolic_factor_helper.hpp" +#include "crs_matrix_helper.hpp" + +#include "task_view.hpp" + +#include "task_factory.hpp" + +#include "chol.hpp" + +namespace Tacho { + + using namespace std; + + template<typename ValueType, + typename OrdinalType, + typename SizeType = OrdinalType, + typename SpaceType = void> + int exampleCholPerformanceDevice(const string file_input, + const int treecut, + const int prunecut, + const int seed, + const int nthreads, + const int max_task_dependence, + const int max_concurrency, + const int team_size, + const int fill_level, + const int league_size, + const bool skip_serial, + const bool verbose) { + typedef ValueType value_type; + typedef OrdinalType ordinal_type; + typedef SizeType size_type; + typedef typename + Kokkos::Impl::is_space< SpaceType >::host_mirror_space::execution_space + HostSpaceType ; + + typedef TaskFactory<Kokkos::Experimental::TaskPolicy<SpaceType>, + Kokkos::Experimental::Future<int,SpaceType> > TaskFactoryType; + + typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType> + CrsMatrixBaseType; + + typedef CrsMatrixBase<value_type,ordinal_type,size_type,HostSpaceType> + CrsMatrixBaseHostType; + + typedef Kokkos::MemoryUnmanaged MemoryUnmanaged ; + + typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryUnmanaged > + CrsMatrixNestedType; + + + typedef GraphHelper_Scotch<CrsMatrixBaseHostType> GraphHelperType; + typedef SymbolicFactorHelper<CrsMatrixBaseHostType> 
SymbolicFactorHelperType; + + typedef CrsMatrixView<CrsMatrixNestedType> CrsMatrixViewType; + typedef TaskView<CrsMatrixViewType,TaskFactoryType> CrsTaskViewType; + + typedef CrsMatrixBase<CrsTaskViewType,ordinal_type,size_type,SpaceType> CrsHierMatrixBaseType; + + typedef CrsMatrixView<CrsHierMatrixBaseType> CrsHierMatrixViewType; + typedef TaskView<CrsHierMatrixViewType,TaskFactoryType> CrsHierTaskViewType; + + int r_val = 0; + + Kokkos::Timer timer; + double + t_import = 0.0, + t_reorder = 0.0, + t_symbolic = 0.0, + t_flat2hier = 0.0, + t_factor_task = 0.0; + + cout << "CholPerformanceDevice:: import input file = " << file_input << endl; + CrsMatrixBaseHostType AA("AA"); + { + timer.reset(); + + ifstream in; + in.open(file_input); + if (!in.good()) { + cout << "Failed in open the file: " << file_input << endl; + return ++r_val; + } + AA.importMatrixMarket(in); + + t_import = timer.seconds(); + + if (verbose) { + AA.showMe( std::cout ); + std::cout << endl; + } + } + cout << "CholPerformanceDevice:: import input file::time = " << t_import << endl; + + cout << "CholPerformanceDevice:: reorder the matrix" << endl; + CrsMatrixBaseHostType PA("Permuted AA"); + + // '*_UU' is the permuted base upper triangular matrix + CrsMatrixBaseHostType host_UU("host_UU"); + CrsMatrixBaseType device_UU("UU"); + CrsHierMatrixBaseType device_HU("HU");; + + // typename CrsMatrixBaseHostType host_UU("host_UU"); + + { + typename GraphHelperType::size_type_array rptr("Graph::RowPtrArray", AA.NumRows() + 1); + typename GraphHelperType::ordinal_type_array cidx("Graph::ColIndexArray", AA.NumNonZeros()); + + AA.convertGraph(rptr, cidx); + GraphHelperType S("ScotchHelper", + AA.NumRows(), + rptr, + cidx, + seed); + { + timer.reset(); + + S.computeOrdering(treecut, 0); + S.pruneTree(prunecut); + + PA.copy(S.PermVector(), S.InvPermVector(), AA); + + t_reorder = timer.seconds(); + + if (verbose) { + S.showMe( std::cout ); + std::cout << std::endl ; + PA.showMe( std::cout ); + std::cout << 
std::endl ; + } + } + + // Symbolic factorization adds non-zero entries + // for factorization levels. + // Runs on the host process and currently requires std::sort. + + cout << "CholPerformanceDevice:: reorder the matrix::time = " << t_reorder << endl; + { + SymbolicFactorHelperType F(PA, league_size); + timer.reset(); + F.createNonZeroPattern(fill_level, Uplo::Upper, host_UU); + t_symbolic = timer.seconds(); + cout << "CholPerformanceDevice:: AA (nnz) = " << AA.NumNonZeros() << ", host_UU (nnz) = " << host_UU.NumNonZeros() << endl; + + if (verbose) { + F.showMe( std::cout ); + std::cout << std::endl ; + host_UU.showMe( std::cout ); + std::cout << std::endl ; + } + } + cout << "CholPerformanceDevice:: symbolic factorization::time = " << t_symbolic << endl; + + //---------------------------------------------------------------------- + // Allocate device_UU conformal to host_UU + // and deep_copy host_UU arrays to device_UU arrays. + // Set up device_HU referencing blocks of device_UU + + { + timer.reset(); + + device_UU.copy( host_UU ); + + CrsMatrixHelper::flat2hier(Uplo::Upper, device_UU, device_HU, + S.NumBlocks(), + S.RangeVector(), + S.TreeVector()); + + // Filling non-zero block matrixes' row ranges within block view. + // This is performed entirely in the 'device_HU' space. + + CrsMatrixHelper::fillRowViewArray( device_HU ); + + t_flat2hier = timer.seconds(); + + cout << "CholPerformanceDevice:: Hier (dof, nnz) = " << device_HU.NumRows() << ", " << device_HU.NumNonZeros() << endl; + } + cout << "CholPerformanceDevice:: copy base matrix and construct hierarchical matrix::time = " << t_flat2hier << endl; + } + + cout << "CholPerformanceDevice:: max concurrency = " << max_concurrency << endl; + + const size_t max_task_size = 4*sizeof(CrsTaskViewType)+128; + cout << "CholPerformanceDevice:: max task size = " << max_task_size << endl; + + //---------------------------------------------------------------------- + // From here onward all work is on the device. 
+    //----------------------------------------------------------------------
+
+    {
+      typename TaskFactoryType::policy_type policy(max_concurrency,
+                                                   max_task_size,
+                                                   max_task_dependence,
+                                                   team_size);
+
+      cout << "CholPerformanceDevice:: ByBlocks factorize the matrix:: team_size = " << team_size << endl;
+      CrsHierTaskViewType H( device_HU );
+      {
+        timer.reset();
+        {
+          // auto future = policy.proc_create_team(Chol<Uplo::Upper,AlgoChol::ByBlocks>::
+          auto future = policy.proc_create_team(Chol<Uplo::Upper,AlgoChol::ByBlocks,Variant::Two>::
+                                                TaskFunctor<CrsHierTaskViewType>(policy,H), 0);
+          policy.spawn(future);
+          Kokkos::Experimental::wait(policy);
+        }
+        t_factor_task += timer.seconds();
+
+        cout << "CholPerformanceDevice:: policy.allocated_task_count = "
+             << policy.allocated_task_count()
+             << endl ;
+
+        if (verbose) {
+          host_UU.copy( device_UU );
+          host_UU.showMe( std::cout );
+          std::cout << endl;
+        }
+      }
+      cout << "CholPerformanceDevice:: ByBlocks factorize the matrix::time = " << t_factor_task << endl;
+    }
+
+    return r_val;
+  }
+}
+
+#endif
diff --git a/lib/kokkos/example/ichol/example/example_chol_performance_device_cuda.cpp b/lib/kokkos/example/ichol/example/example_chol_performance_device_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3a0df586b5af15a9c56582d216ecac6e5221853d
--- /dev/null
+++ b/lib/kokkos/example/ichol/example/example_chol_performance_device_cuda.cpp
@@ -0,0 +1,97 @@
+#include <Kokkos_Core.hpp>
+
+#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp>
+
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <string>
+
+using namespace std;
+
+typedef double value_type;
+typedef int ordinal_type;
+typedef int size_type;
+
+#include "example_chol_performance_device.hpp"
+
+using namespace Tacho;
+
+// Driver: by-blocks incomplete Cholesky example on Kokkos::Cuda.
+// Options taking a value: --file-input, --nthreads, --max-task-dependence,
+// --max-concurrency, --team-size, --fill-level, --league-size, --treecut,
+// --prunecut, --seed.  Flag without a value: --enable-verbose.
+// Unrecognized arguments are ignored.  Returns 1 when an option is missing
+// its value, otherwise the result of exampleCholPerformanceDevice.
+int main (int argc, char *argv[]) {
+
+  string file_input = "test.mtx";
+  int nthreads = 1;
+  int max_task_dependence = 3;
+  int max_concurrency = 1024;
+  int team_size = 1;
+  int fill_level = 0;
+  int treecut = 0;
+  int prunecut = 0;
+  int seed = 0;
+  int league_size = 1;
+  bool verbose = false;
+
+  // argv[0] is the program name, so option parsing starts at index 1.
+  for (int i=1;i<argc;++i) {
+    if (strcmp(argv[i],"--enable-verbose")==0) { verbose = true; continue; }
+
+    // Map the option name to the integer it sets; --file-input is handled separately.
+    const bool is_file_input = (strcmp(argv[i],"--file-input")==0);
+    int *target = nullptr;
+    if      (strcmp(argv[i],"--nthreads")==0)            target = &nthreads;
+    else if (strcmp(argv[i],"--max-task-dependence")==0) target = &max_task_dependence;
+    else if (strcmp(argv[i],"--max-concurrency")==0)     target = &max_concurrency;
+    else if (strcmp(argv[i],"--team-size")==0)           target = &team_size;
+    else if (strcmp(argv[i],"--fill-level")==0)          target = &fill_level;
+    else if (strcmp(argv[i],"--league-size")==0)         target = &league_size;
+    else if (strcmp(argv[i],"--treecut")==0)             target = &treecut;
+    else if (strcmp(argv[i],"--prunecut")==0)            target = &prunecut;
+    else if (strcmp(argv[i],"--seed")==0)                target = &seed;
+
+    if (!is_file_input && target == nullptr) continue;
+
+    // These options consume the next argument; never read past argv's end.
+    if (i+1 >= argc) {
+      cerr << "Missing value for option " << argv[i] << endl;
+      return 1;
+    }
+    ++i;
+    if (is_file_input) file_input = argv[i];
+    else               *target = atoi(argv[i]);
+  }
+
+  int r_val = 0;
+  {
+    typedef Kokkos::Cuda exec_space;
+
+    // Host execution space is initialized first, then CUDA; finalized in reverse order below.
+    Kokkos::DefaultHostExecutionSpace::initialize(nthreads);
+
+    exec_space::initialize();
+    exec_space::print_configuration(cout, true);
+
+    r_val = exampleCholPerformanceDevice
+      <value_type,ordinal_type,size_type,exec_space>
+      (file_input,
+       treecut,
+       prunecut,
+       seed,
+       nthreads,
+       max_task_dependence, max_concurrency, team_size,
+       fill_level, league_size,
+       (nthreads != 1), // skip_serial
+       verbose);
+
+    exec_space::finalize();
+
+    Kokkos::DefaultHostExecutionSpace::finalize();
+  }
+
+  return r_val;
+}
diff --git a/lib/kokkos/example/ichol/example/example_chol_performance_device_pthread.cpp b/lib/kokkos/example/ichol/example/example_chol_performance_device_pthread.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..68f520cf6620888c2a8de2f8cabe06a5b9e8b607
--- /dev/null
+++ b/lib/kokkos/example/ichol/example/example_chol_performance_device_pthread.cpp
@@ -0,0 +1,93 @@
+#include <Kokkos_Core.hpp>
+
+#include <Kokkos_Threads.hpp>
+#include <Threads/Kokkos_Threads_TaskPolicy.hpp>
+
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <string>
+
+using namespace std;
+
+typedef double value_type;
+typedef int ordinal_type;
+typedef int size_type;
+
+typedef Kokkos::Threads exec_space;
+
+#include "example_chol_performance_device.hpp"
+
+using namespace Tacho;
+
+// Driver: by-blocks incomplete Cholesky example on Kokkos::Threads.
+// Options taking a value: --file-input, --nthreads, --max-task-dependence,
+// --max-concurrency, --team-size, --fill-level, --league-size, --treecut,
+// --prunecut, --seed.  Flag without a value: --enable-verbose.
+// Unrecognized arguments are ignored.  Returns 1 when an option is missing
+// its value, otherwise the result of exampleCholPerformanceDevice.
+int main (int argc, char *argv[]) {
+
+  string file_input = "test.mtx";
+  int nthreads = 1;
+  int max_task_dependence = 3;
+  int max_concurrency = 1024;
+  int team_size = 1;
+  int fill_level = 0;
+  int treecut = 0;
+  int prunecut = 0;
+  int seed = 0;
+  int league_size = 1;
+  bool verbose = false;
+
+  // argv[0] is the program name, so option parsing starts at index 1.
+  for (int i=1;i<argc;++i) {
+    if (strcmp(argv[i],"--enable-verbose")==0) { verbose = true; continue; }
+
+    // Map the option name to the integer it sets; --file-input is handled separately.
+    const bool is_file_input = (strcmp(argv[i],"--file-input")==0);
+    int *target = nullptr;
+    if      (strcmp(argv[i],"--nthreads")==0)            target = &nthreads;
+    else if (strcmp(argv[i],"--max-task-dependence")==0) target = &max_task_dependence;
+    else if (strcmp(argv[i],"--max-concurrency")==0)     target = &max_concurrency;
+    else if (strcmp(argv[i],"--team-size")==0)           target = &team_size;
+    else if (strcmp(argv[i],"--fill-level")==0)          target = &fill_level;
+    else if (strcmp(argv[i],"--league-size")==0)         target = &league_size;
+    else if (strcmp(argv[i],"--treecut")==0)             target = &treecut;
+    else if (strcmp(argv[i],"--prunecut")==0)            target = &prunecut;
+    else if (strcmp(argv[i],"--seed")==0)                target = &seed;
+
+    if (!is_file_input && target == nullptr) continue;
+
+    // These options consume the next argument; never read past argv's end.
+    if (i+1 >= argc) {
+      cerr << "Missing value for option " << argv[i] << endl;
+      return 1;
+    }
+    ++i;
+    if (is_file_input) file_input = argv[i];
+    else               *target = atoi(argv[i]);
+  }
+
+  int r_val = 0;
+  {
+    exec_space::initialize(nthreads);
+    exec_space::print_configuration(cout, true);
+
+    r_val = exampleCholPerformanceDevice
+      <value_type,ordinal_type,size_type,exec_space>
+      (file_input,
+       treecut,
+       prunecut,
+       seed,
+       nthreads,
+       max_task_dependence, max_concurrency, team_size,
+       fill_level, league_size,
+       (nthreads != 1), // skip_serial
+       verbose);
+
+    exec_space::finalize();
+  }
+
+  return r_val;
+}
diff --git a/lib/kokkos/example/ichol/src/chol.hpp b/lib/kokkos/example/ichol/src/chol.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e8aa4e9189ffb607c91cc2b86811084b69a45393 --- /dev/null +++ b/lib/kokkos/example/ichol/src/chol.hpp @@ -0,0 +1,92 @@ +#pragma once +#ifndef __CHOL_HPP__ +#define __CHOL_HPP__ + +/// \file chol.hpp +/// \brief Incomplete Cholesky factorization front interface. +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +#include "util.hpp" +#include "control.hpp" +#include "partition.hpp" + +namespace Tacho { + + using namespace std; + + // tasking interface + // * default behavior is for non-by-blocks tasks + // * control is only used for by-blocks algorithms + // =============================================== + template<int ArgUplo, int ArgAlgo, + int ArgVariant = Variant::One, + template<int,int> class ControlType = Control> + class Chol { + public: + + // function interface + // ================== + template<typename ExecViewType> + KOKKOS_INLINE_FUNCTION + static int invoke(typename ExecViewType::policy_type &policy, + const typename ExecViewType::policy_type::member_type &member, + typename ExecViewType::matrix_type &A); + + // task-data parallel interface + // ============================ + template<typename ExecViewType> + class TaskFunctor { + public: + typedef typename ExecViewType::policy_type policy_type; + typedef typename policy_type::member_type member_type; + typedef int value_type; + + private: + typename ExecViewType::matrix_type _A; + + policy_type _policy; + + public: + KOKKOS_INLINE_FUNCTION + TaskFunctor(const policy_type & P , + const typename ExecViewType::matrix_type & A) + : _A(A), + _policy(P) + { } + + string Label() const { return "Chol"; } + + // task execution + KOKKOS_INLINE_FUNCTION + void apply(value_type &r_val) { + r_val = Chol::invoke<ExecViewType>(_policy, _policy.member_single(), _A); + } + + // task-data execution + KOKKOS_INLINE_FUNCTION + void apply(const member_type &member, 
value_type &r_val) { + + const int result = Chol::invoke<ExecViewType>(_policy, member, _A); + + if ( 0 == member.team_rank() ) { r_val = result ; } + + } + + }; + + }; +} + + +// unblocked version blas operations +#include "scale.hpp" + +// blocked version blas operations +#include "gemm.hpp" +#include "trsm.hpp" +#include "herk.hpp" + +// cholesky +#include "chol_u.hpp" + +#endif diff --git a/lib/kokkos/example/ichol/src/chol_u.hpp b/lib/kokkos/example/ichol/src/chol_u.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0465ef8f35c8574189c767b6f97dfc7a6344f2cb --- /dev/null +++ b/lib/kokkos/example/ichol/src/chol_u.hpp @@ -0,0 +1,23 @@ +#pragma once +#ifndef __CHOL_U_HPP__ +#define __CHOL_U_HPP__ + +/// \file chol_u.hpp +/// \brief Upper Cholesky factorization variations +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +// testing task-data parallelism +// #include "chol_u_unblocked_dummy.hpp" + +// flame style implementation +//#include "chol_unblocked.hpp" +//#include "chol_u_blocked.hpp" + +// triple for loop +#include "chol_u_unblocked_opt1.hpp" +#include "chol_u_unblocked_opt2.hpp" + +// partitioned block algorithms: see control.hpp +#include "chol_u_right_look_by_blocks.hpp" + +#endif diff --git a/lib/kokkos/example/ichol/src/chol_u_right_look_by_blocks.hpp b/lib/kokkos/example/ichol/src/chol_u_right_look_by_blocks.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e21bafa9f1db5e9dda1a0e24f21a4552f011d27a --- /dev/null +++ b/lib/kokkos/example/ichol/src/chol_u_right_look_by_blocks.hpp @@ -0,0 +1,394 @@ +#pragma once +#ifndef __CHOL_U_RIGHT_LOOK_BY_BLOCKS_HPP__ +#define __CHOL_U_RIGHT_LOOK_BY_BLOCKS_HPP__ + +/// \file chol_u_right_look_by_blocks.hpp +/// \brief Cholesky factorization by-blocks +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +/// The Partitioned-Block Matrix (PBM) is sparse and a block itself is a view of a sparse matrix. +/// The algorithm generates tasks with a given sparse block matrix structure. 
+ +// basic utils +#include "util.hpp" +#include "control.hpp" +#include "partition.hpp" + +namespace Tacho { + + using namespace std; + + template< typename CrsTaskViewType > + KOKKOS_INLINE_FUNCTION + int releaseFutures( typename CrsTaskViewType::matrix_type & A ) + { + typedef typename CrsTaskViewType::ordinal_type ordinal_type; + typedef typename CrsTaskViewType::row_view_type row_view_type; + typedef typename CrsTaskViewType::future_type future_type; + + row_view_type a(A,0); + + const ordinal_type nnz = a.NumNonZeros(); + + for (ordinal_type j=0;j<nnz;++j) { + a.Value(j).setFuture( future_type() ); + } + + return nnz ; + } + + // ======================================== + // detailed workflow of by-blocks algorithm + // ======================================== + template<int ArgVariant, + template<int,int> class ControlType, + typename CrsTaskViewType> + class CholUpperRightLookByBlocks { + public: + KOKKOS_INLINE_FUNCTION + static int genScalarTask(typename CrsTaskViewType::policy_type &policy, + typename CrsTaskViewType::matrix_type &A) { + typedef typename CrsTaskViewType::value_type value_type; + typedef typename CrsTaskViewType::row_view_type row_view_type; + + typedef typename CrsTaskViewType::future_type future_type; + typedef typename CrsTaskViewType::task_factory_type task_factory_type; + + row_view_type a(A, 0); + value_type &aa = a.Value(0); + + // construct a task + future_type f = task_factory_type::create(policy, + typename Chol<Uplo::Upper, + CtrlDetail(ControlType,AlgoChol::ByBlocks,ArgVariant,Chol)> + ::template TaskFunctor<value_type>(policy,aa)); + + +if ( false ) { + printf("Chol [%d +%d)x[%d +%d) spawn depend %d\n" + , aa.OffsetRows() + , aa.NumRows() + , aa.OffsetCols() + , aa.NumCols() + , int( ! 
aa.Future().is_null() ) + ); +} + + // manage dependence + task_factory_type::addDependence(policy, f, aa.Future()); + aa.setFuture(f); + + // spawn a task + task_factory_type::spawn(policy, f, true /* high priority */ ); + + return 1; + } + + KOKKOS_INLINE_FUNCTION + static int genTrsmTasks(typename CrsTaskViewType::policy_type &policy, + typename CrsTaskViewType::matrix_type &A, + typename CrsTaskViewType::matrix_type &B) { + typedef typename CrsTaskViewType::ordinal_type ordinal_type; + typedef typename CrsTaskViewType::row_view_type row_view_type; + typedef typename CrsTaskViewType::value_type value_type; + + typedef typename CrsTaskViewType::future_type future_type; + typedef typename CrsTaskViewType::task_factory_type task_factory_type; + + row_view_type a(A,0), b(B,0); + value_type &aa = a.Value(0); + +if ( false ) { + printf("genTrsmTasks after aa.Future().reference_count = %d\n" + , aa.Future().reference_count()); +} + const ordinal_type nnz = b.NumNonZeros(); + for (ordinal_type j=0;j<nnz;++j) { + typedef typename + Trsm< Side::Left,Uplo::Upper,Trans::ConjTranspose, + CtrlDetail(ControlType,AlgoChol::ByBlocks,ArgVariant,Trsm)> + ::template TaskFunctor<double,value_type,value_type> + FunctorType ; + + value_type &bb = b.Value(j); + + future_type f = task_factory_type + ::create(policy, FunctorType(policy,Diag::NonUnit, 1.0, aa, bb)); + +if ( false ) { + printf("Trsm [%d +%d)x[%d +%d) spawn depend %d %d\n" + , bb.OffsetRows() + , bb.NumRows() + , bb.OffsetCols() + , bb.NumCols() + , int( ! aa.Future().is_null() ) + , int( ! 
bb.Future().is_null() ) + ); +} + + // trsm dependence + task_factory_type::addDependence(policy, f, aa.Future()); + + // self + task_factory_type::addDependence(policy, f, bb.Future()); + + // place task signature on b + bb.setFuture(f); + + // spawn a task + task_factory_type::spawn(policy, f, true /* high priority */); + } + +if ( false ) { + printf("genTrsmTasks after aa.Future().reference_count = %d\n" + , aa.Future().reference_count()); +} + + return nnz ; + } + + KOKKOS_INLINE_FUNCTION + static int genHerkTasks(typename CrsTaskViewType::policy_type &policy, + typename CrsTaskViewType::matrix_type &A, + typename CrsTaskViewType::matrix_type &C) { + typedef typename CrsTaskViewType::ordinal_type ordinal_type; + typedef typename CrsTaskViewType::value_type value_type; + typedef typename CrsTaskViewType::row_view_type row_view_type; + + typedef typename CrsTaskViewType::future_type future_type; + typedef typename CrsTaskViewType::task_factory_type task_factory_type; + + // case that X.transpose, A.no_transpose, Y.no_transpose + + row_view_type a(A,0), c; + + const ordinal_type nnz = a.NumNonZeros(); + ordinal_type herk_count = 0 ; + ordinal_type gemm_count = 0 ; + + // update herk + for (ordinal_type i=0;i<nnz;++i) { + const ordinal_type row_at_i = a.Col(i); + value_type &aa = a.Value(i); + + c.setView(C, row_at_i); + + ordinal_type idx = 0; + for (ordinal_type j=i;j<nnz && (idx > -2);++j) { + const ordinal_type col_at_j = a.Col(j); + value_type &bb = a.Value(j); + + if (row_at_i == col_at_j) { + idx = c.Index(row_at_i, idx); + if (idx >= 0) { + ++herk_count ; + value_type &cc = c.Value(idx); + future_type f = task_factory_type + ::create(policy, + typename Herk<Uplo::Upper,Trans::ConjTranspose, + CtrlDetail(ControlType,AlgoChol::ByBlocks,ArgVariant,Herk)> + ::template TaskFunctor<double,value_type,value_type>(policy,-1.0, aa, 1.0, cc)); + + +if ( false ) { + printf("Herk [%d +%d)x[%d +%d) spawn %d %d\n" + , cc.OffsetRows() + , cc.NumRows() + , cc.OffsetCols() + 
, cc.NumCols() + , int( ! aa.Future().is_null() ) + , int( ! cc.Future().is_null() ) + ); +} + + // dependence + task_factory_type::addDependence(policy, f, aa.Future()); + + // self + task_factory_type::addDependence(policy, f, cc.Future()); + + // place task signature on y + cc.setFuture(f); + + // spawn a task + task_factory_type::spawn(policy, f); + } + } else { + idx = c.Index(col_at_j, idx); + if (idx >= 0) { + ++gemm_count ; + value_type &cc = c.Value(idx); + future_type f = task_factory_type + ::create(policy, + typename Gemm<Trans::ConjTranspose,Trans::NoTranspose, + CtrlDetail(ControlType,AlgoChol::ByBlocks,ArgVariant,Gemm)> + ::template TaskFunctor<double,value_type,value_type,value_type>(policy,-1.0, aa, bb, 1.0, cc)); + + +if ( false ) { + printf("Gemm [%d +%d)x[%d +%d) spawn %d %d %d\n" + , cc.OffsetRows() + , cc.NumRows() + , cc.OffsetCols() + , cc.NumCols() + , int( ! aa.Future().is_null() ) + , int( ! bb.Future().is_null() ) + , int( ! cc.Future().is_null() ) + ); +} + + // dependence + task_factory_type::addDependence(policy, f, aa.Future()); + task_factory_type::addDependence(policy, f, bb.Future()); + + // self + task_factory_type::addDependence(policy, f, cc.Future()); + + // place task signature on y + cc.setFuture(f); + + // spawn a task + task_factory_type::spawn(policy, f); + } + } + } + } + +if ( false ) { +printf("genHerkTask Herk(%ld) Gemm(%ld)\n",(long)herk_count,(long)gemm_count); +} + + return herk_count + gemm_count ; + } + + }; + + // specialization for different task generation in right looking by-blocks algorithm + // ================================================================================= + template<int ArgVariant, template<int,int> class ControlType> + class Chol<Uplo::Upper,AlgoChol::RightLookByBlocks,ArgVariant,ControlType> { + public: + + // function interface + // ================== + template<typename ExecViewType> + KOKKOS_INLINE_FUNCTION + static int invoke(typename ExecViewType::policy_type &policy, + const 
typename ExecViewType::policy_type::member_type &member, + typename ExecViewType::matrix_type & A, + int checkpoint ) + { + typedef typename ExecViewType::row_view_type row_view_type ; + + enum { CYCLE = 2 }; + + typename ExecViewType::matrix_type + ATL, ATR, A00, A01, A02, + ABL, ABR, A10, A11, A12, + A20, A21, A22; + + Part_2x2(A, ATL, ATR, + /**/ABL, ABR, + checkpoint, checkpoint, Partition::TopLeft); + + int tasks_spawned = 0 ; + int futures_released = 0 ; + + for ( int i = 0 ; i < CYCLE && ATL.NumRows() < A.NumRows() ; ++i ) { + Part_2x2_to_3x3(ATL, ATR, /**/ A00, A01, A02, + /*******/ /**/ A10, A11, A12, + ABL, ABR, /**/ A20, A21, A22, + 1, 1, Partition::BottomRight); + // ----------------------------------------------------- + // Spawning tasks: + + // A11 = chol(A11) : #task = 1 + tasks_spawned += + CholUpperRightLookByBlocks<ArgVariant,ControlType,ExecViewType> + ::genScalarTask(policy, A11); + + // A12 = inv(triu(A11)') * A12 : #tasks = non-zero row blocks + tasks_spawned += + CholUpperRightLookByBlocks<ArgVariant,ControlType,ExecViewType> + ::genTrsmTasks(policy, A11, A12); + + // A22 = A22 - A12' * A12 : #tasks = highly variable + tasks_spawned += + CholUpperRightLookByBlocks<ArgVariant,ControlType,ExecViewType> + ::genHerkTasks(policy, A12, A22); + + // ----------------------------------------------------- + // Can release futures of A11 and A12 + + futures_released += releaseFutures<ExecViewType>( A11 ); + futures_released += releaseFutures<ExecViewType>( A12 ); + +if ( false ) { + printf("Chol iteration(%d) task_count(%d) cumulative: spawn(%d) release(%d)\n" + , int(ATL.NumRows()) + , policy.allocated_task_count() + , tasks_spawned , futures_released + ); +} + + // ----------------------------------------------------- + Merge_3x3_to_2x2(A00, A01, A02, /**/ ATL, ATR, + A10, A11, A12, /**/ /******/ + A20, A21, A22, /**/ ABL, ABR, + Partition::TopLeft); + + } + + return ATL.NumRows(); + } + + // task-data parallel interface + // 
============================ + template<typename ExecViewType> + class TaskFunctor { + public: + typedef typename ExecViewType::policy_type policy_type; + typedef typename ExecViewType::future_type future_type; + typedef typename policy_type::member_type member_type; + typedef int value_type; + + private: + typename ExecViewType::matrix_type _A; + + policy_type _policy; + int _checkpoint ; + + public: + KOKKOS_INLINE_FUNCTION + TaskFunctor(const policy_type & P , + const typename ExecViewType::matrix_type & A) + : _A(A), + _policy(P), + _checkpoint(0) + { } + + string Label() const { return "Chol"; } + + // task-data execution + KOKKOS_INLINE_FUNCTION + void apply(const member_type &member, value_type &r_val) + { + if (member.team_rank() == 0) { + // Clear out previous dependence + _policy.clear_dependence( this ); + + _checkpoint = Chol::invoke<ExecViewType>(_policy, member, _A, _checkpoint); + + if ( _checkpoint < _A.NumRows() ) _policy.respawn_needing_memory(this); + + r_val = 0 ; + } + return ; + } + + }; + + }; +} + +#endif diff --git a/lib/kokkos/example/ichol/src/chol_u_unblocked_opt1.hpp b/lib/kokkos/example/ichol/src/chol_u_unblocked_opt1.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3bb99c71424f491bbb5bea712475fcac116ad24e --- /dev/null +++ b/lib/kokkos/example/ichol/src/chol_u_unblocked_opt1.hpp @@ -0,0 +1,90 @@ +#pragma once +#ifndef __CHOL_U_UNBLOCKED_OPT1_HPP__ +#define __CHOL_U_UNBLOCKED_OPT1_HPP__ + +/// \file chol_u_unblocked_opt1.hpp +/// \brief Unblocked incomplete Cholesky factorization.
+/// \author Kyungjoo Kim (kyukim@sandia.gov) + +#include "util.hpp" +#include "partition.hpp" + +namespace Tacho { + + using namespace std; + + template<> + template<typename CrsExecViewType> + KOKKOS_INLINE_FUNCTION + int + Chol<Uplo::Upper,AlgoChol::UnblockedOpt,Variant::One> + ::invoke(typename CrsExecViewType::policy_type &policy, + const typename CrsExecViewType::policy_type::member_type &member, + typename CrsExecViewType::matrix_type &A) { + + typedef typename CrsExecViewType::value_type value_type; + typedef typename CrsExecViewType::ordinal_type ordinal_type; + typedef typename CrsExecViewType::row_view_type row_view_type; + + // row_view_type r1t, r2t; + + for (ordinal_type k=0;k<A.NumRows();++k) { + //r1t.setView(A, k); + row_view_type &r1t = A.RowView(k); + + // extract diagonal from alpha11 + value_type &alpha = r1t.Value(0); + + if (member.team_rank() == 0) { + // if encounter null diag or wrong index, return -(row + 1) + if (abs(alpha) == 0.0 || r1t.Col(0) != k) + return -(k + 1); + + // error handling should be more carefully designed + + // sqrt on diag + // alpha = sqrt(real(alpha)); + alpha = sqrt(alpha); + } + member.team_barrier(); + + const ordinal_type nnz_r1t = r1t.NumNonZeros(); + + if (nnz_r1t) { + // inverse scale + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 1, nnz_r1t), + [&](const ordinal_type j) { + r1t.Value(j) /= alpha; + }); + + member.team_barrier(); + + // hermitian rank update + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 1, nnz_r1t), + [&](const ordinal_type i) { + const ordinal_type row_at_i = r1t.Col(i); + // const value_type val_at_i = conj(r1t.Value(i)); + const value_type val_at_i = r1t.Value(i); + + //r2t.setView(A, row_at_i); + row_view_type &r2t = A.RowView(row_at_i); + ordinal_type idx = 0; + + for (ordinal_type j=i;j<nnz_r1t && (idx > -2);++j) { + const ordinal_type col_at_j = r1t.Col(j); + idx = r2t.Index(col_at_j, idx); + + if (idx >= 0) { + const value_type val_at_j = r1t.Value(j); + 
r2t.Value(idx) -= val_at_i*val_at_j; + } + } + }); + } + } + return 0; + } + +} + +#endif diff --git a/lib/kokkos/example/ichol/src/chol_u_unblocked_opt2.hpp b/lib/kokkos/example/ichol/src/chol_u_unblocked_opt2.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e7d1dc826235120a84af25ff239fb705c65489f0 --- /dev/null +++ b/lib/kokkos/example/ichol/src/chol_u_unblocked_opt2.hpp @@ -0,0 +1,154 @@ +#pragma once +#ifndef __CHOL_U_UNBLOCKED_OPT2_HPP__ +#define __CHOL_U_UNBLOCKED_OPT2_HPP__ + +/// \file chol_u_unblocked_opt2.hpp +/// \brief Unblocked incomplete Cholesky factorization; version for data parallel sharing L1 cache. +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +#include "util.hpp" +#include "partition.hpp" + +namespace Tacho { + + using namespace std; + + template<> + template<typename CrsExecViewType> + KOKKOS_INLINE_FUNCTION + int + Chol<Uplo::Upper,AlgoChol::UnblockedOpt,Variant::Two> + ::invoke(typename CrsExecViewType::policy_type &policy, + const typename CrsExecViewType::policy_type::member_type &member, + typename CrsExecViewType::matrix_type &A) { + + typedef typename CrsExecViewType::value_type value_type; + typedef typename CrsExecViewType::ordinal_type ordinal_type; + typedef typename CrsExecViewType::row_view_type row_view_type; + +if ( false && member.team_rank() == 0 ) { + printf("Chol [%d +%d)x[%d +%d) begin\n" + , A.OffsetRows() + , A.NumRows() + , A.OffsetCols() + , A.NumCols() + ); +} + + // row_view_type r1t, r2t; + + for (ordinal_type k=0;k<A.NumRows();++k) { + //r1t.setView(A, k); + row_view_type &r1t = A.RowView(k); + + // extract diagonal from alpha11 + value_type &alpha = r1t.Value(0); + + if (member.team_rank() == 0) { + // if encounter null diag or wrong index, return -(row + 1) + if (abs(alpha) == 0.0 || r1t.Col(0) != k) + return -(k + 1); + + // error handling should be more carefully designed + + // sqrt on diag + // alpha = sqrt(real(alpha)); + alpha = sqrt(alpha); + } + member.team_barrier(); + + +if (
false && member.team_rank() == 0 ) { + printf("Chol [%d +%d)x[%d +%d) local row %d\n" + , A.OffsetRows() + , A.NumRows() + , A.OffsetCols() + , A.NumCols() + , int(k) + ); +} + + + const ordinal_type nnz_r1t = r1t.NumNonZeros(); + + if (nnz_r1t) { + // inverse scale + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 1, nnz_r1t), + [&](const ordinal_type j) { + r1t.Value(j) /= alpha; + }); + + member.team_barrier(); + + +if ( false && member.team_rank() == 0 ) { + printf("Chol [%d +%d)x[%d +%d) local row %d nnz_r1t\n" + , A.OffsetRows() + , A.NumRows() + , A.OffsetCols() + , A.NumCols() + , int(k) + ); +} + + // hermitian rank update + for (ordinal_type i=1;i<nnz_r1t;++i) { + const ordinal_type row_at_i = r1t.Col(i); + // const value_type val_at_i = conj(r1t.Value(i)); + const value_type val_at_i = r1t.Value(i); + + //r2t.setView(A, row_at_i); + row_view_type &r2t = A.RowView(row_at_i); + + ordinal_type member_idx = 0 ; + + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, i, nnz_r1t), + [&](const ordinal_type j) { + if (member_idx > -2) { + const ordinal_type col_at_j = r1t.Col(j); + member_idx = r2t.Index(col_at_j, member_idx); + if (member_idx >= 0) { + const value_type val_at_j = r1t.Value(j); + r2t.Value(member_idx) -= val_at_i*val_at_j; + } + } + }); + } + } + + +if ( false ) { +member.team_barrier(); +if ( member.team_rank() == 0 ) { + printf("Chol [%d +%d)x[%d +%d) local row %d end\n" + , A.OffsetRows() + , A.NumRows() + , A.OffsetCols() + , A.NumCols() + , int(k) + ); +} +} + + } + + +if ( false ) { +member.team_barrier(); +if ( member.team_rank() == 0 ) { + printf("Chol [%d +%d)x[%d +%d) end\n" + , A.OffsetRows() + , A.NumRows() + , A.OffsetCols() + , A.NumCols() + ); +} +} + + + return 0; + } + +} + +#endif diff --git a/lib/kokkos/example/ichol/src/control.hpp b/lib/kokkos/example/ichol/src/control.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bf5efef9fded8685f646d81855469f6f363b1e73 --- /dev/null +++ 
  // Primary template of the control tree. It is a full definition (not just a
  // forward declaration): an unspecialized (algorithm, variant) pair exposes
  // only its own identity through Self = { ArgAlgo, ArgVariant }.
  template<int ArgAlgo, int ArgVariant>
  struct Control {
    static constexpr int Self[2] = { ArgAlgo, ArgVariant };
  };
  // - CholNestedDenseByBlocks
  //   * branch control between the sparse kernel and the dense by-blocks kernel;
  //     each entry is an { algorithm, variant } pair
  template<> struct Control<AlgoChol::NestedDenseByBlocks,Variant::One> {
    static constexpr int CholSparse[2]        = { AlgoChol::UnblockedOpt,  Variant::One };
    static constexpr int CholDenseByBlocks[2] = { AlgoChol::DenseByBlocks, Variant::One };
  };
+ template<typename CrsMatType> + class Coo { + public: + typedef typename CrsMatType::ordinal_type ordinal_type; + typedef typename CrsMatType::value_type value_type; + + public: + ordinal_type _i,_j; + value_type _val; + + public: + ordinal_type& Row() { return _i; } + ordinal_type& Col() { return _j; } + value_type& Val() { return _val; } + + ordinal_type Row() const { return _i; } + ordinal_type Col() const { return _j; } + value_type Val() const { return _val; } + + Coo() {} + + Coo(const ordinal_type i, + const ordinal_type j, + const value_type val) + : _i(i), + _j(j), + _val(val) + { } + + Coo(const Coo& b) + : _i(b._i), + _j(b._j), + _val(b._val) + { } + + Coo<CrsMatType>& operator=(const Coo<CrsMatType> &y) { + this->_i = y._i; + this->_j = y._j; + this->_val = y._val; + + return *this; + } + + /// \brief Compare "less" index i and j only. + bool operator<(const Coo<CrsMatType> &y) const { + ordinal_type r_val = (this->_i - y._i); + return (r_val == 0 ? this->_j < y._j : r_val < 0); + } + + /// \brief Compare "equality" only index i and j. + bool operator==(const Coo<CrsMatType> &y) const { + return (this->_i == y._i) && (this->_j == y._j); + } + + /// \brief Compare "in-equality" only index i and j. + bool operator!=(const Coo<CrsMatType> &y) const { + return !(*this == y); + } + }; + +} +#endif diff --git a/lib/kokkos/example/ichol/src/crs_matrix_base.hpp b/lib/kokkos/example/ichol/src/crs_matrix_base.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ad08b8757e83c68b8a9224a1d41c3087930a2eb4 --- /dev/null +++ b/lib/kokkos/example/ichol/src/crs_matrix_base.hpp @@ -0,0 +1,598 @@ +#pragma once +#ifndef __CRS_MATRIX_BASE_HPP__ +#define __CRS_MATRIX_BASE_HPP__ + +/// \file crs_matrix_base.hpp +/// \brief CRS matrix base object interfaces to user provided input matrices. 
  // Trait that selects the row-view type associated with a matrix value type.
  // Primary template: any type without a dedicated specialization maps to
  // plain int (presumably a placeholder for scalar-valued matrices that have
  // no nested row views -- TODO confirm).
  template < typename CrsMatrixType >
  struct GetCrsMatrixRowViewType {
    typedef int type ;
  };


  // Specialization for TaskView<CrsMatrixViewType,TaskFactoryType>: the
  // row-view type is taken from the wrapped matrix view.
  template < typename CrsMatrixViewType , typename TaskFactoryType >
  struct GetCrsMatrixRowViewType
    < TaskView<CrsMatrixViewType,TaskFactoryType> >
  {
    typedef typename CrsMatrixViewType::row_view_type type ;
  };
GetCrsMatrixRowViewType< ValueType >::type row_view_type ; + typedef Kokkos::View<row_view_type*,space_type> row_view_type_array; + + row_view_type_array _all_row_views ; + + protected: + + void createInternalArrays(const ordinal_type m, + const ordinal_type n, + const size_type nnz) { + _m = m; + _n = n; + _nnz = nnz; + + if (static_cast<ordinal_type>(_ap.dimension_0()) < m+1) + _ap = size_type_array("CrsMatrixBase::RowPtrArray", m+1); + + if (static_cast<size_type>(_aj.dimension_0()) < nnz) + _aj = ordinal_type_array("CrsMatrixBase::ColsArray", nnz); + + if (static_cast<size_type>(_ax.dimension_0()) < nnz) + _ax = value_type_array("CrsMatrixBase::ValuesArray", nnz); + } + + // Copy sparse matrix structure from coordinate format in 'mm' + // to CRS format in Views _ap, _aj, a_x. + void ijv2crs(const vector<ijv_type> &mm) { + + ordinal_type ii = 0; + size_type jj = 0; + + ijv_type prev = mm[0]; + _ap[ii++] = 0; + _aj[jj] = prev.Col(); + _ax[jj] = prev.Val(); + ++jj; + + for (typename vector<ijv_type>::const_iterator it=(mm.begin()+1);it<mm.end();++it) { + ijv_type aij = (*it); + + // row index + if (aij.Row() != prev.Row()) { + _ap[ii++] = jj; + } + + if (aij == prev) { + --jj; + _aj[jj] = aij.Col(); + _ax[jj] += aij.Val(); + } else { + _aj[jj] = aij.Col(); + _ax[jj] = aij.Val(); + } + ++jj; + + prev = aij; + } + + // add the last index to terminate the storage + _ap[ii++] = jj; + _nnz = jj; + } + + public: + + KOKKOS_INLINE_FUNCTION + void setNumNonZeros() { + if (_m) + _nnz = _ap[_m]; + } + + KOKKOS_INLINE_FUNCTION + ordinal_type NumRows() const { return _m; } + + KOKKOS_INLINE_FUNCTION + ordinal_type NumCols() const { return _n; } + + KOKKOS_INLINE_FUNCTION + size_type NumNonZeros() const { return _nnz; } + + KOKKOS_INLINE_FUNCTION + size_type_array_ptr RowPtr() const { return &_ap[0]; } + + KOKKOS_INLINE_FUNCTION + ordinal_type_array_ptr ColPtr() const { return &_aj[0]; } + + KOKKOS_INLINE_FUNCTION + value_type_array_ptr ValuePtr() const { return &_ax[0];} + + 
KOKKOS_INLINE_FUNCTION + size_type RowPtr(const ordinal_type i) const { return _ap[i]; } + + KOKKOS_INLINE_FUNCTION + ordinal_type_array_ptr ColsInRow(const ordinal_type i) const { return _aj.data() + _ap[i] ; } + + KOKKOS_INLINE_FUNCTION + value_type_array_ptr ValuesInRow(const ordinal_type i) const { return _ax.data() + _ap[i] ; } + + KOKKOS_INLINE_FUNCTION + ordinal_type NumNonZerosInRow(const ordinal_type i) const { return (_ap[i+1] - _ap[i]); } + + KOKKOS_INLINE_FUNCTION + value_type& Value(const ordinal_type k) { return _ax[k]; } + + KOKKOS_INLINE_FUNCTION + value_type Value(const ordinal_type k) const { return _ax[k]; } + + /// \brief Default constructor. + KOKKOS_INLINE_FUNCTION + CrsMatrixBase() + : _m(0), + _n(0), + _nnz(0), + _ap(), + _aj(), + _ax() + { } + + /// \brief Constructor with label + CrsMatrixBase(const string & ) + : _m(0), + _n(0), + _nnz(0), + _ap(), + _aj(), + _ax() + { } + + /// \brief Copy constructor (shallow copy), for deep-copy use a method copy + template<typename VT, + typename OT, + typename ST, + typename SpT, + typename MT> + CrsMatrixBase(const CrsMatrixBase<VT,OT,ST,SpT,MT> &b) + : _m(b._m), + _n(b._n), + _nnz(b._nnz), + _ap(b._ap), + _aj(b._aj), + _ax(b._ax) + { } + + /// \brief Constructor to allocate internal data structures. + CrsMatrixBase(const string & , + const ordinal_type m, + const ordinal_type n, + const ordinal_type nnz) + : _m(m), + _n(n), + _nnz(nnz), + _ap("CrsMatrixBase::RowPtrArray", m+1), + _aj("CrsMatrixBase::ColsArray", nnz), + _ax("CrsMatrixBase::ValuesArray", nnz) + { } + + /// \brief Constructor to attach external arrays to the matrix. + CrsMatrixBase(const string &, + const ordinal_type m, + const ordinal_type n, + const ordinal_type nnz, + const size_type_array &ap, + const ordinal_type_array &aj, + const value_type_array &ax) + : _m(m), + _n(n), + _nnz(nnz), + _ap(ap), + _aj(aj), + _ax(ax) + { } + + // Allow the copy function access to the input CrsMatrixBase + // private data. 
    /// \brief deep copy of matrix b, potentially different spaces
    ///
    /// Sizes this matrix from b (rows, columns, non-zero count) and copies the
    /// row-pointer, column-index and value arrays element-wise with
    /// Kokkos::deep_copy. The execution space of this matrix is fenced before
    /// the (re)allocation, after it, and after the copies.
    ///
    /// \param b  source matrix; only its memory space may differ from this one.
    /// \return always 0.
    template< typename SpT >
    int
    copy(const CrsMatrixBase<ValueType,OrdinalType,SizeType,SpT,MemoryTraits> &b) {

      space_type::execution_space::fence();

      createInternalArrays(b._m, b._n, b._nnz);

      space_type::execution_space::fence();

      // createInternalArrays keeps an existing array that is already large
      // enough, so the extents on the two sides can differ; copy only the
      // common leading part of each pair.
      const auto ap_range = range_type<ordinal_type>(0, min(_ap.dimension_0(), b._ap.dimension_0()));
      const auto aj_range = range_type<size_type>   (0, min(_aj.dimension_0(), b._aj.dimension_0()));
      const auto ax_range = range_type<size_type>   (0, min(_ax.dimension_0(), b._ax.dimension_0()));

      Kokkos::deep_copy(Kokkos::subview(  _ap, ap_range),
                        Kokkos::subview(b._ap, ap_range));
      Kokkos::deep_copy(Kokkos::subview(  _aj, aj_range),
                        Kokkos::subview(b._aj, aj_range));

      Kokkos::deep_copy(Kokkos::subview(  _ax, ax_range),
                        Kokkos::subview(b._ax, ax_range));

      space_type::execution_space::fence();

      return 0;
    }
+ switch (uplo) { + case Uplo::Lower: { + _nnz = 0; + for (ordinal_type i=0;i<_m;++i) { + size_type jbegin = b._ap[i]; + size_type jend = b._ap[i+1]; + _ap[i] = _nnz; + for (size_type j=jbegin;j<jend && (i >= b._aj[j]);++j,++_nnz) { + _aj[_nnz] = b._aj[j]; + _ax[_nnz] = b._ax[j]; + } + } + _ap[_m] = _nnz; + break; + } + case Uplo::Upper: { + _nnz = 0; + for (ordinal_type i=0;i<_m;++i) { + size_type j = b._ap[i]; + size_type jend = b._ap[i+1]; + _ap[i] = _nnz; + for ( ;j<jend && (i > b._aj[j]);++j) ; + for ( ;j<jend;++j,++_nnz) { + _aj[_nnz] = b._aj[j]; + _ax[_nnz] = b._ax[j]; + } + } + _ap[_m] = _nnz; + break; + } + } + + return 0; + } + + /// \brief deep copy of matrix b with given permutation vectors + template<typename VT, + typename OT, + typename ST, + typename SpT, + typename MT> + int + copy(const typename CrsMatrixBase<VT,OT,ST,SpT,MT>::ordinal_type_array &p, + const typename CrsMatrixBase<VT,OT,ST,SpT,MT>::ordinal_type_array &ip, + const CrsMatrixBase<VT,OT,ST,SpT,MT> &b) { + + createInternalArrays(b._m, b._n, b._nnz); + + // Question:: do I need to use Kokkos::vector ? + // in other words, where do we permute matrix in factoriztion ? + // permuting a matrix is a kernel ? + vector<ijv_type> tmp; + + // any chance to use parallel_for ? 
+ _nnz = 0; + for (ordinal_type i=0;i<_m;++i) { + ordinal_type ii = ip[i]; + + size_type jbegin = b._ap[ii]; + size_type jend = b._ap[ii+1]; + + _ap[i] = _nnz; + for (size_type j=jbegin;j<jend;++j) { + ordinal_type jj = p[b._aj[j]]; + ijv_type aij(i, jj, b._ax[j]); + tmp.push_back(aij); + } + + sort(tmp.begin(), tmp.end(), less<ijv_type>()); + for (auto it=tmp.begin();it<tmp.end();++it) { + ijv_type aij = (*it); + + _aj[_nnz] = aij.Col(); + _ax[_nnz] = aij.Val(); + ++_nnz; + } + tmp.clear(); + } + _ap[_m] = _nnz; + + return 0; + } + + /// \brief add the matrix b into this non-zero entires + template<typename VT, + typename OT, + typename ST, + typename SpT, + typename MT> + int + add(const CrsMatrixBase<VT,OT,ST,SpT,MT> &b) { + + const ordinal_type m = min(b._m, _m); + for (ordinal_type i=0;i<m;++i) { + const size_type jaend = _ap[i+1]; + const size_type jbend = b._ap[i+1]; + + size_type ja = _ap[i]; + size_type jb = b._ap[i]; + + for ( ;jb<jbend;++jb) { + for ( ;(_aj[ja]<b._aj[jb] && ja<jaend);++ja); + _ax[ja] += (_aj[ja] == b._aj[jb])*b._ax[jb]; + } + } + + return 0; + } + + int symmetrize(const int uplo, + const bool conjugate = false) { + vector<ijv_type> mm; + mm.reserve(_nnz*2); + + for (ordinal_type i=0;i<_m;++i) { + const size_type jbegin = _ap[i]; + const size_type jend = _ap[i+1]; + for (size_type jj=jbegin;jj<jend;++jj) { + const ordinal_type j = _aj[jj]; + const value_type val = (conjugate ? 
conj(_ax[j]) : _ax[j]); + if (uplo == Uplo::Lower && i > j) { + mm.push_back(ijv_type(i, j, val)); + mm.push_back(ijv_type(j, i, val)); + } else if (uplo == Uplo::Upper && i < j) { + mm.push_back(ijv_type(i, j, val)); + mm.push_back(ijv_type(j, i, val)); + } else if (i == j) { + mm.push_back(ijv_type(i, i, val)); + } + } + } + sort(mm.begin(), mm.end(), less<ijv_type>()); + + createInternalArrays(_m, _n, mm.size()); + + ijv2crs(mm); + + return 0; + } + + int hermitianize(int uplo) { + return symmetrize(uplo, true); + } + + ostream& showMe(ostream &os) const { + streamsize prec = os.precision(); + os.precision(8); + os << scientific; + + os << " -- CrsMatrixBase -- " << endl + << " # of Rows = " << _m << endl + << " # of Cols = " << _n << endl + << " # of NonZeros = " << _nnz << endl + << endl + << " RowPtrArray length = " << _ap.dimension_0() << endl + << " ColArray length = " << _aj.dimension_0() << endl + << " ValueArray length = " << _ax.dimension_0() << endl + << endl; + + const int w = 10; + if (_ap.size() && _aj.size() && _ax.size()) { + os << setw(w) << "Row" << " " + << setw(w) << "Col" << " " + << setw(w) << "Val" << endl; + for (ordinal_type i=0;i<_m;++i) { + size_type jbegin = _ap[i], jend = _ap[i+1]; + for (size_type j=jbegin;j<jend;++j) { + value_type val = _ax[j]; + os << setw(w) << i << " " + << setw(w) << _aj[j] << " " + << setw(w) << val << endl; + } + } + } + + os.unsetf(ios::scientific); + os.precision(prec); + + return os; + } + + int importMatrixMarket(ifstream &file) { + + vector<ijv_type> mm; + const ordinal_type mm_base = 1; + + { + string header; + if (file.is_open()) { + getline(file, header); + while (file.good()) { + char c = file.peek(); + if (c == '%' || c == '\n') { + file.ignore(256, '\n'); + continue; + } + break; + } + } else { + ERROR(MSG_INVALID_INPUT(file)); + } + + // check the header + bool symmetry = (header.find("symmetric") != string::npos); + + // read matrix specification + ordinal_type m, n; + size_type nnz; + + file >> 
m >> n >> nnz; + + mm.reserve(nnz*(symmetry ? 2 : 1)); + for (size_type i=0;i<nnz;++i) { + ordinal_type row, col; + value_type val; + file >> row >> col >> val; + + row -= mm_base; + col -= mm_base; + + mm.push_back(ijv_type(row, col, val)); + if (symmetry && row != col) + mm.push_back(ijv_type(col, row, val)); + } + sort(mm.begin(), mm.end(), less<ijv_type>()); + + // construct workspace and set variables + createInternalArrays(m, n, mm.size()); + } + + // change mm to crs + ijv2crs(mm); + + return 0; + } + + int exportMatrixMarket(ofstream &file, + const string comment, + const int uplo = 0) { + streamsize prec = file.precision(); + file.precision(8); + file << scientific; + + file << "%%MatrixMarket matrix coordinate " + << (is_fundamental<value_type>::value ? "real " : "complex ") + << ((uplo == Uplo::Upper || uplo == Uplo::Lower) ? "symmetric " : "general ") + << endl; + + file << comment << endl; + + // cnt nnz + size_type nnz = 0; + for (ordinal_type i=0;i<_m;++i) { + const size_type jbegin = _ap[i], jend = _ap[i+1]; + for (size_type j=jbegin;j<jend;++j) { + if (uplo == Uplo::Upper && i <= _aj[j]) ++nnz; + if (uplo == Uplo::Lower && i >= _aj[j]) ++nnz; + if (!uplo) ++nnz; + } + } + file << _m << " " << _n << " " << nnz << endl; + + const int w = 10; + for (ordinal_type i=0;i<_m;++i) { + const size_type jbegin = _ap[i], jend = _ap[i+1]; + for (size_type j=jbegin;j<jend;++j) { + bool flag = false; + if (uplo == Uplo::Upper && i <= _aj[j]) flag = true; + if (uplo == Uplo::Lower && i >= _aj[j]) flag = true; + if (!uplo) flag = true; + if (flag) { + value_type val = _ax[j]; + file << setw(w) << ( i+1) << " " + << setw(w) << (_aj[j]+1) << " " + << setw(w) << val << endl; + } + } + } + + file.unsetf(ios::scientific); + file.precision(prec); + + return 0; + } + + //---------------------------------------------------------------------- + + int convertGraph(size_type_array rptr, + ordinal_type_array cidx) const { + ordinal_type ii = 0; + size_type jj = 0; + + for 
(ordinal_type i=0;i<_m;++i) { + size_type jbegin = _ap[i], jend = _ap[i+1]; + rptr[ii++] = jj; + for (size_type j=jbegin;j<jend;++j) + if (i != _aj[j]) + cidx[jj++] = _aj[j]; + } + rptr[ii] = jj; + + return 0; + } + + //---------------------------------------------------------------------- + + }; + +} + +#endif diff --git a/lib/kokkos/example/ichol/src/crs_matrix_base_import.hpp b/lib/kokkos/example/ichol/src/crs_matrix_base_import.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e1ff0f3a9fd403ae51d68f77358409e1e3cd5cca --- /dev/null +++ b/lib/kokkos/example/ichol/src/crs_matrix_base_import.hpp @@ -0,0 +1,104 @@ +#pragma once +#ifndef __CRS_MATRIX_BASE_IMPL_HPP__ +#define __CRS_MATRIX_BASE_IMPL_HPP__ + +/// \file crs_matrix_base_impl.hpp +/// \brief Implementation of external interfaces to CrsMatrixBase +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +namespace Tacho { + + using namespace std; + + template<typename VT, + typename OT, + typename ST, + typename SpT, + typename MT> + inline int + CrsMatrixBase<VT,OT,ST,SpT,MT>::importMatrixMarket(ifstream &file) { + // skip initial title comments + { + ordinal_type m, n; + size_type nnz; + + while (file.good()) { + char c = file.peek(); + if (c == '%' || c == '\n') { + file.ignore(256, '\n'); + continue; + } + break; + } + + // read matrix specification + file >> m >> n >> nnz; + + // construct workspace and set variables + createInternalArrays(m, n, nnz); + } + + // read the coordinate format (matrix-market) + vector<ijv_type> mm; + mm.reserve(_nnz); + { + // matrix market use one base index + const ordinal_type mm_base = 1; + + for (size_type i=0;i<_nnz;++i) { + ijv_type aij; + file >> aij.Row() >> aij.Col() >> aij.Val(); + + // one base to zero base + aij.Row() -= mm_base; + aij.Col() -= mm_base; + + mm.push_back(aij); + } + sort(mm.begin(), mm.end(), less<ijv_type>()); + } + + // change mm to crs + { + ordinal_type ii = 0; + size_type jj = 0; + + ijv_type prev = mm[0]; + _ap[ii++] = 0; + 
_aj[jj] = prev.Col(); + _ax[jj] = prev.Val(); + ++jj; + + for (typename vector<ijv_type>::iterator it=(mm.begin()+1);it<mm.end();++it) { + ijv_type aij = (*it); + + // row index + if (aij.Row() != prev.Row()) { + _ap[ii++] = jj; + } + + if (aij == prev) { + --jj; + _aj[jj] = aij.Col(); + _ax[jj] += aij.Val(); + } else { + _aj[jj] = aij.Col(); + _ax[jj] = aij.Val(); + } + ++jj; + + prev = aij; + } + + // add the last index to terminate the storage + _ap[ii++] = jj; + _nnz = jj; + } + + return 0; + } + +} + + +#endif diff --git a/lib/kokkos/example/ichol/src/crs_matrix_helper.hpp b/lib/kokkos/example/ichol/src/crs_matrix_helper.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5b80e77935fcb968bff8f05e9876a10299a82182 --- /dev/null +++ b/lib/kokkos/example/ichol/src/crs_matrix_helper.hpp @@ -0,0 +1,71 @@ +#pragma once +#ifndef __CRS_MATRIX_HELPER_HPP__ +#define __CRS_MATRIX_HELPER_HPP__ + +/// \file crs_matrix_helper.hpp +/// \brief This file includes utility functions to convert between flat and hierarchical matrices. +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +#include "util.hpp" + +namespace Tacho { + + using namespace std; + + class CrsMatrixHelper { + public: + + template< typename CrsHierBase > + static int fillRowViewArray( CrsHierBase & HU ); + + template<typename CrsFlatBase> + static int + filterZeros(CrsFlatBase &flat); + + /// \brief Transform a scalar flat matrix to hierarchical matrix of matrices 1x1; testing only. + template<typename CrsFlatBase, + typename CrsHierBase> + static int + flat2hier(CrsFlatBase &flat, + CrsHierBase &hier); + + /// \brief Transform a scalar flat matrix to upper hierarchical matrix given scotch info. 
+ template<typename CrsFlatBase, + typename CrsHierBase, + typename HostOrdinalTypeArray > + static int + flat2hier(int uplo, + CrsFlatBase &flat, + CrsHierBase &hier, + const typename CrsHierBase::ordinal_type nblks, + const HostOrdinalTypeArray range, + const HostOrdinalTypeArray tree); + + /// \brief Transform a scalar flat matrix to upper hierarchical matrix given scotch info. + template<typename CrsFlatBase, + typename CrsHierBase, + typename HostOrdinalTypeArray > + static int + flat2hier_upper(CrsFlatBase &flat, + CrsHierBase &hier, + const typename CrsHierBase::ordinal_type nblks, + const HostOrdinalTypeArray range, + const HostOrdinalTypeArray tree); + + /// \brief Transform a scalar flat matrix to lower hierarchical matrix given scotch info. + template<typename CrsFlatBase, + typename CrsHierBase, + typename HostOrdinalTypeArray > + static int + flat2hier_lower(CrsFlatBase &flat, + CrsHierBase &hier, + const typename CrsHierBase::ordinal_type nblks, + const HostOrdinalTypeArray range, + const HostOrdinalTypeArray tree); + }; + +} + +#include "crs_matrix_helper_impl.hpp" + +#endif diff --git a/lib/kokkos/example/ichol/src/crs_matrix_helper_impl.hpp b/lib/kokkos/example/ichol/src/crs_matrix_helper_impl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0fc4c9f1b83c0e48d3e42eb61e8e1cea12b1c187 --- /dev/null +++ b/lib/kokkos/example/ichol/src/crs_matrix_helper_impl.hpp @@ -0,0 +1,364 @@ + +#ifndef __CRS_MATRIX_HELPER_IMPL_HPP__ +#define __CRS_MATRIX_HELPER_IMPL_HPP__ + +/// \file crs_matrix_helper_impl.hpp +/// \brief This file includes utility functions to convert between flat and hierarchical matrices. 
  // Functor that hands every block of a hierarchical matrix its own slice of
  // one shared row-view array.
  //
  // It provides two call operators: the two-argument form sums the row counts
  // of all blocks (reduction), the three-argument form assigns the slices
  // (scan). The accumulated quantity is a number of rows (value_type).
  template< typename CrsHierBase >
  struct FunctorFillRowViewArray {

    typedef typename CrsHierBase::ordinal_type         ordinal_type ;
    typedef typename CrsHierBase::row_view_type_array  row_view_type_array ;
    typedef typename CrsHierBase::value_type_array     ax_type ;

    typedef ordinal_type value_type ;

    row_view_type_array _all_row_views ;  // shared storage that is sliced per block
    ax_type             _ax ;             // the blocks; entry k is one block

    FunctorFillRowViewArray( const row_view_type_array & arg_all_row_views
                           , const ax_type             & arg_ax )
      : _all_row_views( arg_all_row_views )
      , _ax( arg_ax )
      {}

    // Reduction: add the number of rows of block k to the running total.
    KOKKOS_INLINE_FUNCTION
    void operator()( ordinal_type k , ordinal_type & value ) const
      { value += _ax(k).NumRows(); }

    // Scan: on the final pass 'value' is read before this block's own rows
    // are added, i.e. it is used as the offset of block k inside
    // _all_row_views; the block receives the subview [begin, end).
    KOKKOS_INLINE_FUNCTION
    void operator()( ordinal_type k , ordinal_type & value , bool final ) const
      {
        if ( final ) {
          const int begin = value ;
          const int end   = begin + _ax(k).NumRows();

          auto sub = Kokkos::subview( _all_row_views, Kokkos::pair<int,int>(begin,end) );

          _ax(k).setRowViewArray( sub );
        }

        // contributed on every pass so that later blocks see the right offset
        value += _ax(k).NumRows();
      }
  };
space_type::fence(); + + return 0 ; + } + + template<typename CrsFlatBase> + int + CrsMatrixHelper::filterZeros(CrsFlatBase &flat) { + typedef typename CrsFlatBase::ordinal_type ordinal_type; + typedef typename CrsFlatBase::size_type size_type; + typedef typename CrsFlatBase::value_type value_type; + + typedef typename CrsFlatBase::ordinal_type_array_ptr ordinal_type_array_ptr; + typedef typename CrsFlatBase::value_type_array_ptr value_type_array_ptr; + + size_type nz = 0; + const value_type zero(0); + + for (ordinal_type k=0;k<flat.NumNonZeros();++k) + nz += (flat.Value(k) == zero) ; + + if (nz) { + CrsFlatBase resized(flat.Label() + "::ZeroFiltered", + flat.NumRows(), + flat.NumCols(), + flat.NumNonZeros() - nz); + + ordinal_type_array_ptr rows = resized.RowPtr(); rows[0] = 0; + ordinal_type_array_ptr cols = resized.ColPtr(); + value_type_array_ptr vals = resized.ValuePtr(); + + size_type nnz = 0; + for (ordinal_type i=0;i<flat.NumRows();++i) { + const ordinal_type nnz_in_row = flat.NumNonZerosInRow(i); + const ordinal_type_array_ptr cols_in_row = flat.ColsInRow(i); + const value_type_array_ptr vals_in_row = flat.ValuesInRow(i); + + for (ordinal_type j=0;j<nnz_in_row;++j) { + if (vals_in_row[j] != zero) { + cols[nnz] = cols_in_row[j]; + vals[nnz] = vals_in_row[j]; + ++nnz; + } + } + rows[i+1] = nnz; + } + flat = resized; + resized.setNumNonZeros(); + } + + return 0; + } + + + template<typename CrsFlatBase, + typename CrsHierBase> + int + CrsMatrixHelper::flat2hier(CrsFlatBase &flat, + CrsHierBase &hier) { + typedef typename CrsHierBase::ordinal_type ordinal_type; + typedef typename CrsHierBase::size_type size_type; + typedef typename CrsHierBase::ordinal_type_array_ptr ordinal_type_array_ptr; + + size_type nnz = 0; + + hier.createInternalArrays(flat.NumRows(), flat.NumCols(), flat.NumNonZeros()); + + for (ordinal_type i=0;i<flat.NumRows();++i) { + ordinal_type jsize = flat.NumNonZerosInRow(i); + + hier._ap[i] = nnz; + ordinal_type_array_ptr ci = 
flat.ColsInRow(i); + for (ordinal_type j=0;j<jsize;++j,++nnz) { + hier._aj[nnz] = ci[j]; + hier._ax[nnz].setView( flat, i, 1, + /**/ ci[j], 1); + } + } + + hier._ap[flat.NumRows()] = nnz; + hier._nnz = nnz; + + return 0; + } + + template< typename CrsFlatBase , + typename CrsHierBase , + typename HostOrdinalTypeArray > + int + CrsMatrixHelper::flat2hier(int uplo, + CrsFlatBase &flat, + CrsHierBase &hier, + const typename CrsHierBase::ordinal_type nblks, + const HostOrdinalTypeArray range , + const HostOrdinalTypeArray tree) { + switch(uplo) { + case Uplo::Upper: return flat2hier_upper(flat, hier, nblks, range, tree); + case Uplo::Lower: return flat2hier_lower(flat, hier, nblks, range, tree); + } + return -1; + } + + template<typename CrsFlatBase, + typename CrsHierBase, + typename HostOrdinalTypeArray > + int + CrsMatrixHelper::flat2hier_upper(CrsFlatBase & device_flat, + CrsHierBase & device_hier, + const typename CrsHierBase::ordinal_type nblks, + const HostOrdinalTypeArray range, + const HostOrdinalTypeArray tree) { + typedef typename CrsHierBase::ordinal_type ordinal_type; + typedef typename CrsHierBase::size_type size_type; + + //typedef typename CrsHierBase::ordinal_type_array ordinal_type_array; + //typedef typename CrsHierBase::ordinal_type_array_ptr ordinal_type_array_ptr; + //typedef typename CrsHierBase::value_type_array_ptr value_type_array_ptr; + + size_type nnz = 0; + + // count nnz and nnz in rows for the upper triangular hier matrix + for (ordinal_type i=0;i<nblks;++i) + for (ordinal_type j=i;j != -1;++nnz,j=tree[j]) ; + + // create upper triangular block matrix + device_hier.createInternalArrays(nblks, nblks, nnz); + + typename CrsHierBase::size_type_array::HostMirror + host_ap = Kokkos::create_mirror_view( device_hier._ap ); + + typename CrsHierBase::ordinal_type_array::HostMirror + host_aj = Kokkos::create_mirror_view( device_hier._aj ); + + typename CrsHierBase::value_type_array::HostMirror + host_ax = Kokkos::create_mirror_view( device_hier._ax 
); + + nnz = 0; + for (ordinal_type i=0;i<nblks;++i) { + host_ap[i] = nnz; + for (ordinal_type j=i;j != -1;++nnz,j=tree[j]) { + host_aj[nnz] = j; + host_ax[nnz].setView( device_flat, range[i], (range[i+1] - range[i]), + /**/ range[j], (range[j+1] - range[j])); + + // this checking might more expensive + // and attempts to access device memory from the host + // if (!host_ax[nnz].countNumNonZeros()) + // --nnz; + } + } + + host_ap[nblks] = nnz; + + Kokkos::deep_copy( device_hier._ap , host_ap ); + Kokkos::deep_copy( device_hier._aj , host_aj ); + Kokkos::deep_copy( device_hier._ax , host_ax ); + + device_hier._nnz = nnz; + + return 0; + } + + // template<typename CrsFlatBase, + // typename CrsHierBase> + // int + // CrsMatrixHelper::flat2hier_upper(CrsFlatBase &flat, + // CrsHierBase &hier, + // const typename CrsHierBase::ordinal_type nblks, + // const typename CrsHierBase::ordinal_type_array range, + // const typename CrsHierBase::ordinal_type_array tree) { + // typedef typename CrsHierBase::ordinal_type ordinal_type; + // typedef typename CrsHierBase::size_type size_type; + + // typedef typename CrsHierBase::ordinal_type_array ordinal_type_array; + // //typedef typename CrsHierBase::ordinal_type_array_ptr ordinal_type_array_ptr; + // //typedef typename CrsHierBase::value_type_array_ptr value_type_array_ptr; + + // ordinal_type_array sibling("CrsMatrixHelper::flat2hier_upper::sibling", nblks); + + // // check the end of adjacent siblings (if not adjacent, they are separators) + // ordinal_type p = tree[0]; + // for (ordinal_type i=1;i<nblks;++i) { + // const ordinal_type j = tree[i]; + // if (p != j) { + // p = j; + // sibling[i-1] = -1; + // } + // } + // sibling[nblks-1] = -1; + + // size_type nnz = 0; + + // // count nnz and nnz in rows for the upper triangular hier matrix + // for (ordinal_type i=0;i<nblks;++i) { // search for all rows + // for (ordinal_type j=i;j != -1;j=tree[j]) { // move up + // ordinal_type k=j; + // do { + // ++nnz; + // } while 
(sibling[k++] != -1); + // } + // } + + // // create upper triangular block matrix + // hier.createInternalArrays(nblks, nblks, nnz); + + // nnz = 0; + // for (ordinal_type i=0;i<nblks;++i) { + // hier._ap[i] = nnz; + // for (ordinal_type j=i;j != -1;j=tree[j]) { + // ordinal_type k=j; + // do { + // hier._aj[nnz] = k; + // hier._ax[nnz].setView( flat, range[i], (range[i+1] - range[i]), + // /**/ range[k], (range[k+1] - range[k])); + + // // this checking might more expensive + // if (hier._ax[nnz].hasNumNonZeros()) + // ++nnz; + // } while (sibling[k++] != -1); + // } + // } + // hier._ap[nblks] = nnz; + // hier._nnz = nnz; + + // return 0; + // } + + template<typename CrsFlatBase, + typename CrsHierBase, + typename HostOrdinalTypeArray > + int + CrsMatrixHelper::flat2hier_lower(CrsFlatBase &flat, + CrsHierBase &hier, + const typename CrsHierBase::ordinal_type nblks, + const HostOrdinalTypeArray range, + const HostOrdinalTypeArray tree) { + ERROR(MSG_NOT_YET_IMPLEMENTED); + + // typedef typename CrsHierBase::ordinal_type ordinal_type; + // typedef typename CrsHierBase::size_type size_type; + + // typedef typename CrsHierBase::ordinal_type_array ordinal_type_array; + // //typedef typename CrsHierBase::ordinal_type_array_ptr ordinal_type_array_ptr; + // //typedef typename CrsHierBase::value_type_array_ptr value_type_array_ptr; + + // ordinal_type_array tmp = ordinal_type_array("flat2hier:tmp", nblks+1); + // size_type nnz = 0; + + // // count nnz and nnz in rows for lower triangular matrix + // for (ordinal_type i=0;i<nblks;++i) + // for (ordinal_type j=i;j != -1;++nnz) { + // ++tmp[j]; + // j = tree[j]; + // } + + // // count nnz and nnz in rows for lower triangular matrix + // hier.createInternalArrays(nblks, nblks, nnz); + // for (ordinal_type i=1;i<(nblks+1);++i) + // hier._ap[i] = hier._ap[i-1] + tmp[i-1]; + + // for (ordinal_type i=0;i<(nblks+1);++i) + // tmp[i] = hier._ap[i]; + + // for (ordinal_type i=0;i<nblks;++i) + // for (ordinal_type j=i;j != 
-1;j=tree[j]) { + // hier._aj[tmp[j]] = i; + // hier._ax[tmp[j]].setView( flat, range[j], (range[j+1] - range[j]), + // /**/ range[i], (range[i+1] - range[i])); + // ++tmp[j]; + // } + + return 0; + } + +} + + +#endif + diff --git a/lib/kokkos/example/ichol/src/crs_matrix_view.hpp b/lib/kokkos/example/ichol/src/crs_matrix_view.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2a55e6fac9b64eca3eade412a1511913baafab85 --- /dev/null +++ b/lib/kokkos/example/ichol/src/crs_matrix_view.hpp @@ -0,0 +1,226 @@ +#pragma once +#ifndef __CRS_MATRIX_VIEW_HPP__ +#define __CRS_MATRIX_VIEW_HPP__ + +/// \file crs_matrix_view.hpp +/// \brief CRS matrix view object creates 2D view to setup a computing region. +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +#include "util.hpp" + +namespace Tacho { + + using namespace std; + + template<typename CrsMatBaseType> + class CrsRowView; + + template<typename CrsMatBaseType> + class CrsMatrixView { + public: + typedef typename CrsMatBaseType::space_type space_type; + + typedef typename CrsMatBaseType::value_type value_type; + typedef typename CrsMatBaseType::ordinal_type ordinal_type; + typedef typename CrsMatBaseType::size_type size_type; + + typedef CrsMatBaseType mat_base_type; + typedef CrsRowView<mat_base_type> row_view_type; + + // be careful this use rcp and atomic operation + // - use setView to create a view if _rows is not necessary + // - copy constructor and assignment operator will do soft copy of the object + typedef Kokkos::View<row_view_type*,space_type,Kokkos::MemoryUnmanaged> row_view_type_array; + + private: + CrsMatBaseType _base; // shallow copy of the base object + ordinal_type _offm; // offset in rows + ordinal_type _offn; // offset in cols + ordinal_type _m; // # of rows + ordinal_type _n; // # of cols + + row_view_type_array _rows; + + public: + + KOKKOS_INLINE_FUNCTION + void setRowViewArray( const row_view_type_array & arg_rows ) + { + _rows = arg_rows ; + + for (ordinal_type i=0;i<_m;++i) { + 
_rows[i].setView(*this, i); + } + } + + KOKKOS_INLINE_FUNCTION + row_view_type& RowView(const ordinal_type i) const { return _rows[i]; } + + KOKKOS_INLINE_FUNCTION + void setView(const CrsMatBaseType &base, + const ordinal_type offm, const ordinal_type m, + const ordinal_type offn, const ordinal_type n) { + _base = base; + + _offm = offm; _m = m; + _offn = offn; _n = n; + } + + KOKKOS_INLINE_FUNCTION + const CrsMatBaseType & BaseObject() const { return _base; } + + KOKKOS_INLINE_FUNCTION + ordinal_type OffsetRows() const { return _offm; } + + KOKKOS_INLINE_FUNCTION + ordinal_type OffsetCols() const { return _offn; } + + KOKKOS_INLINE_FUNCTION + ordinal_type NumRows() const { return _m; } + + KOKKOS_INLINE_FUNCTION + ordinal_type NumCols() const { return _n; } + + KOKKOS_INLINE_FUNCTION + bool hasNumNonZeros() const { + const ordinal_type m = NumRows(); + for (ordinal_type i=0;i<m;++i) { + row_view_type row; + row.setView(*this, i); + if (row.NumNonZeros()) return true; + } + return false; + } + + inline + size_type countNumNonZeros() const { + size_type nnz = 0; + const ordinal_type m = NumRows(); + for (ordinal_type i=0;i<m;++i) { + row_view_type row; + row.setView(*this, i); + nnz += row.NumNonZeros(); + } + return nnz; + } + + KOKKOS_INLINE_FUNCTION + CrsMatrixView() + : _base(), + _offm(0), + _offn(0), + _m(0), + _n(0), + _rows() + { } + + KOKKOS_INLINE_FUNCTION + CrsMatrixView(const CrsMatrixView &b) + : _base(b._base), + _offm(b._offm), + _offn(b._offn), + _m(b._m), + _n(b._n), + _rows(b._rows) + { } + + KOKKOS_INLINE_FUNCTION + CrsMatrixView(const CrsMatBaseType & b) + : _base(b), + _offm(0), + _offn(0), + _m(b.NumRows()), + _n(b.NumCols()), + _rows() + { } + + CrsMatrixView(const CrsMatBaseType & b, + const ordinal_type offm, const ordinal_type m, + const ordinal_type offn, const ordinal_type n) + : _base(b), + _offm(offm), + _offn(offn), + _m(m), + _n(n), + _rows() + { } + + ostream& showMe(ostream &os) const { + const int w = 4; + os << "CrsMatrixView, " 
+ << " Offs ( " << setw(w) << _offm << ", " << setw(w) << _offn << " ); " + << " Dims ( " << setw(w) << _m << ", " << setw(w) << _n << " ); " + << " NumNonZeros = " << countNumNonZeros() << ";"; + + return os; + } + + }; +} + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#if ! KOKKOS_USING_EXP_VIEW + +namespace Kokkos { + namespace Impl { + + // The Kokkos::View allocation will by default assign each allocated datum to zero. + // This is not the required initialization behavior when + // Tacho::CrsRowView and Tacho::CrsMatrixView + // are used within a Kokkos::View. + // Create a partial specialization of the Kokkos::Impl::AViewDefaultConstruct + // to replace the assignment initialization with placement new initialization. + // + // This work-around is necessary until a TBD design refactorization of Kokkos::View. + + template< class ExecSpace , typename T > + struct ViewDefaultConstruct< ExecSpace , Tacho::CrsRowView<T> , true > + { + typedef Tacho::CrsRowView<T> type ; + type * const m_ptr ; + + KOKKOS_FORCEINLINE_FUNCTION + void operator()( const typename ExecSpace::size_type& i ) const + { new(m_ptr+i) type(); } + + ViewDefaultConstruct( type * pointer , size_t capacity ) + : m_ptr( pointer ) + { + Kokkos::RangePolicy< ExecSpace > range( 0 , capacity ); + parallel_for( range , *this ); + ExecSpace::fence(); + } + }; + + template< class ExecSpace , typename T > + struct ViewDefaultConstruct< ExecSpace , Tacho::CrsMatrixView<T> , true > + { + typedef Tacho::CrsMatrixView<T> type ; + type * const m_ptr ; + + KOKKOS_FORCEINLINE_FUNCTION + void operator()( const typename ExecSpace::size_type& i ) const + { new(m_ptr+i) type(); } + + ViewDefaultConstruct( type * pointer , size_t capacity ) + : m_ptr( pointer ) + { + Kokkos::RangePolicy< ExecSpace > range( 0 , capacity ); + parallel_for( range , *this ); + ExecSpace::fence(); + } + }; + + } // 
namespace Impl +} // namespace Kokkos + +#endif /* #if ! KOKKOS_USING_EXP_VIEW */ + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif diff --git a/lib/kokkos/example/ichol/src/crs_row_view.hpp b/lib/kokkos/example/ichol/src/crs_row_view.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8556bcb9e637dd64afdf92f4ef6b526a14562d09 --- /dev/null +++ b/lib/kokkos/example/ichol/src/crs_row_view.hpp @@ -0,0 +1,185 @@ +#pragma once +#ifndef __CRS_ROW_VIEW_HPP__ +#define __CRS_ROW_VIEW_HPP__ + +/// \file crs_row_view.hpp +/// \brief A view to a row extracted from CrsMatrixView. +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +namespace Tacho { + + using namespace std; + + /// \class CrsRowView + template<typename CrsMatBaseType> + class CrsRowView { + public: + typedef typename CrsMatBaseType::ordinal_type ordinal_type; + typedef typename CrsMatBaseType::value_type value_type; + typedef typename CrsMatBaseType::ordinal_type_array_ptr ordinal_type_array_ptr; + typedef typename CrsMatBaseType::value_type_array_ptr value_type_array_ptr; + + private: + // row info + ordinal_type _offn, _n; + + // this assumes a contiguous memory buffer + ordinal_type_array_ptr _aj, _ajn; // column index compressed format in row + value_type_array_ptr _ax; // values + + static KOKKOS_INLINE_FUNCTION + typename CrsMatBaseType::ordinal_type_array_ptr + lower_bound( typename CrsMatBaseType::ordinal_type_array_ptr begin , + typename CrsMatBaseType::ordinal_type_array_ptr const end , + typename CrsMatBaseType::ordinal_type const val ) + { + typename CrsMatBaseType::ordinal_type_array_ptr it = begin ; + int count = end - begin ; + int step = 0 ; + while (count>0) { + it = begin ; + it += ( step = (count >> 1) ); + if (*it<val) { + begin=++it; + count-=step+1; + } + else { count=step; } + } + return begin; + } + + public: + KOKKOS_INLINE_FUNCTION + ordinal_type 
OffsetCols() const { return _offn; } + + KOKKOS_INLINE_FUNCTION + ordinal_type NumCols() const { return _n; } + + KOKKOS_INLINE_FUNCTION + ordinal_type NumNonZeros() const { return _ajn - _aj; } + + KOKKOS_INLINE_FUNCTION + ordinal_type Col(const ordinal_type j) const { return _aj[j] - _offn; } + + KOKKOS_INLINE_FUNCTION + value_type& Value(const ordinal_type j) { return _ax[j]; } + + KOKKOS_INLINE_FUNCTION + value_type Value(const ordinal_type j) const { return _ax[j]; } + + KOKKOS_INLINE_FUNCTION + ordinal_type Index(const ordinal_type col ) const { + const ordinal_type loc = _offn + col ; + // binary search + ordinal_type_array_ptr aj = CrsRowView::lower_bound(_aj, _ajn, loc); + + // if found, return index for the location, + // otherwise return -1 (not found), -2 (end of array) + return (aj < _ajn ? (*aj == loc ? aj - _aj : -1) : -2); + } + + KOKKOS_INLINE_FUNCTION + ordinal_type Index(const ordinal_type col, + const ordinal_type prev ) const { + const ordinal_type loc = _offn + col; + ordinal_type_array_ptr aj = _aj + prev; + + // binary search + // aj = lower_bound(aj, _ajn, loc); + + // linear search from prev: this gains about 45 % faster + for ( ;aj < _ajn && *aj<loc; ++aj); + + // if found, return index for the location, + // otherwise return -1 (not found), -2 (end of array) + return (aj < _ajn ? (*aj == loc ? aj - _aj : -1) : -2); + } + + KOKKOS_INLINE_FUNCTION + value_type ValueAtColumn(const ordinal_type col) const { + const ordinal_type j = Index(col); + return (j < 0 ? 
value_type(0) : _ax[j]); + } + + KOKKOS_INLINE_FUNCTION + CrsRowView() + : _offn(0), + _n(0), + _aj(), + _ajn(), + _ax() + { } + + + KOKKOS_INLINE_FUNCTION + CrsRowView(const ordinal_type offn, + const ordinal_type n, + const ordinal_type_array_ptr aj, + const ordinal_type_array_ptr ajn, + const value_type_array_ptr ax) + : _offn(offn), + _n(n), + _aj(aj), + _ajn(ajn), + _ax(ax) + { } + + KOKKOS_INLINE_FUNCTION + CrsRowView(const CrsMatrixView<CrsMatBaseType> &A, + const ordinal_type i) { + this->setView(A, i); + } + + KOKKOS_INLINE_FUNCTION + CrsRowView(const CrsMatBaseType &A, + const ordinal_type i) { + this->setView(A, i); + } + + KOKKOS_INLINE_FUNCTION + void setView(const CrsMatrixView<CrsMatBaseType> &A, + const ordinal_type i) { + _offn = A.OffsetCols(); + _n = A.NumCols(); + + const ordinal_type ii = A.OffsetRows() + i; + + const typename CrsMatBaseType::ordinal_type_array_ptr cols = A.BaseObject().ColsInRow(ii); + const typename CrsMatBaseType::ordinal_type_array_ptr next = A.BaseObject().ColsInRow(ii+1); + const typename CrsMatBaseType::value_type_array_ptr vals = A.BaseObject().ValuesInRow(ii); + + // [cols..next) is sorted so a log(N) search could performed + _aj = CrsRowView::lower_bound(cols, next, _offn); + _ajn = CrsRowView::lower_bound(_aj, next, _offn+_n); + + _ax = &vals[_aj - cols]; + } + + KOKKOS_INLINE_FUNCTION + void setView(const CrsMatBaseType &A, + const ordinal_type i) { + _offn = 0; + _n = A.NumCols(); + _aj = A.ColsInRow(i); + _ajn = A.ColsInRow(i+1); + _ax = A.ValuesInRow(i); + } + + ostream& showMe(ostream &os) const { + const ordinal_type nnz = NumNonZeros(); + const ordinal_type offset = OffsetCols(); + os << " offset = " << offset + << ", nnz = " << nnz + << endl; + for (ordinal_type j=0;j<nnz;++j) { + const value_type val = _ax[j]; + os << "(" << Col(j) << ", " + << val << ")" + << endl; + } + return os; + } + }; +} + +#endif diff --git a/lib/kokkos/example/ichol/src/dot.hpp b/lib/kokkos/example/ichol/src/dot.hpp new file mode 
100644 index 0000000000000000000000000000000000000000..acf927e0689759873b441012e187131a54055f88 --- /dev/null +++ b/lib/kokkos/example/ichol/src/dot.hpp @@ -0,0 +1,74 @@ +#pragma once +#ifndef __DOT_HPP__ +#define __DOT_HPP__ + +/// \file dot.hpp +/// \brief Sparse dot product. +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +/// dot_type result = x^H y + +namespace Tacho { + + using namespace std; + + template<typename T> struct DotTraits { + typedef T dot_type; + + static KOKKOS_FORCEINLINE_FUNCTION + dot_type + // dot(const T &x, const T &y) { return conj<T>(x)*y; } + dot(const T &x, const T &y) { return x*y; } + }; + + template<typename CrsRowViewType> + KOKKOS_INLINE_FUNCTION + typename CrsRowViewType::value_type + dot(const CrsRowViewType x, const CrsRowViewType y) { + typedef typename CrsRowViewType::ordinal_type ordinal_type; + typedef typename CrsRowViewType::value_type value_type; + + typedef DotTraits<value_type> dot_traits; + + value_type r_val(0); + + const ordinal_type nnz_x = x.NumNonZeros(); + const ordinal_type nnz_y = y.NumNonZeros(); + + for (ordinal_type jx=0, jy=0;jx<nnz_x && jy<nnz_y;) { + const ordinal_type diff = x.Col(jx) - y.Col(jy); + const ordinal_type sign = (0 < diff) - (diff < 0); + switch (sign) { + case 0: + r_val += dot_traits::dot(x.Value(jx++), y.Value(jy++)); + break; + case -1: ++jx; break; + case 1: ++jy; break; + } + } + + return r_val; + } + + template<typename CrsRowViewType> + KOKKOS_INLINE_FUNCTION + typename CrsRowViewType::value_type + dot(const CrsRowViewType x) { + typedef typename CrsRowViewType::ordinal_type ordinal_type; + typedef typename CrsRowViewType::value_type value_type; + + typedef DotTraits<value_type> dot_traits; + + value_type r_val(0); + + const ordinal_type nnz = x.NumNonZeros(); + + for (ordinal_type j=0;j<nnz;++j) + r_val += dot_traits::dot(x.Value(j), x.Value(j)); + + return r_val; + } + +} + +#endif diff --git a/lib/kokkos/example/ichol/src/gemm.hpp b/lib/kokkos/example/ichol/src/gemm.hpp new file 
mode 100644 index 0000000000000000000000000000000000000000..33c6058ec6fc6727dc62a320cab7bbb1855ea93f --- /dev/null +++ b/lib/kokkos/example/ichol/src/gemm.hpp @@ -0,0 +1,99 @@ +#pragma once +#ifndef __GEMM_HPP__ +#define __GEMM_HPP__ + +/// \file gemm.hpp +/// \brief Sparse matrix-matrix multiplication on given sparse patterns. +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +#include "util.hpp" +#include "control.hpp" +#include "partition.hpp" + +namespace Tacho { + + using namespace std; + + template<int ArgTransA, int ArgTransB, int ArgAlgo, + int ArgVariant = Variant::One, + template<int,int> class ControlType = Control> + struct Gemm { + + // data-parallel interface + // ======================= + template<typename ScalarType, + typename ExecViewTypeA, + typename ExecViewTypeB, + typename ExecViewTypeC> + KOKKOS_INLINE_FUNCTION + static int invoke(typename ExecViewTypeA::policy_type &policy, + const typename ExecViewTypeA::policy_type::member_type &member, + const ScalarType alpha, + typename ExecViewTypeA::matrix_type &A, + typename ExecViewTypeB::matrix_type &B, + const ScalarType beta, + typename ExecViewTypeC::matrix_type &C); + + // task-data parallel interface + // ============================ + template<typename ScalarType, + typename ExecViewTypeA, + typename ExecViewTypeB, + typename ExecViewTypeC> + class TaskFunctor { + public: + typedef typename ExecViewTypeA::policy_type policy_type; + typedef typename policy_type::member_type member_type; + typedef int value_type; + + private: + ScalarType _alpha, _beta; + typename ExecViewTypeA::matrix_type _A; + typename ExecViewTypeB::matrix_type _B; + typename ExecViewTypeC::matrix_type _C; + + policy_type _policy; + + public: + KOKKOS_INLINE_FUNCTION + TaskFunctor(const policy_type & P, + const ScalarType alpha, + const typename ExecViewTypeA::matrix_type & A, + const typename ExecViewTypeB::matrix_type & B, + const ScalarType beta, + const typename ExecViewTypeC::matrix_type & C) + : _alpha(alpha), + 
_beta(beta), + _A(A), + _B(B), + _C(C), + _policy(P) + { } + + string Label() const { return "Gemm"; } + + // task execution + KOKKOS_INLINE_FUNCTION + void apply(value_type &r_val) { + r_val = Gemm::invoke<ScalarType,ExecViewTypeA,ExecViewTypeB,ExecViewTypeC>(_policy, _policy.member_single(), + _alpha, _A, _B, _beta, _C); + } + + // task-data execution + KOKKOS_INLINE_FUNCTION + void apply(const member_type &member, value_type &r_val) { + r_val = Gemm::invoke<ScalarType,ExecViewTypeA,ExecViewTypeB,ExecViewTypeC>(_policy, member, + _alpha, _A, _B, _beta, _C); + } + + }; + + }; + +} + + +// #include "gemm_nt_nt.hpp" +#include "gemm_ct_nt.hpp" + +#endif diff --git a/lib/kokkos/example/ichol/src/gemm_ct_nt.hpp b/lib/kokkos/example/ichol/src/gemm_ct_nt.hpp new file mode 100644 index 0000000000000000000000000000000000000000..13d2518cab90896929ecb58645e61aeb51849394 --- /dev/null +++ b/lib/kokkos/example/ichol/src/gemm_ct_nt.hpp @@ -0,0 +1,12 @@ +#pragma once +#ifndef __GEMM_CT_NT_HPP__ +#define __GEMM_CT_NT_HPP__ + +/// \file gemm_ct_nt.hpp +/// \brief Sparse matrix-matrix multiplication on given sparse patterns. +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +#include "gemm_ct_nt_for_factor_blocked.hpp" +// #include "gemm_ct_nt_for_tri_solve_blocked.hpp" + +#endif diff --git a/lib/kokkos/example/ichol/src/gemm_ct_nt_for_factor_blocked.hpp b/lib/kokkos/example/ichol/src/gemm_ct_nt_for_factor_blocked.hpp new file mode 100644 index 0000000000000000000000000000000000000000..88a4658482a1504ab6ad6334d65bd34a7dea055f --- /dev/null +++ b/lib/kokkos/example/ichol/src/gemm_ct_nt_for_factor_blocked.hpp @@ -0,0 +1,108 @@ +#pragma once +#ifndef __GEMM_CT_NT_FOR_FACTOR_BLOCKED_HPP__ +#define __GEMM_CT_NT_FOR_FACTOR_BLOCKED_HPP__ + +/// \file gemm_ct_nt_for_factor_blocked.hpp +/// \brief Sparse matrix-matrix multiplication on given sparse patterns. 
+/// \author Kyungjoo Kim (kyukim@sandia.gov) + +namespace Tacho { + + using namespace std; + + // Gemm used in the factorization phase + // ==================================== + template<> + template<typename ScalarType, + typename CrsExecViewTypeA, + typename CrsExecViewTypeB, + typename CrsExecViewTypeC> + KOKKOS_INLINE_FUNCTION + int + Gemm<Trans::ConjTranspose,Trans::NoTranspose, + AlgoGemm::ForFactorBlocked> + ::invoke(typename CrsExecViewTypeA::policy_type &policy, + const typename CrsExecViewTypeA::policy_type::member_type &member, + const ScalarType alpha, + typename CrsExecViewTypeA::matrix_type &A, + typename CrsExecViewTypeB::matrix_type &B, + const ScalarType beta, + typename CrsExecViewTypeC::matrix_type &C) { + typedef typename CrsExecViewTypeA::ordinal_type ordinal_type; + typedef typename CrsExecViewTypeA::value_type value_type; + typedef typename CrsExecViewTypeA::row_view_type row_view_type; + + +if ( false && member.team_rank() == 0 ) { + printf("Gemm [%d +%d)x[%d +%d)\n" + , C.OffsetRows() + , C.NumRows() + , C.OffsetCols() + , C.NumCols() + ); +} + + // scale the matrix C with beta + scaleCrsMatrix<ScalarType,CrsExecViewTypeC>(member, beta, C); + + // Sparse matrix-matrix multiply: + // C(i,j) += alpha*A'(i,k)*B(k,j) + + const ordinal_type mA = A.NumRows(); + for (ordinal_type k=0;k<mA;++k) { + row_view_type &a = A.RowView(k); + const ordinal_type nnz_a = a.NumNonZeros(); + + row_view_type &b = B.RowView(k); + const ordinal_type nnz_b = b.NumNonZeros(); + + if (nnz_a > 0 && nnz_b > 0 ) { +#if 0 + Kokkos::parallel_for( + Kokkos::TeamThreadRange(member, 0, nnz_a), + [&](const ordinal_type i) { + const ordinal_type row_at_i = a.Col(i); + const value_type val_at_ik = a.Value(i); + // const value_type val_at_ik = conj(a.Value(i)); + + row_view_type &c = C.RowView(row_at_i); + + ordinal_type idx = 0; + for (ordinal_type j=0;j<nnz_b && (idx > -2);++j) { + const ordinal_type col_at_j = b.Col(j); + const value_type val_at_kj = b.Value(j); + + idx = 
c.Index(col_at_j, idx); + if (idx >= 0) + c.Value(idx) += alpha*val_at_ik*val_at_kj; + } + }); +#else + Kokkos::parallel_for( + Kokkos::TeamThreadRange(member, 0, nnz_a * nnz_b ), + [&](const ordinal_type ii) { + const ordinal_type i = ii / nnz_a ; + const ordinal_type j = ii % nnz_a ; + + row_view_type &c = C.RowView( a.Col(i) ); + + // Binary search for c's index of b.Col(j) + const ordinal_type idx = c.Index( b.Col(j) ); + + if (idx >= 0) { + // const value_type val_at_ik = conj(a.Value(i)); + c.Value(idx) += alpha * a.Value(i) * b.Value(j); + } + }); +#endif + + member.team_barrier(); + } + } + + return 0; + } + +} + +#endif diff --git a/lib/kokkos/example/ichol/src/graph_helper_scotch.hpp b/lib/kokkos/example/ichol/src/graph_helper_scotch.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d2dd004579a507439b457a12a6f0de909bf33acd --- /dev/null +++ b/lib/kokkos/example/ichol/src/graph_helper_scotch.hpp @@ -0,0 +1,427 @@ +#pragma once +#ifndef __GRAPH_HELPER_SCOTCH_HPP__ +#define __GRAPH_HELPER_SCOTCH_HPP__ + +/// \file graph_helper_scotch.hpp +/// \brief Interface to scotch reordering +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +#include "scotch.h" +#include "util.hpp" + +namespace Tacho { + + using namespace std; + + template<class CrsMatBaseType> + class GraphHelper_Scotch : public Disp { + public: + typedef typename CrsMatBaseType::ordinal_type ordinal_type; + typedef typename CrsMatBaseType::size_type size_type; + + typedef typename CrsMatBaseType::ordinal_type_array ordinal_type_array; + typedef typename CrsMatBaseType::size_type_array size_type_array; + + private: + string _label; + + // scotch main data structure + SCOTCH_Graph _graph; + SCOTCH_Num _strat; + int _level; + + // scotch input has no diagonal contribution + ordinal_type _base,_m; + ordinal_type_array _cidx; + + size_type _nnz; + size_type_array _rptr; + + // scotch output + ordinal_type _cblk; + ordinal_type_array _perm,_peri,_range,_tree; + + // status flag + bool 
_is_ordered; + + public: + + void setLabel(string label) { _label = label; } + string Label() const { return _label; } + + size_type NumNonZeros() const { return _nnz; } + ordinal_type NumRows() const { return _m; } + + size_type_array RowPtrVector() const { return _rptr; } + ordinal_type_array ColIndexVector() const { return _cidx; } + + ordinal_type_array PermVector() const { return _perm; } + ordinal_type_array InvPermVector() const { return _peri; } + + ordinal_type_array RangeVector() const { return _range; } + ordinal_type_array TreeVector() const { return _tree; } + + ordinal_type NumBlocks() const { return _cblk; } + + GraphHelper_Scotch() = default; + + // convert graph first + GraphHelper_Scotch(const string label, + const ordinal_type m, + const size_type_array rptr, + const ordinal_type_array cidx, + const int seed = GraphHelper::DefaultRandomSeed) { + + _label = "GraphHelper_Scotch::" + label; + + _is_ordered = false; + _cblk = 0; + + // scotch does not allow self-contribution (diagonal term in sparse matrix) + _base = 0; //A.BaseVal(); + _m = m; // A.NumRows(); + _nnz = rptr[m]; //A.NumNonZeros(); + + _rptr = rptr; //size_type_array(_label+"::RowPtrArray", _m+1); + _cidx = cidx; //ordinal_type_array(_label+"::ColIndexArray", _nnz); + + _perm = ordinal_type_array(_label+"::PermutationArray", _m); + _peri = ordinal_type_array(_label+"::InvPermutationArray", _m); + _range = ordinal_type_array(_label+"::RangeArray", _m); + _tree = ordinal_type_array(_label+"::TreeArray", _m); + + // create a graph structure without diagonals + _strat = 0; + _level = 0; + + //A.convertGraph(_nnz, _rptr, _cidx); + + int ierr = 0; + ordinal_type *rptr_ptr = reinterpret_cast<ordinal_type*>(_rptr.ptr_on_device()); + ordinal_type *cidx_ptr = reinterpret_cast<ordinal_type*>(_cidx.ptr_on_device()); + + if (seed != GraphHelper::DefaultRandomSeed) { + SCOTCH_randomSeed(seed); + SCOTCH_randomReset(); + } + + ierr = SCOTCH_graphInit(&_graph);CHKERR(ierr); + ierr = 
SCOTCH_graphBuild(&_graph, // scotch graph + _base, // base value + _m, // # of vertices + rptr_ptr, // column index array pointer begin + rptr_ptr+1, // column index array pointer end + NULL, // weights on vertices (optional) + NULL, // label array on vertices (optional) + _nnz, // # of nonzeros + cidx_ptr, // column index array + NULL);CHKERR(ierr); // edge load array (optional) + ierr = SCOTCH_graphCheck(&_graph);CHKERR(ierr); + } + GraphHelper_Scotch(const GraphHelper_Scotch &b) = default; + + virtual~GraphHelper_Scotch() { + SCOTCH_graphFree(&_graph); + } + + void setStratGraph(const SCOTCH_Num strat = 0) { + _strat = strat; + } + + void setTreeLevel(const int level = 0) { + _level = level; + } + + int computeOrdering(const ordinal_type treecut = 0, + const ordinal_type minblksize = 0) { + int ierr = 0; + + // pointers for global graph ordering + ordinal_type *perm = _perm.ptr_on_device(); + ordinal_type *peri = _peri.ptr_on_device(); + ordinal_type *range = _range.ptr_on_device(); + ordinal_type *tree = _tree.ptr_on_device(); + + { + const int level = (_level ? 
_level : max(1, int(log2(_m)-treecut))); // level = log2(_nnz)+10; + SCOTCH_Strat stradat; + SCOTCH_Num straval = _strat; + //(SCOTCH_STRATLEVELMAX));// | + //SCOTCH_STRATLEVELMIN | + //SCOTCH_STRATLEAFSIMPLE | + //SCOTCH_STRATSEPASIMPLE); + + ierr = SCOTCH_stratInit(&stradat);CHKERR(ierr); + + // if both are zero, do not run strategy + if (_strat || _level) { + cout << "GraphHelper_Scotch:: User provide a strategy and/or level" << endl + << " strategy = " << _strat << ", level = " << _level << endl; + ierr = SCOTCH_stratGraphOrderBuild (&stradat, straval, level, 0.2);CHKERR(ierr); + } + ierr = SCOTCH_graphOrder(&_graph, + &stradat, + perm, + peri, + &_cblk, + range, + tree);CHKERR(ierr); + SCOTCH_stratExit(&stradat); + } + +#if 0 + { + // assume there are multiple roots + range[_cblk+1] = range[_cblk]; // dummy range + tree[_cblk] = -1; // dummy root + for (ordinal_type i=0;i<_cblk;++i) + if (tree[i] == -1) // multiple roots becomes children of the hummy root + tree[i] = (_cblk+1); + ++_cblk; // include the dummy root + } +#endif + + // provided blksize is greater than 0, reorder internally + // if (treecut > 0 && minblksize > 0) { + // // graph array + // ordinal_type *rptr_ptr = reinterpret_cast<ordinal_type*>(_rptr.ptr_on_device()); + // ordinal_type *cidx_ptr = reinterpret_cast<ordinal_type*>(_cidx.ptr_on_device()); + + // // create workspace in + // size_type_array rptr_work = size_type_array(_label+"::Block::RowPtrArray", _m+1); + // ordinal_type_array cidx_work = ordinal_type_array(_label+"::Block::ColIndexArray", _nnz); + + // // create workspace output + // ordinal_type_array perm_work = ordinal_type_array(_label+"::Block::PermutationArray", _m); + // ordinal_type_array peri_work = ordinal_type_array(_label+"::Block::InvPermutationArray", _m); + // ordinal_type_array range_work = ordinal_type_array(_label+"::Block::RangeArray", _m); + // ordinal_type_array tree_work = ordinal_type_array(_label+"::Block::TreeArray", _m); + + // // scotch input + // 
ordinal_type *rptr_blk = reinterpret_cast<ordinal_type*>(rptr_work.ptr_on_device()); + // ordinal_type *cidx_blk = reinterpret_cast<ordinal_type*>(cidx_work.ptr_on_device()); + + // size_type nnz = 0; + // rptr_blk[0] = nnz; + + // for (ordinal_type iblk=0;iblk<_cblk;++iblk) { + // // allocate graph + // SCOTCH_Graph graph; + + // ierr = SCOTCH_graphInit(&graph);CHKERR(ierr); + + // SCOTCH_Strat stradat; + // SCOTCH_Num straval = (/*SCOTCH_STRATLEVELMAX | + // SCOTCH_STRATLEVELMIN |*/ + // SCOTCH_STRATLEAFSIMPLE | + // SCOTCH_STRATSEPASIMPLE); + + // ierr = SCOTCH_stratInit(&stradat);CHKERR(ierr); + // ierr = SCOTCH_stratGraphOrderBuild(&stradat, straval, 0, 0.2);CHKERR(ierr); + + // const ordinal_type ibegin = range[iblk], iend = range[iblk+1], m = iend - ibegin; + + // // scotch output + // ordinal_type cblk_blk = 0; + + // ordinal_type *perm_blk = perm_work.ptr_on_device() + ibegin; + // ordinal_type *peri_blk = peri_work.ptr_on_device() + ibegin; + // ordinal_type *range_blk = range_work.ptr_on_device() + ibegin; + // ordinal_type *tree_blk = tree_work.ptr_on_device() + ibegin; + + // // if each blk is greater than the given minblksize, reorder internally + // if (m > minblksize) { + // for (int i=ibegin;i<iend;++i) { + // const ordinal_type ii = peri[i]; + // const ordinal_type jbegin = rptr_ptr[ii]; + // const ordinal_type jend = rptr_ptr[ii+1]; + + // for (int j=jbegin;j<jend;++j) { + // const ordinal_type jj = perm[cidx_ptr[j]]; + // if (ibegin <= jj && jj < iend) + // cidx_blk[nnz++] = (jj - ibegin); + // } + // rptr_blk[i+1] = nnz; + // } + // const size_type nnz_blk = nnz - rptr_blk[ibegin]; + + // ierr = SCOTCH_graphBuild(&graph, // scotch graph + // 0, // base value + // m, // # of vertices + // &rptr_blk[ibegin], // column index array pointer begin + // &rptr_blk[ibegin]+1,// column index array pointer end + // NULL, // weights on vertices (optional) + // NULL, // label array on vertices (optional) + // nnz_blk, // # of nonzeros + // cidx_blk, // 
column index array + // NULL);CHKERR(ierr); // edge load array (optional) + // ierr = SCOTCH_graphCheck(&graph);CHKERR(ierr); + // ierr = SCOTCH_graphOrder(&graph, + // &stradat, + // perm_blk, + // peri_blk, + // &cblk_blk, + // range_blk, + // tree_blk);CHKERR(ierr); + // } else { + // for (ordinal_type i=0;i<m;++i) { + // perm_blk[i] = i; + // peri_blk[i] = i; + // } + // range_blk[1] = m; + // tree_blk[0] = -1; + // } + + // SCOTCH_stratExit(&stradat); + // SCOTCH_graphFree(&graph); + + // for (ordinal_type i=0;i<m;++i) { + // const ordinal_type ii = peri_blk[i] + ibegin; + // peri_blk[i] = peri[ii]; + // } + // for (ordinal_type i=0;i<m;++i) { + // const ordinal_type ii = i + ibegin; + // peri[ii] = peri_blk[i]; + // } + + // } + + // for (ordinal_type i=0;i<_m;++i) + // perm[peri[i]] = i; + // } + + _is_ordered = true; + + //cout << "SCOTCH level = " << level << endl; + //cout << "Range Tree " << endl; + //for (int i=0;i<_cblk;++i) + // cout << _range[i] << " :: " << i << " " << _tree[i] << endl; + + return 0; + } + + int pruneTree(const ordinal_type cut) { + if (cut <=0 ) return 0; + + ordinal_type_array work = ordinal_type_array(_label+"::WorkArray", _cblk+1); + for (ordinal_type iter=0;iter<cut && _cblk > 1;++iter) { + // horizontal merging + { + ordinal_type cnt = 0; + ordinal_type parent = _tree[0]; + work[0] = cnt; + for (ordinal_type i=1;i<_cblk;++i) { + const ordinal_type myparent = _tree[i]; + if (myparent == parent) { + work[i] = cnt; + } else { + parent = _tree[i]; + work[i] = ++cnt; + } + } + work[_cblk] = ++cnt; + + ordinal_type prev = -2; + const ordinal_type root = _cblk - 1; + for (ordinal_type i=0;i<root;++i) { + const ordinal_type myparent = _tree[i]; + const ordinal_type me = work[i]; + + _tree[me] = work[myparent]; + if (prev != me) { + _range[me] = _range[i]; + prev = me; + } + } + { + const ordinal_type me = work[root]; + _tree[me] = -1; + _range[me] = _range[root]; + + _range[work[root+1]] = _range[root+1]; + _cblk = cnt; + } + } + + // 
vertical merging + if (_cblk == 2) { + _tree[0] = -1; + _range[0] = 0; + _range[1] = _range[2]; + _cblk = 1; + } else { + ordinal_type cnt = 0; + for (ordinal_type i=0;i<_cblk;++i) { + const ordinal_type diff = _tree[i+1] - _tree[i]; + work[i] = (diff == 1 ? cnt : cnt++); + } + work[_cblk] = cnt; + + ordinal_type prev = -2; + const ordinal_type root = _cblk - 1; + for (ordinal_type i=0;i<root;++i) { + const ordinal_type myparent = _tree[i]; + const ordinal_type me = work[i]; + + _tree[me] = work[myparent]; + if (prev != me) { + _range[me] = _range[i]; + prev = me; + } + } + { + const ordinal_type me = work[root]; + _tree[me] = -1; + _range[me] = _range[root]; + + _range[work[root+1]] = _range[root+1]; + _cblk = cnt; + } + } + } + + // cleaning + { + for (ordinal_type i=(_cblk+1);i<_m;++i) { + _tree[i] = 0; + _range[i] = 0; + } + _tree[_cblk] = 0; + } + + return 0; + } + + ostream& showMe(ostream &os) const { + streamsize prec = os.precision(); + os.precision(15); + os << scientific; + + os << " -- Scotch input -- " << endl + << " Base Value = " << _base << endl + << " # of Rows = " << _m << endl + << " # of NonZeros = " << _nnz << endl; + + if (_is_ordered) + os << " -- Ordering -- " << endl + << " CBLK = " << _cblk << endl + << " PERM PERI RANG TREE" << endl; + + const int w = 6; + for (ordinal_type i=0;i<_m;++i) + os << setw(w) << _perm[i] << " " + << setw(w) << _peri[i] << " " + << setw(w) << _range[i] << " " + << setw(w) << _tree[i] << endl; + + os.unsetf(ios::scientific); + os.precision(prec); + + return os; + } + + }; + +} + +#endif diff --git a/lib/kokkos/example/ichol/src/herk.hpp b/lib/kokkos/example/ichol/src/herk.hpp new file mode 100644 index 0000000000000000000000000000000000000000..548c495c448604d2bffd7a5dd1d9745ce440fc9e --- /dev/null +++ b/lib/kokkos/example/ichol/src/herk.hpp @@ -0,0 +1,91 @@ +#pragma once +#ifndef __HERK_HPP__ +#define __HERK_HPP__ + +/// \file herk.hpp +/// \brief Sparse hermitian rank one update on given sparse patterns. 
+/// \author Kyungjoo Kim (kyukim@sandia.gov) + +#include "util.hpp" +#include "control.hpp" +#include "partition.hpp" + +namespace Tacho { + + using namespace std; + + template<int ArgUplo, int ArgTrans, int ArgAlgo, + int ArgVariant = Variant::One, + template<int,int> class ControlType = Control> + struct Herk { + + // data-parallel interface + // ======================= + template<typename ScalarType, + typename ExecViewTypeA, + typename ExecViewTypeC> + KOKKOS_INLINE_FUNCTION + static int invoke(typename ExecViewTypeA::policy_type &policy, + const typename ExecViewTypeA::policy_type::member_type &member, + const ScalarType alpha, + typename ExecViewTypeA::matrix_type &A, + const ScalarType beta, + typename ExecViewTypeC::matrix_type &C); + + // task-data parallel interface + // ============================ + template<typename ScalarType, + typename ExecViewTypeA, + typename ExecViewTypeC> + class TaskFunctor { + public: + typedef typename ExecViewTypeA::policy_type policy_type; + typedef typename policy_type::member_type member_type; + typedef int value_type; + + private: + ScalarType _alpha, _beta; + typename ExecViewTypeA::matrix_type _A; + typename ExecViewTypeC::matrix_type _C; + + policy_type _policy; + + public: + KOKKOS_INLINE_FUNCTION + TaskFunctor(const policy_type & P, + const ScalarType alpha, + const typename ExecViewTypeA::matrix_type & A, + const ScalarType beta, + const typename ExecViewTypeC::matrix_type & C) + : _alpha(alpha), + _beta(beta), + _A(A), + _C(C), + _policy(P) + { } + + string Label() const { return "Herk"; } + + // task execution + KOKKOS_INLINE_FUNCTION + void apply(value_type &r_val) { + r_val = Herk::invoke<ScalarType,ExecViewTypeA,ExecViewTypeC>(_policy, _policy.member_single(), + _alpha, _A, _beta, _C); + } + + // task-data execution + KOKKOS_INLINE_FUNCTION + void apply(const member_type &member, value_type &r_val) { + r_val = Herk::invoke<ScalarType,ExecViewTypeA,ExecViewTypeC>(_policy, member, + _alpha, _A, _beta, _C); + } 
+ + }; + + }; + +} + +#include "herk_u_ct.hpp" + +#endif diff --git a/lib/kokkos/example/ichol/src/herk_u_ct.hpp b/lib/kokkos/example/ichol/src/herk_u_ct.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6de4a2fa5628f0bdd77da6fdfc916ad112569fce --- /dev/null +++ b/lib/kokkos/example/ichol/src/herk_u_ct.hpp @@ -0,0 +1,11 @@ +#pragma once +#ifndef __HERK_U_CT_HPP__ +#define __HERK_U_CT_HPP__ + +/// \file herk_u_ct.hpp +/// \brief Sparse hermitian rank one update on given sparse patterns. +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +#include "herk_u_ct_for_factor_blocked.hpp" + +#endif diff --git a/lib/kokkos/example/ichol/src/herk_u_ct_for_factor_blocked.hpp b/lib/kokkos/example/ichol/src/herk_u_ct_for_factor_blocked.hpp new file mode 100644 index 0000000000000000000000000000000000000000..58bba2be3c9c5fba07a3a36a77545bca917778c3 --- /dev/null +++ b/lib/kokkos/example/ichol/src/herk_u_ct_for_factor_blocked.hpp @@ -0,0 +1,103 @@ +#pragma once +#ifndef __HERK_U_CT_FOR_FACTOR_BLOCKED_HPP__ +#define __HERK_U_CT_FOR_FACTOR_BLOCKED_HPP__ + +/// \file herk_u_ct_for_factor_blocked.hpp +/// \brief Sparse hermitian rank one update on given sparse patterns. 
+/// \author Kyungjoo Kim (kyukim@sandia.gov) + +namespace Tacho { + + using namespace std; + + + // Herk used in the factorization phase + // ==================================== + template<> + template<typename ScalarType, + typename CrsExecViewTypeA, + typename CrsExecViewTypeC> + KOKKOS_INLINE_FUNCTION + int + Herk<Uplo::Upper,Trans::ConjTranspose, + AlgoHerk::ForFactorBlocked> + ::invoke(typename CrsExecViewTypeA::policy_type &policy, + const typename CrsExecViewTypeA::policy_type::member_type &member, + const ScalarType alpha, + typename CrsExecViewTypeA::matrix_type &A, + const ScalarType beta, + typename CrsExecViewTypeC::matrix_type &C) { + typedef typename CrsExecViewTypeA::ordinal_type ordinal_type; + typedef typename CrsExecViewTypeA::value_type value_type; + typedef typename CrsExecViewTypeA::row_view_type row_view_type; + + +if ( false && member.team_rank() == 0 ) { + printf("Herk [%d +%d)x[%d +%d)\n" + , C.OffsetRows() + , C.NumRows() + , C.OffsetCols() + , C.NumCols() + ); +} + + // scale the matrix C with beta + scaleCrsMatrix<ScalarType,CrsExecViewTypeC>(member, beta, C); + + // C(i,j) += alpha*A'(i,k)*A(k,j) + for (ordinal_type k=0;k<A.NumRows();++k) { + row_view_type &a = A.RowView(k); + const ordinal_type nnz = a.NumNonZeros(); + + if (nnz > 0) { + +#if 0 + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(member, 0, nnz), + [&](const ordinal_type i) { + const ordinal_type row_at_i = a.Col(i); + // const value_type val_at_ik = conj(a.Value(i)); + const value_type val_at_ik = a.Value(i); + + row_view_type &c = C.RowView(row_at_i); + + ordinal_type idx = 0; + for (ordinal_type j=i;j<nnz && (idx > -2);++j) { + const ordinal_type col_at_j = a.Col(j); + const value_type val_at_kj = a.Value(j); + + idx = c.Index(col_at_j, idx); + if (idx >= 0) + c.Value(idx) += alpha*val_at_ik*val_at_kj; + } + }); +#else + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(member, 0, nnz*nnz), + [&](const ordinal_type ii) { + const ordinal_type i = ii / nnz ; + const 
ordinal_type j = ii % nnz ; + + row_view_type &c = C.RowView( a.Col(i) ); + + const ordinal_type idx = c.Index( a.Col(j) ); + + if (idx >= 0) { + c.Value(idx) += alpha* a.Value(i) * a.Value(j); + } + }); + +#endif + + member.team_barrier(); + } + } + + return 0; + } + +} + +#endif diff --git a/lib/kokkos/example/ichol/src/norm.hpp b/lib/kokkos/example/ichol/src/norm.hpp new file mode 100644 index 0000000000000000000000000000000000000000..be77ee0dcf2b27f6a7e50fb8eeacb45dc9d50e82 --- /dev/null +++ b/lib/kokkos/example/ichol/src/norm.hpp @@ -0,0 +1,82 @@ +#pragma once +#ifndef __NORM_HPP__ +#define __NORM_HPP__ + +/// \file norm.hpp +/// \brief Compute norm of sparse or dense matrices. +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +namespace Tacho { + + using namespace std; + + template<typename DenseExecViewType> + KOKKOS_INLINE_FUNCTION + auto + normOneDenseMatrix(DenseExecViewType &A) -> decltype(real(typename DenseExecViewType::value_type())) { + typedef typename DenseExecViewType::ordinal_type ordinal_type; + typedef typename DenseExecViewType::value_type value_type; + typedef decltype(real(value_type())) norm_type; + + const ordinal_type mA = A.NumRows(); + const ordinal_type nA = A.NumCols(); + + norm_type r_val = 0.0; + + for (ordinal_type j=0;j<nA;++j) { + norm_type col_sum_at_j = 0.0; + for (ordinal_type i=0;i<mA;++i) + col_sum_at_j += abs(A.Value(i,j)); + r_val = max(r_val, col_sum_at_j); + } + return r_val; + } + + template<typename DenseExecViewType> + KOKKOS_INLINE_FUNCTION + auto + normInfDenseMatrix(DenseExecViewType &A) -> decltype(real(typename DenseExecViewType::value_type())) { + typedef typename DenseExecViewType::ordinal_type ordinal_type; + typedef typename DenseExecViewType::value_type value_type; + typedef decltype(real(value_type())) norm_type; + + const ordinal_type mA = A.NumRows(); + const ordinal_type nA = A.NumCols(); + + norm_type r_val = 0.0; + + for (ordinal_type i=0;i<mA;++i) { + norm_type row_sum_at_i = 0.0; + for (ordinal_type 
j=0;j<nA;++j) + row_sum_at_i += abs(A.Value(i,j)); + r_val = max(r_val, row_sum_at_i); + } + return r_val; + } + + template<typename DenseExecViewType> + KOKKOS_INLINE_FUNCTION + auto + normFrobeniusDenseMatrix(DenseExecViewType &A) -> decltype(real(typename DenseExecViewType::value_type())) { + typedef typename DenseExecViewType::ordinal_type ordinal_type; + typedef typename DenseExecViewType::value_type value_type; + typedef decltype(real(value_type())) norm_type; + + const ordinal_type mA = A.NumRows(); + const ordinal_type nA = A.NumCols(); + + norm_type r_val = 0.0; + + for (ordinal_type i=0;i<mA;++i) + for (ordinal_type j=0;j<nA;++j) { + value_type val = A.Value(i,j); + // r_val += conj(val)*val; + r_val += val*val; + } + return sqrt(r_val); + } + +} + +#endif + diff --git a/lib/kokkos/example/ichol/src/partition.hpp b/lib/kokkos/example/ichol/src/partition.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a3e9f7095a6b82b62e6c27bc5f91db0e253b0451 --- /dev/null +++ b/lib/kokkos/example/ichol/src/partition.hpp @@ -0,0 +1,381 @@ + +#ifndef __PARTITION_HPP__ +#define __PARTITION_HPP__ + +/// \file partition.hpp +/// \brief Matrix partitioning utilities. 
+/// \author Kyungjoo Kim (kyukim@sandia.gov) + +namespace Tacho { + + using namespace std; + + template<typename MatView> + KOKKOS_INLINE_FUNCTION + void + Part_2x2(const MatView A, MatView &ATL, MatView &ATR, + /**************/ MatView &ABL, MatView &ABR, + const typename MatView::ordinal_type bm, + const typename MatView::ordinal_type bn, + const int quadrant) { + typename MatView::ordinal_type bmm, bnn; + + switch (quadrant) { + case Partition::TopLeft: + bmm = min(bm, A.NumRows()); + bnn = min(bn, A.NumCols()); + + ATL.setView(A.BaseObject(), + A.OffsetRows(), bmm, + A.OffsetCols(), bnn); + break; + case Partition::TopRight: + case Partition::BottomLeft: + Kokkos::abort("Tacho::Part_2x2 Not yet implemented"); + break; + case Partition::BottomRight: + bmm = A.NumRows() - min(bm, A.NumRows()); + bnn = A.NumCols() - min(bn, A.NumCols()); + + ATL.setView(A.BaseObject(), + A.OffsetRows(), bmm, + A.OffsetCols(), bnn); + break; + default: + Kokkos::abort("Tacho::Part_2x2 Invalid Input"); + break; + } + + ATR.setView(A.BaseObject(), + A.OffsetRows(), ATL.NumRows(), + A.OffsetCols() + ATL.NumCols(), A.NumCols() - ATL.NumCols()); + + ABL.setView(A.BaseObject(), + A.OffsetRows() + ATL.NumRows(), A.NumRows() - ATL.NumRows(), + A.OffsetCols(), ATL.NumCols()); + + ABR.setView(A.BaseObject(), + A.OffsetRows() + ATL.NumRows(), A.NumRows() - ATL.NumRows(), + A.OffsetCols() + ATL.NumCols(), A.NumCols() - ATL.NumCols()); + } + + template<typename MatView> + KOKKOS_INLINE_FUNCTION + void + Part_1x2(const MatView A, MatView &AL, MatView &AR, + const typename MatView::ordinal_type bn, + const int side) { + typename MatView::ordinal_type bmm, bnn; + + switch (side) { + case Partition::Left: + bmm = A.NumRows(); + bnn = min(bn, A.NumCols()); + + AL.setView(A.BaseObject(), + A.OffsetRows(), bmm, + A.OffsetCols(), bnn); + break; + case Partition::Right: + bmm = A.NumRows(); + bnn = A.NumCols() - min(bn, A.NumCols()); + + AL.setView(A.BaseObject(), + A.OffsetRows(), bmm, + 
A.OffsetCols(), bnn); + break; + default: + Kokkos::abort("Tacho::Part_1x2 Invalid Input"); + break; + } + + AR.setView(A.BaseObject(), + A.OffsetRows(), A.NumRows(), + A.OffsetCols() + AL.NumCols(), A.NumCols() - AL.NumCols()); + } + + template<typename MatView> + KOKKOS_INLINE_FUNCTION + void + Part_2x1(const MatView A, MatView &AT, + /*************/ MatView &AB, + const typename MatView::ordinal_type bm, + const int side) { + typename MatView::ordinal_type bmm, bnn; + + switch (side) { + case Partition::Top: + bmm = min(bm, A.NumRows()); + bnn = A.NumCols(); + + AT.setView(A.BaseObject(), + A.OffsetRows(), bmm, + A.OffsetCols(), bnn); + break; + case Partition::Bottom: + bmm = A.NumRows() - min(bm, A.NumRows()); + bnn = A.NumCols(); + + AT.setView(A.BaseObject(), + A.OffsetRows(), bmm, + A.OffsetCols(), bnn); + break; + default: + Kokkos::abort("Tacho::Part_2x1 Invalid Input"); + break; + } + + AB.setView(A.BaseObject(), + A.OffsetRows() + AT.NumRows(), A.NumRows() - AT.NumRows(), + A.OffsetCols(), A.NumCols()); + } + + template<typename MatView> + KOKKOS_INLINE_FUNCTION + void + Part_2x2_to_3x3(const MatView ATL, const MatView ATR, MatView &A00, MatView &A01, MatView &A02, + /***********************************/ MatView &A10, MatView &A11, MatView &A12, + const MatView ABL, const MatView ABR, MatView &A20, MatView &A21, MatView &A22, + const typename MatView::ordinal_type bm, + const typename MatView::ordinal_type bn, + const int quadrant) { + switch (quadrant) { + case Partition::TopLeft: + Part_2x2(ATL, A00, A01, + /**/ A10, A11, + bm, bn, Partition::BottomRight); + + Part_2x1(ATR, A02, + /**/ A12, + bm, Partition::Bottom); + + Part_1x2(ABL, A20, A21, + bn, Partition::Right); + + A22.setView(ABR.BaseObject(), + ABR.OffsetRows(), ABR.NumRows(), + ABR.OffsetCols(), ABR.NumCols()); + break; + case Partition::TopRight: + case Partition::BottomLeft: + Kokkos::abort("Tacho::Part_???"); + break; + case Partition::BottomRight: + A00.setView(ATL.BaseObject(), + 
ATL.OffsetRows(), ATL.NumRows(), + ATL.OffsetCols(), ATL.NumCols()); + + Part_1x2(ATR, A01, A02, + bn, Partition::Left); + + Part_2x1(ABL, A10, + /**/ A20, + bm, Partition::Top); + + Part_2x2(ABR, A11, A12, + /**/ A21, A22, + bm, bn, Partition::TopLeft); + break; + default: + Kokkos::abort("Tacho::Part_???"); + break; + } + } + /* Part_2x1_to_3x1: turn the row split AT/AB into three row blocks A0/A1/A2. For Partition::Top, AT is split by Part_2x1 (block size bm, Partition::Bottom) into A0/A1 and A2 is set to the same view as AB; for Partition::Bottom, A0 is set to the same view as AT and AB is split by Part_2x1 (block size bm, Partition::Top) into A1/A2. Any other side calls Kokkos::abort. */ + template<typename MatView> + KOKKOS_INLINE_FUNCTION + void + Part_2x1_to_3x1(const MatView AT, MatView &A0, + /***************/ MatView &A1, + const MatView AB, MatView &A2, + const typename MatView::ordinal_type bm, + const int side) { + switch (side) { + case Partition::Top: + Part_2x1(AT, A0, + /**/ A1, + bm, Partition::Bottom); + + A2.setView(AB.BaseObject(), + AB.OffsetRows(), AB.NumRows(), + AB.OffsetCols(), AB.NumCols()); + break; + case Partition::Bottom: + A0.setView(AT.BaseObject(), + AT.OffsetRows(), AT.NumRows(), + AT.OffsetCols(), AT.NumCols()); + + Part_2x1(AB, A1, + /**/ A2, + bm, Partition::Top); + break; + default: + Kokkos::abort("Tacho::Part_???"); + break; + } + } + /* Part_1x2_to_1x3: turn the column split AL|AR into three column blocks A0|A1|A2. For Partition::Left, AL is split by Part_1x2 (block size bn, Partition::Right) into A0|A1 and A2 is set to the same view as AR; for Partition::Right, A0 is set to the same view as AL and AR is split by Part_1x2 (block size bn, Partition::Left) into A1|A2. Any other side calls Kokkos::abort. */ + template<typename MatView> + KOKKOS_INLINE_FUNCTION + void + Part_1x2_to_1x3(const MatView AL, const MatView AR, + MatView &A0, MatView &A1, MatView &A2, + const typename MatView::ordinal_type bn, + const int side) { + switch (side) { + case Partition::Left: + Part_1x2(AL, A0, A1, + bn, Partition::Right); + + A2.setView(AR.BaseObject(), /* same accessor the Partition::Right case uses on AL */ + AR.OffsetRows(), AR.NumRows(), + AR.OffsetCols(), AR.NumCols()); + break; + case Partition::Right: + A0.setView(AL.BaseObject(), + AL.OffsetRows(), AL.NumRows(), + AL.OffsetCols(), AL.NumCols()); + + Part_1x2(AR, A1, A2, + bn, Partition::Left); + break; + default: + Kokkos::abort("Tacho::Part_???"); + break; + } + } + /* Merge_2x2: set A to the view in ATL's base object that starts at ATL's row and column offsets and spans ATL.NumRows() plus ABR.NumRows() rows by ATL.NumCols() plus ABR.NumCols() columns. ATR and ABL are not read. */ + template<typename MatView> + KOKKOS_INLINE_FUNCTION + void + Merge_2x2(const MatView ATL, const MatView ATR, + const MatView ABL, const MatView ABR, MatView &A) { + A.setView(ATL.BaseObject(), + ATL.OffsetRows(), ATL.NumRows() + ABR.NumRows(), + ATL.OffsetCols(), ATL.NumCols() + ABR.NumCols()); + } + + 
template<typename MatView> + KOKKOS_INLINE_FUNCTION + void + Merge_1x2(const MatView AL, const MatView AR, MatView &A) { + A.setView(AL.BaseObject(), + AL.OffsetRows(), AL.NumRows(), + AL.OffsetCols(), AL.NumCols() + AR.NumCols()); + } + + template<typename MatView> + KOKKOS_INLINE_FUNCTION + void + Merge_2x1(const MatView AT, + const MatView AB, MatView &A) { + A.setView(AT.BaseObject(), + AT.OffsetRows(), AT.NumRows() + AB.NumRows(), + AT.OffsetCols(), AT.NumCols()); + } + + template<typename MatView> + KOKKOS_INLINE_FUNCTION + void + Merge_3x3_to_2x2(const MatView A00, const MatView A01, const MatView A02, MatView &ATL, MatView &ATR, + const MatView A10, const MatView A11, const MatView A12, + const MatView A20, const MatView A21, const MatView A22, MatView &ABL, MatView &ABR, + const int quadrant) { + switch (quadrant) { + case Partition::TopLeft: + Merge_2x2(A00, A01, + A10, A11, ATL); + + Merge_2x1(A02, + A12, ATR); + + Merge_1x2(A20, A21, ABL); + + ABR.setView(A22.BaseObject(), + A22.OffsetRows(), A22.NumRows(), + A22.OffsetCols(), A22.NumCols()); + break; + case Partition::TopRight: + case Partition::BottomLeft: + Kokkos::abort("Tacho::Part_???"); + break; + case Partition::BottomRight: + ATL.setView(A00.BaseObject(), + A00.OffsetRows(), A00.NumRows(), + A00.OffsetCols(), A00.NumCols()); + + Merge_1x2(A01, A02, ATR); + + Merge_2x1(A10, + A20, ABL); + + Merge_2x2(A11, A12, + A21, A22, ABR); + break; + default: + Kokkos::abort("Tacho::Part_???"); + break; + } + } + + template<typename MatView> + KOKKOS_INLINE_FUNCTION + void + Merge_3x1_to_2x1(const MatView A0, MatView &AT, + const MatView A1, + const MatView A2, MatView &AB, + const int side) { + switch (side) { + case Partition::Top: + Merge_2x1(A0, + A1, AT); + + AB.setView(A2.BaseObject(), + A2.OffsetRows(), A2.NumRows(), + A2.OffsetCols(), A2.NumCols()); + break; + case Partition::Bottom: + AT.setView(A0.BaseObject(), + A0.OffsetRows(), A0.NumRows(), + A0.OffsetCols(), A0.NumCols()); + + Merge_2x1(A1, + 
A2, AB); + break; + default: + Kokkos::abort("Tacho::Part_???"); + break; + } + } + + template<typename MatView> + KOKKOS_INLINE_FUNCTION + void + Merge_1x3_to_1x2(const MatView A0, const MatView A1, const MatView A2, + MatView &AL, MatView &AR, + const int side) { + switch (side) { + case Partition::Left: + Merge_1x2(A0, A1, AL); + + AR.setView(A2.BaseObject(), + A2.OffsetRows(), A2.NumRows(), + A2.OffsetCols(), A2.NumCols()); + break; + case Partition::Right: + AL.setView(A0.BaseObject(), + A0.OffsetRows(), A0.NumRows(), + A0.OffsetCols(), A0.NumCols()); + + Merge_1x2(A1, A2, AR); + break; + default: + Kokkos::abort("Tacho::Part_???"); + break; + } + } + + +} + +#endif diff --git a/lib/kokkos/example/ichol/src/scale.hpp b/lib/kokkos/example/ichol/src/scale.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3152520966d88caeaede7d81c8a9bf826400d610 --- /dev/null +++ b/lib/kokkos/example/ichol/src/scale.hpp @@ -0,0 +1,92 @@ +#pragma once +#ifndef __SCALE_HPP__ +#define __SCALE_HPP__ + +/// \file scale.hpp +/// \brief Scaling sparse matrix. 
+/// \author Kyungjoo Kim (kyukim@sandia.gov) + +namespace Tacho { + + using namespace std; + + template<typename T> struct ScaleTraits { + typedef T scale_type; + // assume built-in types have appropriate type conversion + static constexpr T one = 1 ; + static constexpr T zero = 0 ; + }; + + + template<typename ScalarType, + typename CrsExecViewType> + KOKKOS_INLINE_FUNCTION + int + scaleCrsMatrix(const typename CrsExecViewType::policy_type::member_type &member, + const ScalarType alpha, + typename CrsExecViewType::matrix_type &A) { + typedef typename CrsExecViewType::ordinal_type ordinal_type; + typedef typename CrsExecViewType::value_type value_type; + typedef typename CrsExecViewType::row_view_type row_view_type; + + if (alpha == ScaleTraits<value_type>::one) { + // do nothing + } else { + const ordinal_type mA = A.NumRows(); + if (mA > 0) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, mA), + [&](const ordinal_type i) { + row_view_type &row = A.RowView(i); + for (ordinal_type j=0;j<row.NumNonZeros();++j) + row.Value(j) *= alpha; + }); + member.team_barrier(); + } + } + + return 0; + } + + template<typename ScalarType, + typename DenseExecViewType> + KOKKOS_INLINE_FUNCTION + int + scaleDenseMatrix(const typename DenseExecViewType::policy_type::member_type &member, + const ScalarType alpha, + DenseExecViewType &A) { + typedef typename DenseExecViewType::ordinal_type ordinal_type; + typedef typename DenseExecViewType::value_type value_type; + + if (alpha == ScaleTraits<value_type>::one) { + // do nothing + } else { + if (A.BaseObject().ColStride() > A.BaseObject().RowStride()) { + const ordinal_type nA = A.NumCols(); + if (nA > 0) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, nA), + [&](const ordinal_type j) { + for (ordinal_type i=0;i<A.NumRows();++i) + A.Value(i, j) *= alpha; + }); + member.team_barrier(); + } + } else { + const ordinal_type mA = A.NumRows(); + if (mA > 0) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, 
mA), + [&](const ordinal_type i) { + for (ordinal_type j=0;j<A.NumCols();++j) + A.Value(i, j) *= alpha; + }); + member.team_barrier(); + } + } + } + + return 0; + } + +} + +#endif + diff --git a/lib/kokkos/example/ichol/src/symbolic_factor_helper.hpp b/lib/kokkos/example/ichol/src/symbolic_factor_helper.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f6c381a99817ca5254ef3563fe48941410870ad7 --- /dev/null +++ b/lib/kokkos/example/ichol/src/symbolic_factor_helper.hpp @@ -0,0 +1,379 @@ +#pragma once +#ifndef __SYMBOLIC_FACTOR_HELPER_HPP__ +#define __SYMBOLIC_FACTOR_HELPER_HPP__ + +/// \file symbolic_factor_helper.hpp +/// \brief The class compute a nonzero pattern with a given level of fills +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +#include "util.hpp" + +namespace Tacho { + + using namespace std; + + template<class CrsMatrixType> + class SymbolicFactorHelper : public Disp { + public: + typedef typename CrsMatrixType::ordinal_type ordinal_type; + typedef typename CrsMatrixType::size_type size_type; + + typedef typename Kokkos::HostSpace::execution_space host_exec_space ; + + typedef typename CrsMatrixType::ordinal_type_array ordinal_type_array; + typedef typename CrsMatrixType::size_type_array size_type_array; + typedef typename CrsMatrixType::value_type_array value_type_array; + + private: + string _label; // name of this class + + // matrix index base + CrsMatrixType _A; // input matrix + ordinal_type _m, _n; // matrix dimension + + struct crs_graph { + size_type_array _ap; // row ptr array + ordinal_type_array _aj; // col index array + size_type _nnz; // # of nonzeros + }; + typedef struct crs_graph crs_graph_type; + crs_graph_type _in, _out; + + typedef Kokkos::View<ordinal_type**, Kokkos::LayoutLeft, host_exec_space> league_specific_ordinal_type_array; + typedef typename league_specific_ordinal_type_array::value_type* league_specific_ordinal_type_array_ptr; + + int _lsize; + league_specific_ordinal_type_array _queue, _visited, 
_distance; + + void createInternalWorkSpace() { + _queue = league_specific_ordinal_type_array(_label+"::QueueArray", _m, _lsize); + _visited = league_specific_ordinal_type_array(_label+"::VisitedArray", _m, _lsize); + _distance = league_specific_ordinal_type_array(_label+"::DistanceArray", _m, _lsize); + } + + void freeInternalWorkSpace() { + _queue = league_specific_ordinal_type_array(); + _visited = league_specific_ordinal_type_array(); + _distance = league_specific_ordinal_type_array(); + } + + public: + + void setLabel(string label) { _label = label; } + string Label() const { return _label; } + + SymbolicFactorHelper(const CrsMatrixType &A, + const int lsize = (host_exec_space::thread_pool_size(0)/ + host_exec_space::thread_pool_size(2))) { + + _label = "SymbolicFactorHelper::" ; + + // matrix index base and the number of rows + _A = A; + + _m = _A.NumRows(); + _n = _A.NumCols(); + + // allocate memory for input crs matrix + _in._nnz = _A.NumNonZeros(); + _in._ap = size_type_array(_label+"::Input::RowPtrArray", _m+1); + _in._aj = ordinal_type_array(_label+"::Input::ColIndexArray", _in._nnz); + + // adjust graph structure; A is assumed to have a graph without its diagonal + A.convertGraph(_in._ap, _in._aj); + _in._nnz = _in._ap[_m]; + + // league size + _lsize = lsize; + + // create workspace per league + createInternalWorkSpace(); + } + virtual~SymbolicFactorHelper() { + freeInternalWorkSpace(); + } + + class Queue { + private: + league_specific_ordinal_type_array_ptr _q; + ordinal_type _begin, _end; + + public: + Queue(league_specific_ordinal_type_array_ptr q) + : _q(q),_begin(0),_end(0) { } + + ordinal_type size() const { return _end - _begin; } + bool empty() const { return !size(); } + + void push(const ordinal_type val) { _q[_end++] = val; } + ordinal_type pop() { return _q[_begin++]; } + ordinal_type end() { return _end; } + void reset() { _begin = 0; _end = 0; } + }; + + class FunctorComputeNonZeroPatternInRow { + public: + typedef 
Kokkos::TeamPolicy<host_exec_space> policy_type; + + private: + ordinal_type _level, _m; + crs_graph_type _graph; + + league_specific_ordinal_type_array _queue; + league_specific_ordinal_type_array _visited; + league_specific_ordinal_type_array _distance; + + size_type_array _ap; + ordinal_type_array _aj; + + ordinal_type _phase; /* 0: count nonzeros per row into _ap, 1: write column indices into _aj */ + + public: + FunctorComputeNonZeroPatternInRow(const ordinal_type level, + const ordinal_type m, + const crs_graph_type &graph, + league_specific_ordinal_type_array &queue, + league_specific_ordinal_type_array &visited, + league_specific_ordinal_type_array &distance, + size_type_array &ap, + ordinal_type_array &aj) + : _level(level), _m(m), _graph(graph), + _queue(queue), _visited(visited), _distance(distance), + _ap(ap), _aj(aj), _phase(0) + { } + + void setPhaseCountNumNonZeros() { _phase = 0; } + void setPhaseComputeColIndex() { _phase = 1; } + + inline + void operator()(const typename policy_type::member_type &member) const { + const int lrank = member.league_rank(); + const int lsize = member.league_size(); + /* each league uses its own workspace slice, starting at element (0, lrank) */ + league_specific_ordinal_type_array_ptr queue = &_queue(0, lrank); + league_specific_ordinal_type_array_ptr distance = &_distance(0, lrank); + league_specific_ordinal_type_array_ptr visited = &_visited(0, lrank); + + for (ordinal_type i=0;i<_m;++i) + visited[i] = 0; + + // shuffle rows to get better load balance; + // for instance, if ND is applied, more fills are generated in the last separator. 
+ for (ordinal_type i=lrank;i<_m;i+=lsize) { + + size_type cnt = 0; + + // account for the diagonal + switch (_phase) { + case 0: + cnt = 1; + break; + case 1: + cnt = _ap[i]; + _aj[cnt++] = i; + break; + } + + { + Queue q(queue); // fixed size queue + + // initialize work space + q.push(i); + distance[i] = 0; + + const ordinal_type id = (i+1); /* nonzero per-row stamp, so visited[] is not cleared between rows */ + visited[i] = id; + + // breadth-first search starting from row i + while (!q.empty()) { + const ordinal_type h = q.pop(); + // loop over the adjacency list of h + const ordinal_type jbegin = _graph._ap[h], jend = _graph._ap[h+1]; + for (ordinal_type j=jbegin;j<jend;++j) { + const ordinal_type t = _graph._aj[j]; + if (visited[t] != id) { + visited[t] = id; + /* t < i: keep expanding while distance[h] is below _level (a negative _level disables the limit) */ + if (t < i && (_level < 0 || distance[h] < _level)) { + q.push(t); + distance[t] = distance[h] + 1; + } + if (t > i) { + switch (_phase) { + case 0: + ++cnt; + break; + case 1: + _aj[cnt++] = t; + break; + } + } + } + } + } + + // clear work space + for (ordinal_type j=0;j<q.end();++j) { + const ordinal_type jj = queue[j]; + distance[jj] = 0; + } + q.reset(); + } + switch (_phase) { + case 0: + _ap[i+1] = cnt; + break; + case 1: + sort(_aj.data() + _ap[i] , _aj.data() + _ap[i+1]); + break; + } + } + } + }; + /* Scan functor: adds _off_in_rows(i) to update and, on the final pass, overwrites _off_in_rows(i) with that running total (entry i included). */ + class FunctorCountOffsetsInRow { + public: + typedef Kokkos::RangePolicy<host_exec_space> policy_type; + typedef size_type value_type; + + private: + size_type_array _off_in_rows; + + public: + FunctorCountOffsetsInRow(size_type_array &off_in_rows) + : _off_in_rows(off_in_rows) + { } + + KOKKOS_INLINE_FUNCTION + void init(value_type &update) const { + update = 0; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const typename policy_type::member_type &i, value_type &update, const bool final) const { + update += _off_in_rows(i); + if (final) + _off_in_rows(i) = update; + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type &update, + volatile const value_type &input) const { + update += input; + } + }; + + int createNonZeroPattern(const ordinal_type level, + const int uplo, + 
CrsMatrixType &F) { + // all output array should be local and rcp in Kokkos::View manage memory (de)allocation + size_type_array ap = size_type_array(_label+"::Output::RowPtrArray", _m+1); + + // later determined + ordinal_type_array aj; + value_type_array ax; + size_type nnz = 0; + + { + FunctorComputeNonZeroPatternInRow functor(level, _m, _in, + _queue, + _visited, + _distance, + ap, + aj); + + functor.setPhaseCountNumNonZeros(); + Kokkos::parallel_for(typename FunctorComputeNonZeroPatternInRow::policy_type(_lsize, 1), functor); + } + { + FunctorCountOffsetsInRow functor(ap); + Kokkos::parallel_scan(typename FunctorCountOffsetsInRow::policy_type(0, _m+1), functor); + } + + nnz = ap[_m]; + aj = ordinal_type_array(_label+"::Output::ColIndexArray", nnz); + ax = value_type_array(_label+"::Output::ValueArray", nnz); + + { + FunctorComputeNonZeroPatternInRow functor(level, _m, _in, + _queue, + _visited, + _distance, + ap, + aj); + + functor.setPhaseComputeColIndex(); + Kokkos::parallel_for(typename FunctorComputeNonZeroPatternInRow::policy_type(_lsize, 1), functor); + } + + { + F = CrsMatrixType("dummy", _m, _n, nnz, ap, aj, ax); + F.add(_A); + } + + // record the symbolic factors + _out._nnz = nnz; + _out._ap = ap; + _out._aj = aj; + + return 0; + } + + int createNonZeroPattern(const int uplo, + CrsMatrixType &F) { + return createNonZeroPattern(-1, uplo, F); + } + + ostream& showMe(ostream &os) const { + streamsize prec = os.precision(); + os.precision(15); + os << scientific; + + const int w = 6; + + os << " -- Matrix Dimension -- " << endl + << " # of Rows = " << _m << endl + << " # of Cols = " << _n << endl; + + os << endl; + + os << " -- Input Graph Without Diagonals -- " << endl + << " # of NonZeros = " << _in._nnz << endl ; + + os << " -- Input Graph :: RowPtr -- " << endl; + { + const ordinal_type n0 = _in._ap.dimension_0(); + for (ordinal_type i=0;i<n0;++i) + os << setw(w) << i + << setw(w) << _in._ap[i] + << endl; + } + + os << endl; + + os << " -- Output 
Graph With Diagonals-- " << endl + << " # of NonZeros = " << _out._nnz << endl ; + + os << " -- Output Graph :: RowPtr -- " << endl; + { + const ordinal_type n0 = _out._ap.dimension_0(); + for (ordinal_type i=0;i<n0;++i) + os << setw(w) << i + << setw(w) << _out._ap[i] + << endl; + } + + os.unsetf(ios::scientific); + os.precision(prec); + + return os; + } + + }; + +} + +#endif + + + diff --git a/lib/kokkos/example/ichol/src/symbolic_task.hpp b/lib/kokkos/example/ichol/src/symbolic_task.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f6cdc28ab133d123803fff40d5906cfaa58371ea --- /dev/null +++ b/lib/kokkos/example/ichol/src/symbolic_task.hpp @@ -0,0 +1,118 @@ +#pragma once +#ifndef __SYMBOLIC_TASK_HPP__ +#define __SYMBOLIC_TASK_HPP__ + +/// \file symbolic_task.hpp +/// \brief Provides tasking interface with graphviz output. +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +namespace Tacho { + + using namespace std; + + /// \brief Graphviz color mapping for the generated tasks. 
+ static map<string,string> g_graphviz_color = { + { "chol/scalar", "indianred2"}, + { "chol/trsm", "orange2" }, + { "chol/gemm", "lightblue2"} }; + + class SymbolicTaskQueue; + + class SymbolicTask { + private: + string _name; + set<SymbolicTask*> _dep_tasks; + + public: + // at this moment, make the queue global + // but this should be local and work with + // multiple queues with separate thread teams + typedef SymbolicTaskQueue queue; + + SymbolicTask() + : _name("no-name") + { } + + SymbolicTask(const SymbolicTask &b) + : _name(b._name) + { } + + SymbolicTask(const string name) + : _name(name) + { } + + int addDependence(SymbolicTask *b) { + if (b != NULL) + _dep_tasks.insert(b); + return 0; + } + + int clearDependence() { + _dep_tasks.clear(); + return 0; + } + + ostream& showMe(ostream &os) const { + os << " uid = " << this << " , name = " << _name << ", # of deps = " << _dep_tasks.size() << endl; + if (_dep_tasks.size()) { + for (auto it=_dep_tasks.begin();it!=_dep_tasks.end();++it) + os << " " << (*it) << " , name = " << (*it)->_name << endl; + } + return os; + } + + ostream& graphviz(ostream &os) const { + os << (long)(this) + << " [label=\"" << _name ; + auto it = g_graphviz_color.find(_name); + if (it != g_graphviz_color.end()) + os << "\" ,style=filled,color=\"" << it->second << "\" "; + os << "];"; + for (auto it=_dep_tasks.begin();it!=_dep_tasks.end();++it) + os << (long)(*it) << " -> " << (long)this << ";"; + return (os << endl); + } + + }; + + static vector<SymbolicTask*> g_queue; + + class SymbolicTaskQueue { + public: + static SymbolicTask* push(SymbolicTask *task) { + g_queue.push_back(task); + return g_queue.back(); + } + + static int clear() { + for (auto it=g_queue.begin();it!=g_queue.end();++it) + delete (*it); + g_queue.clear(); + return 0; + } + + static ostream& showMe(ostream &os) { + if (g_queue.size()) { + os << " -- Symbolic Task Queue -- " << endl; + for (auto it=g_queue.begin();it!=g_queue.end();++it) + (*it)->showMe(os); + } else { 
+ os << " -- Symbolic Task Queue is empty -- " << endl; + } + return os; + } + + static ostream& graphviz(ostream &os, + const double width = 7.5, + const double length = 10.0) { + os << "digraph TaskGraph {" << endl; + os << "size=\"" << width << "," << length << "\";" << endl; + for (auto it=g_queue.begin();it!=g_queue.end();++it) + (*it)->graphviz(os); + os << "}" << endl; + return (os << endl); + } + }; + +} +#endif diff --git a/lib/kokkos/example/ichol/src/task_factory.hpp b/lib/kokkos/example/ichol/src/task_factory.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b829da6737dfa3423b800aa6021b2c33e94b2c78 --- /dev/null +++ b/lib/kokkos/example/ichol/src/task_factory.hpp @@ -0,0 +1,77 @@ +#pragma once +#ifndef __TASK_FACTORY_HPP__ +#define __TASK_FACTORY_HPP__ + +/// \file task_factory.hpp +/// \brief A wrapper for task policy and future with a provided space type. +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +namespace Tacho { + + using namespace std; + + /// \class TaskFactory + /// \brief Minimal interface to Kokkos tasking. + /// + /// TaskFactory is attached to blocks as a template argument in order to + /// create and manage tasking future objects. Note that policy (shared + /// pointer to the task generator) is not a member object in this class. + /// This class includes minimum interface for tasking with type decralation + /// of the task policy and template alias of future so that future objects + /// generated in this class will match to their policy and its execution space. 
+  ///
+  /// Every member is a static forwarder to the policy object passed in by
+  /// the caller. PolicyType must provide task_create_team, spawn,
+  /// add_dependence, clear_dependence and respawn; FutureType must be
+  /// default-constructible and provide is_null().
+  template<typename PolicyType,
+           typename FutureType>
+  class TaskFactory {
+  private:
+    // Passed as the second argument of task_create_team
+    // (presumably the dependence capacity of the new task -- TODO confirm).
+    static constexpr int _max_task_dependence = 10 ;
+
+  public:
+    typedef PolicyType policy_type;
+    typedef FutureType future_type;
+
+    /// Creates a team task from func through the policy and returns its future.
+    /// Calls Kokkos::abort if the policy hands back a null future.
+    template<typename TaskFunctorType>
+    static KOKKOS_INLINE_FUNCTION
+    future_type create(policy_type &policy, const TaskFunctorType &func) {
+
+      future_type f ;
+      // while ( f.is_null() ) {
+      f = policy.task_create_team(func, _max_task_dependence);
+      // }
+      if ( f.is_null() ) Kokkos::abort("task_create_team FAILED, out of memory");
+      return f ;
+    }
+
+    /// Forwards to policy.spawn(obj, priority).
+    static KOKKOS_INLINE_FUNCTION
+    void spawn(policy_type &policy, const future_type &obj, bool priority = false ) {
+      policy.spawn(obj,priority);
+    }
+
+    /// Forwards to policy.add_dependence(after, before) for two futures.
+    static KOKKOS_INLINE_FUNCTION
+    void addDependence(policy_type &policy,
+                       const future_type &after, const future_type &before) {
+      policy.add_dependence(after, before);
+    }
+
+    /// Forwards to policy.add_dependence(after, before) where `after` is a
+    /// pointer to a task functor rather than a future.
+    template<typename TaskFunctorType>
+    static KOKKOS_INLINE_FUNCTION
+    void addDependence(policy_type &policy,
+                       TaskFunctorType *after, const future_type &before) {
+      policy.add_dependence(after, before);
+    }
+
+    /// Forwards to policy.clear_dependence(func).
+    template<typename TaskFunctorType>
+    static KOKKOS_INLINE_FUNCTION
+    void clearDependence(policy_type &policy, TaskFunctorType *func) {
+      policy.clear_dependence(func);
+    }
+
+    /// Forwards to policy.respawn(func).
+    template<typename TaskFunctorType>
+    static KOKKOS_INLINE_FUNCTION
+    void respawn(policy_type &policy, TaskFunctorType *func) {
+      policy.respawn(func);
+    }
+  };
+}
+
+#endif
diff --git a/lib/kokkos/example/ichol/src/task_view.hpp b/lib/kokkos/example/ichol/src/task_view.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ce280a325fd6a460c687f15e0a69c4aa6dd0e8b5
--- /dev/null
+++ b/lib/kokkos/example/ichol/src/task_view.hpp
@@ -0,0 +1,104 @@
+#pragma once
+#ifndef __TASK_VIEW_HPP__
+#define __TASK_VIEW_HPP__
+
+/// \file task_view.hpp
+/// \brief Task view is inherited from matrix view and have a member for the task handler.
+/// \author Kyungjoo Kim (kyukim@sandia.gov)
+
+namespace Tacho {
+
+  using namespace std;
+
+  /// \class TaskView
+  /// \brief A MatrixViewType extended with one future of the task factory's future_type.
+  ///
+  /// Every constructor forwards its arguments to MatrixViewType and
+  /// default-constructs the future. Copy construction is deleted.
+  template<typename MatrixViewType,
+           typename TaskFactoryType>
+  class TaskView : public MatrixViewType {
+  public:
+    typedef MatrixViewType matrix_type ;
+    typedef typename MatrixViewType::value_type value_type;
+    typedef typename MatrixViewType::ordinal_type ordinal_type;
+
+    typedef TaskFactoryType task_factory_type;
+    typedef typename task_factory_type::policy_type policy_type;
+    typedef typename task_factory_type::future_type future_type;
+
+  private:
+    future_type _f;
+
+  public:
+    /// Replaces the stored future with f.
+    KOKKOS_INLINE_FUNCTION
+    void setFuture(const future_type &f)
+    { _f = f; }
+
+    /// Returns a copy of the stored future.
+    KOKKOS_INLINE_FUNCTION
+    future_type Future() const { return _f; }
+
+    KOKKOS_INLINE_FUNCTION
+    ~TaskView() = default ;
+
+    KOKKOS_INLINE_FUNCTION
+    TaskView()
+      : MatrixViewType(), _f()
+    { }
+
+    TaskView(const TaskView &b) = delete ;
+
+    /// Builds the view from the base matrix object b.
+    KOKKOS_INLINE_FUNCTION
+    TaskView(typename MatrixViewType::mat_base_type const & b)
+      : MatrixViewType(b), _f()
+    { }
+
+    /// Builds the view from b with offsets and extents forwarded unchanged to
+    /// MatrixViewType (presumably row offset/count, then column offset/count).
+    KOKKOS_INLINE_FUNCTION
+    TaskView(typename MatrixViewType::mat_base_type const & b,
+             const ordinal_type offm, const ordinal_type m,
+             const ordinal_type offn, const ordinal_type n)
+      : MatrixViewType(b, offm, m, offn, n), _f()
+    { }
+
+  };
+}
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#if ! KOKKOS_USING_EXP_VIEW
+
+namespace Kokkos {
+  namespace Impl {
+
+    // The Kokkos::View allocation will by default assign each allocated datum to zero.
+    // This is not the required initialization behavior when
+    // non-trivial objects are used within a Kokkos::View.
+    // Create a partial specialization of the Kokkos::Impl::AViewDefaultConstruct
+    // to replace the assignment initialization with placement new initialization.
+    //
+    // This work-around is necessary until a TBD design refactorization of Kokkos::View.
+
+    // Specialization for Tacho::TaskView elements: the constructor runs a
+    // parallel_for over [0, capacity) in ExecSpace, placement-news a
+    // default-constructed TaskView into each slot, then fences.
+    template< class ExecSpace , typename T1, typename T2 >
+    struct ViewDefaultConstruct< ExecSpace , Tacho::TaskView<T1,T2> , true >
+    {
+      typedef Tacho::TaskView<T1,T2> type ;
+      type * const m_ptr ;
+
+      // Constructs element i in place.
+      KOKKOS_FORCEINLINE_FUNCTION
+      void operator()( const typename ExecSpace::size_type& i ) const
+      { new(m_ptr+i) type(); }
+
+      // pointer: start of the element storage; capacity: number of elements to construct.
+      ViewDefaultConstruct( type * pointer , size_t capacity )
+        : m_ptr( pointer )
+      {
+        Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
+        parallel_for( range , *this );
+        ExecSpace::fence();
+      }
+    };
+
+  } // namespace Impl
+} // namespace Kokkos
+
+#endif /* #if ! KOKKOS_USING_EXP_VIEW */
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#endif
diff --git a/lib/kokkos/example/ichol/src/trsm.hpp b/lib/kokkos/example/ichol/src/trsm.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..b4a6a7df48967257f824ae73680bf918d457be76
--- /dev/null
+++ b/lib/kokkos/example/ichol/src/trsm.hpp
@@ -0,0 +1,92 @@
+#pragma once
+#ifndef __TRSM_HPP__
+#define __TRSM_HPP__
+
+/// \file trsm.hpp
+/// \brief Sparse triangular solve on given sparse patterns and multiple rhs.
+/// \author Kyungjoo Kim (kyukim@sandia.gov)
+
+#include "util.hpp"
+#include "control.hpp"
+#include "partition.hpp"
+
+namespace Tacho {
+
+  using namespace std;
+
+  /// \brief Triangular solve, selected at compile time by side, uplo,
+  ///        transpose, algorithm and variant tags (the integer constants
+  ///        declared in util.hpp).
+  ///
+  /// Only invoke() is declared here; its definitions are supplied as
+  /// specializations in the headers included at the end of this file.
+  template<int ArgSide,int ArgUplo, int ArgTrans, int ArgAlgo,
+           int ArgVariant = Variant::One,
+           template<int,int> class ControlType = Control>
+  struct Trsm {
+
+    // data-parallel interface
+    // =======================
+    template<typename ScalarType,
+             typename ExecViewTypeA,
+             typename ExecViewTypeB>
+    KOKKOS_INLINE_FUNCTION
+    static int invoke(typename ExecViewTypeA::policy_type &policy,
+                      const typename ExecViewTypeA::policy_type::member_type &member,
+                      const int diagA,
+                      const ScalarType alpha,
+                      typename ExecViewTypeA::matrix_type &A,
+                      typename ExecViewTypeB::matrix_type &B);
+
+    // task-data parallel interface
+    // ============================
+    /// Task functor that stores the arguments of one invoke() call (including
+    /// a copy of the policy) and runs it from apply(). The result of invoke()
+    /// is written to r_val.
+    template<typename ScalarType,
+             typename ExecViewTypeA,
+             typename ExecViewTypeB>
+    class TaskFunctor {
+    public:
+      typedef typename ExecViewTypeA::policy_type policy_type;
+      typedef typename policy_type::member_type member_type;
+      typedef int value_type;
+
+    private:
+      int _diagA;
+      ScalarType _alpha;
+      typename ExecViewTypeA::matrix_type _A;
+      typename ExecViewTypeB::matrix_type _B;
+
+      policy_type _policy;
+
+    public:
+      KOKKOS_INLINE_FUNCTION
+      TaskFunctor(const policy_type & P,
+                  const int diagA,
+                  const ScalarType alpha,
+                  const ExecViewTypeA & A,
+                  const ExecViewTypeB & B)
+        : _diagA(diagA),
+          _alpha(alpha),
+          _A(A),
+          _B(B),
+          _policy(P)
+      { }
+
+      string Label() const { return "Trsm"; }
+
+      // task execution
+      // Runs invoke() with the member obtained from _policy.member_single().
+      KOKKOS_INLINE_FUNCTION
+      void apply(value_type &r_val) {
+        r_val = Trsm::invoke<ScalarType,ExecViewTypeA,ExecViewTypeB>(_policy, _policy.member_single(),
+                                                                     _diagA, _alpha, _A, _B);
+      }
+
+      // task-data execution
+      // Runs invoke() with the team member handed in by the caller.
+      KOKKOS_INLINE_FUNCTION
+      void apply(const member_type &member, value_type &r_val) {
+        r_val = Trsm::invoke<ScalarType,ExecViewTypeA,ExecViewTypeB>(_policy, member,
+                                                                     _diagA, _alpha, _A, _B);
+      }
+
+    };
+  };
+
+}
+
+// #include
"trsm_l_u_nt.hpp" +#include "trsm_l_u_ct.hpp" + +#endif diff --git a/lib/kokkos/example/ichol/src/trsm_l_u_ct.hpp b/lib/kokkos/example/ichol/src/trsm_l_u_ct.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b6f3289474518bd88e55db198e4d2ad8efa7e435 --- /dev/null +++ b/lib/kokkos/example/ichol/src/trsm_l_u_ct.hpp @@ -0,0 +1,14 @@ +#pragma once +#ifndef __TRSM_L_U_CT_HPP__ +#define __TRSM_L_U_CT_HPP__ + +/// \file trsm_l_u_ct.hpp +/// \brief Sparse triangular solve on given sparse patterns and multiple rhs. +/// \author Kyungjoo Kim (kyukim@sandia.gov) +/// +#include "gemm.hpp" + +#include "trsm_l_u_ct_for_factor_blocked.hpp" +// #include "trsm_l_u_ct_for_tri_solve_blocked.hpp" + +#endif diff --git a/lib/kokkos/example/ichol/src/trsm_l_u_ct_for_factor_blocked.hpp b/lib/kokkos/example/ichol/src/trsm_l_u_ct_for_factor_blocked.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7414e5d80f07f895a8cd4e5182acb3fc9976be58 --- /dev/null +++ b/lib/kokkos/example/ichol/src/trsm_l_u_ct_for_factor_blocked.hpp @@ -0,0 +1,185 @@ +#pragma once +#ifndef __TRSM_L_U_CT_FOR_FACTOR_BLOCKED_HPP__ +#define __TRSM_L_U_CT_FOR_FACTOR_BLOCKED_HPP__ + +/// \file trsm_l_u_ct_for_factor_blocked.hpp +/// \brief Sparse triangular solve on given sparse patterns and multiple rhs. 
+/// \author Kyungjoo Kim (kyukim@sandia.gov)
+///
+
+namespace Tacho {
+
+  using namespace std;
+
+  // Trsm used in the factorization phase: data parallel on b1t
+  // ==========================================================
+  // For each row k of A: entry 0 of the row is taken as the diagonal and
+  // row k of B is divided by it (skipped when diagA == Diag::Unit); then,
+  // for every nonzero j of that B row and every remaining entry i >= 1 of
+  // the A row, the matching entry of B row a.Col(i) is decremented by
+  // a.Value(i) * b1.Value(j). Team threads are split over the nonzeros of
+  // B's row k in both loops. No conjugate is applied (those lines are
+  // commented out). Always returns 0.
+  template<>
+  template<typename ScalarType,
+           typename CrsExecViewTypeA,
+           typename CrsExecViewTypeB>
+  KOKKOS_INLINE_FUNCTION
+  int
+  Trsm<Side::Left,Uplo::Upper,Trans::ConjTranspose,
+       AlgoTrsm::ForFactorBlocked,Variant::One>
+  ::invoke(typename CrsExecViewTypeA::policy_type &policy,
+           const typename CrsExecViewTypeA::policy_type::member_type &member,
+           const int diagA,
+           const ScalarType alpha,
+           typename CrsExecViewTypeA::matrix_type &A,
+           typename CrsExecViewTypeB::matrix_type &B) {
+    typedef typename CrsExecViewTypeA::ordinal_type ordinal_type;
+    typedef typename CrsExecViewTypeA::value_type value_type;
+    typedef typename CrsExecViewTypeA::row_view_type row_view_type;
+
+
+// Debug trace; permanently disabled by the constant-false condition.
+if ( false && member.team_rank() == 0 ) {
+ printf("Trsm [%d +%d)x[%d +%d)\n"
+ , B.OffsetRows()
+ , B.NumRows()
+ , B.OffsetCols()
+ , B.NumCols()
+ );
+}
+
+    // scale the matrix B with alpha
+    scaleCrsMatrix<ScalarType,CrsExecViewTypeB>(member, alpha, B);
+
+    // Solve a system: AX = B -> B := inv(A) B
+    const ordinal_type mA = A.NumRows();
+    const ordinal_type nB = B.NumCols();
+
+    if (nB > 0) {
+      for (ordinal_type k=0;k<mA;++k) {
+        row_view_type &a = A.RowView(k);
+        // const value_type cdiag = std::conj(a.Value(0)); // for complex<T>
+        const value_type cdiag = a.Value(0);
+
+        // invert
+        row_view_type &b1 = B.RowView(k);
+        const ordinal_type nnz_b1 = b1.NumNonZeros();
+
+        if (diagA != Diag::Unit && nnz_b1 > 0) {
+          // b1t = b1t / conj(diag)
+          Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, nnz_b1),
+                               [&](const ordinal_type j) {
+                                 b1.Value(j) /= cdiag;
+                               });
+        }
+
+        // update
+        const ordinal_type nnz_a = a.NumNonZeros();
+        if (nnz_a > 0) {
+          // B2 = B2 - trans(conj(a12t)) b1t
+          // NOTE(review): there is no team barrier between the divide loop
+          // above and this loop; both iterate the same [0, nnz_b1) range --
+          // confirm that each j is handled by the same thread in both.
+          Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, nnz_b1),
+                               [&](const ordinal_type j) {
+                                 // grab b1
+                                 const ordinal_type col_at_j = b1.Col(j);
+                                 const value_type val_at_j = b1.Value(j);
+
+                                 // i starts at 1: entry 0 of the A row is the diagonal used above
+                                 for (ordinal_type i=1;i<nnz_a;++i) {
+                                   // grab a12t
+                                   const ordinal_type row_at_i = a.Col(i);
+                                   // const value_type val_at_i = conj(a.Value(i));
+                                   const value_type val_at_i = a.Value(i);
+
+                                   // grab b2
+                                   row_view_type &b2 = B.RowView(row_at_i);
+
+                                   // check and update
+                                   // a negative index leaves b2 untouched
+                                   ordinal_type idx = 0;
+                                   idx = b2.Index(col_at_j, idx);
+                                   if (idx >= 0)
+                                     b2.Value(idx) -= val_at_i*val_at_j;
+                                 }
+                               });
+        }
+        // all updates for row k finish before row k+1 is processed
+        member.team_barrier();
+      }
+    }
+
+    return 0;
+  }
+
+  // Trsm used in the factorization phase: data parallel on a1t
+  // ==========================================================
+  // Same computation as Variant::One, but the update loop splits team
+  // threads over entries 1..nnz_a-1 of A's row k (one target row of B per
+  // iteration), with a team barrier after the divide and after the update.
+  // Always returns 0.
+  template<>
+  template<typename ScalarType,
+           typename CrsExecViewTypeA,
+           typename CrsExecViewTypeB>
+  KOKKOS_INLINE_FUNCTION
+  int
+  Trsm<Side::Left,Uplo::Upper,Trans::ConjTranspose,
+       AlgoTrsm::ForFactorBlocked,Variant::Two>
+  ::invoke(typename CrsExecViewTypeA::policy_type &policy,
+           const typename CrsExecViewTypeA::policy_type::member_type &member,
+           const int diagA,
+           const ScalarType alpha,
+           typename CrsExecViewTypeA::matrix_type &A,
+           typename CrsExecViewTypeB::matrix_type &B) {
+    typedef typename CrsExecViewTypeA::ordinal_type ordinal_type;
+    typedef typename CrsExecViewTypeA::value_type value_type;
+    typedef typename CrsExecViewTypeA::row_view_type row_view_type;
+
+    // scale the matrix B with alpha
+    scaleCrsMatrix<ScalarType,CrsExecViewTypeB>(member, alpha, B);
+
+    // Solve a system: AX = B -> B := inv(A) B
+    const ordinal_type mA = A.NumRows();
+    const ordinal_type nB = B.NumCols();
+
+    if (nB > 0) {
+      for (ordinal_type k=0;k<mA;++k) {
+        row_view_type &a = A.RowView(k);
+        // const value_type cdiag = conj(a.Value(0));
+        const value_type cdiag = a.Value(0);
+
+        // invert
+        row_view_type &b1 = B.RowView(k);
+        const ordinal_type nnz_b1 = b1.NumNonZeros();
+
+        if (diagA != Diag::Unit && nnz_b1 > 0) {
+          // b1t = b1t / conj(diag)
+          Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, nnz_b1),
+                               [&](const ordinal_type j) {
+                                 b1.Value(j) /= cdiag;
+                               });
+          // the update below reads all of b1 from every thread
+          member.team_barrier();
+        }
+
+        // update
+        const ordinal_type nnz_a = a.NumNonZeros();
+        if (nnz_a > 0) {
+          // B2 = B2 - trans(conj(a12t)) b1t
+          Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 1, nnz_a),
+                               [&](const ordinal_type i) {
+                                 // grab a12t
+                                 const ordinal_type row_at_i = a.Col(i);
+                                 // const value_type val_at_i = conj(a.Value(i));
+                                 const value_type val_at_i = a.Value(i);
+
+                                 // grab b2
+                                 row_view_type &b2 = B.RowView(row_at_i);
+
+                                 // idx is carried from one lookup to the next; the scan
+                                 // stops once Index() returns -2 or less (presumably
+                                 // "no later column can match" -- TODO confirm).
+                                 ordinal_type idx = 0;
+                                 for (ordinal_type j=0;j<nnz_b1 && (idx > -2);++j) {
+                                   // grab b1
+                                   const ordinal_type col_at_j = b1.Col(j);
+                                   const value_type val_at_j = b1.Value(j);
+
+                                   // check and update
+                                   idx = b2.Index(col_at_j, idx);
+                                   if (idx >= 0)
+                                     b2.Value(idx) -= val_at_i*val_at_j;
+                                 }
+                               });
+          member.team_barrier();
+        }
+      }
+    }
+
+    return 0;
+  }
+
+}
+
+#endif
diff --git a/lib/kokkos/example/ichol/src/util.cpp b/lib/kokkos/example/ichol/src/util.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ef220c48c1b7d58af2289dde4f226a7a102d63ee
--- /dev/null
+++ b/lib/kokkos/example/ichol/src/util.cpp
@@ -0,0 +1,4 @@
+
+
+static int dummy = 1;
+
diff --git a/lib/kokkos/example/ichol/src/util.hpp b/lib/kokkos/example/ichol/src/util.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..020475bc52daad5c864d7caa8ed34d03157a0046
--- /dev/null
+++ b/lib/kokkos/example/ichol/src/util.hpp
@@ -0,0 +1,237 @@
+#pragma once
+#ifndef __UTIL_HPP__
+#define __UTIL_HPP__
+
+#include <stdio.h>
+#include <string.h>
+
+#include <string>
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <vector>
+#include <set>
+#include <map>
+#include <algorithm>
+#include <memory>
+
+#include <cmath>
+#include <complex>
+
+#include <limits>
+
+/// \file util.hpp
+/// \brief Utility functions and constant integer class like an enum class.
+/// \author Kyungjoo Kim (kyukim@sandia.gov) +/// +/// This provides utility functions for implementing mini-app for incomplete +/// sparse matrix factorization with task-data parallelism e.g., parameter +/// classes, error handling, ostream << overloading. +/// +/// Note: The reference of the "static const int" members in the enum-like +/// classes should not be used as function arguments but their values only. + + +using namespace std; + +namespace Tacho { + +#undef CHKERR +#define CHKERR(ierr) \ + if (ierr != 0) { cout << endl << ">> Error in " << __FILE__ << ", " << __LINE__ << " : " << ierr << endl; } + +#define MSG_NOT_YET_IMPLEMENTED ">> Not yet implemented" +#define MSG_INVALID_INPUT(what) ">> Invaid input argument: " #what +#define MSG_INVALID_TEMPLATE_ARGS ">> Invaid template arguments" +#define ERROR(msg) \ + { cout << endl << ">> Error in " << __FILE__ << ", " << __LINE__ << endl << msg << endl; } + + // control id +#undef Ctrl +#define Ctrl(name,algo,variant) name<algo,variant> + + // control leaf +#undef CtrlComponent +#define CtrlComponent(name,algo,variant,component,id) \ + Ctrl(name,algo,variant)::component[id] + + // control recursion +#undef CtrlDetail +#define CtrlDetail(name,algo,variant,component) \ + CtrlComponent(name,algo,variant,component,0),CtrlComponent(name,algo,variant,component,1),name + + /// \class GraphHelper + class GraphHelper { + public: + static const int DefaultRandomSeed = -1; + }; + + + /// \class Partition + /// \brief Matrix partition parameters. + class Partition { + public: + static const int Top = 101; + static const int Bottom = 102; + + static const int Left = 201; + static const int Right = 202; + + static const int TopLeft = 401; + static const int TopRight = 402; + static const int BottomLeft = 403; + static const int BottomRight = 404; + }; + + /// \class Uplo + /// \brief Matrix upper/lower parameters. 
+  class Uplo {
+  public:
+    static const int Upper = 501;
+    static const int Lower = 502;
+  };
+
+  /// \class Side
+  /// \brief Matrix left/right parameters.
+  class Side {
+  public:
+    static const int Left = 601;
+    static const int Right = 602;
+  };
+
+  /// \class Diag
+  /// \brief Matrix unit/non-unit diag parameters.
+  class Diag {
+  public:
+    static const int Unit = 701;
+    static const int NonUnit = 702;
+  };
+
+  /// \class Trans
+  /// \brief Matrix transpose / conjugate-transpose / no-transpose parameters.
+  class Trans {
+  public:
+    static const int Transpose = 801;
+    static const int ConjTranspose = 802;
+    static const int NoTranspose = 803;
+  };
+
+  /// \class Loop
+  /// \brief Outer/inner/fused loop parameters.
+  class Loop {
+  public:
+    static const int Outer = 901;
+    static const int Inner = 902;
+    static const int Fused = 903;
+  };
+
+  /// \class Variant
+  /// \brief Integer tags (1-4) used to select an algorithm variant.
+  class Variant {
+  public:
+    static const int One = 1;
+    static const int Two = 2;
+    static const int Three = 3;
+    static const int Four = 4;
+  };
+
+  /// \class AlgoChol
+  /// \brief Algorithmic variants in sparse factorization and sparse BLAS operations.
+  class AlgoChol {
+  public:
+    // One side factorization on flat matrices
+    static const int Dummy = 1000;
+    static const int Unblocked = 1001;
+    static const int UnblockedOpt = 1002;
+    static const int Blocked = 1101; // testing only
+
+    static const int RightLookByBlocks = 1201; // backbone structure is right looking
+    static const int ByBlocks = RightLookByBlocks;
+
+    static const int NestedDenseBlock = 1211;
+    static const int NestedDenseByBlocks = 1212;
+
+    static const int RightLookDenseByBlocks = 1221;
+    static const int DenseByBlocks = RightLookDenseByBlocks;
+
+    static const int ExternalLapack = 1231;
+    static const int ExternalPardiso = 1232;
+  };
+
+  // aliasing name space
+  typedef AlgoChol AlgoTriSolve;
+
+  /// \class AlgoBlasLeaf
+  /// \brief Algorithm tags shared by the sparse BLAS kernels (base of AlgoGemm/AlgoTrsm/AlgoHerk).
+  class AlgoBlasLeaf {
+  public:
+    // One side factorization on flat matrices
+    static const int ForFactorBlocked = 2001;
+
+    // B and C are dense matrices and used for solve phase
+    static const int ForTriSolveBlocked = 2011;
+
+    static const int ExternalBlas = 2021;
+  };
+
+  // Each kernel family inherits the leaf tags and adds its own DenseByBlocks value.
+  class AlgoGemm : public AlgoBlasLeaf {
+  public:
+    static const int DenseByBlocks = 2101;
+  };
+
+  class AlgoTrsm : public AlgoBlasLeaf {
+  public:
+    static const int DenseByBlocks = 2201;
+  };
+
+  class AlgoHerk : public AlgoBlasLeaf {
+  public:
+    static const int DenseByBlocks = 2301;
+  };
+
+  /// \brief Interface for overloaded stream operators.
+  ///
+  /// Streams the pointee through its showMe(). The pointer is dereferenced
+  /// unconditionally, so p must not be empty.
+  template<typename T>
+  inline
+  ostream& operator<<(ostream &os, const unique_ptr<T> &p) {
+    return p->showMe(os);
+  }
+
+  /// \class Disp
+  /// \brief Interface for the stream operator.
+  ///
+  /// Derived classes override showMe(); the base version writes nothing.
+  class Disp {
+    friend ostream& operator<<(ostream &os, const Disp &disp);
+  public:
+    Disp() { }
+    // Polymorphic base: a virtual destructor makes deletion through a
+    // Disp pointer well defined.
+    virtual ~Disp() { }
+    virtual ostream& showMe(ostream &os) const {
+      return os;
+    }
+  };
+
+  /// \brief Implementation of the overloaded stream operator.
+  // Delegates to the virtual Disp::showMe.
+  inline
+  ostream& operator<<(ostream &os, const Disp &disp) {
+    return disp.showMe(os);
+  }
+
+  /// \brief Per-scalar-type traits: real_type is the real type behind the
+  ///        scalar (the component type for complex), and epsilon() returns
+  ///        numeric_limits<real_type>::epsilon(). The primary template is
+  ///        empty; only float, double and their complex forms are specialized.
+  template<typename T> struct NumericTraits {};
+
+  template<>
+  struct NumericTraits<float> {
+    typedef float real_type;
+    static real_type epsilon() { return numeric_limits<float>::epsilon(); }
+  };
+  template<>
+  struct NumericTraits<double> {
+    typedef double real_type;
+    static real_type epsilon() { return numeric_limits<double>::epsilon(); }
+  };
+  template<>
+  struct NumericTraits<complex<float> > {
+    typedef float real_type;
+    static real_type epsilon() { return numeric_limits<float>::epsilon(); }
+  };
+  template<>
+  struct NumericTraits<complex<double> > {
+    typedef double real_type;
+    static real_type epsilon() { return numeric_limits<double>::epsilon(); }
+  };
+
+}
+
+#endif
diff --git a/lib/kokkos/example/md_skeleton/CMakeLists.txt b/lib/kokkos/example/md_skeleton/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..28412c37847deb211db5b6256a78a0e904d8dcaf
--- /dev/null
+++ b/lib/kokkos/example/md_skeleton/CMakeLists.txt
@@ -0,0 +1,16 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+SET(SOURCES "")
+SET(LIBRARIES "")
+
+FILE(GLOB SOURCES *.cpp )
+
+TRIBITS_ADD_EXECUTABLE(
+  md_skeleton
+  SOURCES ${SOURCES}
+  COMM serial mpi
+  DEPLIBS ${LIBRARIES}
+  )
+
diff --git a/lib/kokkos/example/md_skeleton/Makefile b/lib/kokkos/example/md_skeleton/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..bf8fbea3e09a5d71f900de85ff2100cf41bd5738
--- /dev/null
+++ b/lib/kokkos/example/md_skeleton/Makefile
@@ -0,0 +1,53 @@
+KOKKOS_PATH ?= ../..
+ +MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) +SRC_DIR := $(dir $(MAKEFILE_PATH)) + +SRC = $(wildcard $(SRC_DIR)/*.cpp) +OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o) + +#SRC = $(wildcard *.cpp) +#OBJ = $(SRC:%.cpp=%.o) + +default: build + echo "Start Build" + +# use installed Makefile.kokkos +include $(KOKKOS_PATH)/Makefile.kokkos + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = $(NVCC_WRAPPER) +CXXFLAGS = -I$(SRC_DIR) -O3 +LINK = $(CXX) +LINKFLAGS = +EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR))) +#KOKKOS_DEVICES = "Cuda,OpenMP" +#KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -I$(SRC_DIR) -O3 +LINK = $(CXX) +LINKFLAGS = +EXE = $(addsuffix .host, $(shell basename $(SRC_DIR))) +#KOKKOS_DEVICES = "OpenMP" +#KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +LIB = + + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: + rm -f *.a *.o *.cuda *.host + +# Compilation rules + +%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + diff --git a/lib/kokkos/example/md_skeleton/README b/lib/kokkos/example/md_skeleton/README new file mode 100644 index 0000000000000000000000000000000000000000..1ce682b0a6ec64175587d70c593e39ba8d304d75 --- /dev/null +++ b/lib/kokkos/example/md_skeleton/README @@ -0,0 +1,3 @@ +To build this example on a 2012-model Macbook Pro with NVIDIA Kepler GPU: + +./build.cuda_std g++_osx cuda_osx 30 opt diff --git a/lib/kokkos/example/md_skeleton/force.cpp b/lib/kokkos/example/md_skeleton/force.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a31944f859164e0aef5f9f29c90c59cb16cd526b --- /dev/null +++ b/lib/kokkos/example/md_skeleton/force.cpp @@ -0,0 +1,192 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/* Define values which set the max number of registers used for the Force Kernel + * Its 32 * 2048 / (KOKKOS_CUDA_MAX_THREADS * KOKKOS_CUDA_MIN_BLOCKS) + * Have to be set before including Kokkos header files. + */ + +#define KOKKOS_CUDA_MAX_THREADS 512 +#define KOKKOS_CUDA_MIN_BLOCKS 3 + +#include <system.h> +#include <cstdio> + + +/* Simple Lennard Jones Force Kernel using neighborlists + * Calculates for every pair of atoms (i,j) with distance smaller r_cut + * f_ij = 4*epsilon * ( (sigma/r_ij)^12 - (sigma/r_ij)^6 ) + * where r_ij is the distance of atoms (i,j). + * The force on atom i is the sum over f_ij: + * f_i = sum_j (f_ij) + * Neighborlists are used in order to pre calculate which atoms j are + * close enough to i to be able to contribute. By choosing a larger neighbor + * cutoff then the force cutoff, the neighbor list can be reused several times + * (typically 10 - 100). 
+ */
+
+// Kokkos functor evaluating the pair force for one atom per call.
+// It is used with parallel_for (forces only) or with parallel_reduce, which
+// additionally accumulates a double2 holding energy in .x and virial in .y.
+struct ForceFunctor {
+
+  typedef t_x_array::execution_space execution_space; //Device Type for running the kernel
+  typedef double2 value_type; // When energy calculation is requested return energy, and virial
+
+  t_x_array_randomread x; //atom positions
+  t_f_array f; //atom forces
+  t_int_1d_const numneigh; //number of neighbors per atom
+  t_neighbors_const neighbors; //neighborlist
+  double cutforcesq; //force cutoff
+  double epsilon; //Potential parameter
+  double sigma6; //Potential parameter
+
+
+  // Copies the arrays and squared force cutoff out of s; epsilon and sigma6
+  // are fixed at 1.0.
+  ForceFunctor(System s) {
+    x = s.d_x;
+    f = s.f;
+    numneigh = s.numneigh;
+    neighbors = s.neighbors;
+    cutforcesq = s.force_cutsq;
+    epsilon = 1.0;
+    sigma6 = 1.0;
+  }
+
+  /* Operator for not calculating energy and virial */
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &i) const {
+    force<0>(i);
+  }
+
+  /* Operator for calculating energy and virial */
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &i, double2 &energy_virial) const {
+    double2 ev = force<1>(i);
+    energy_virial.x += ev.x;
+    energy_virial.y += ev.y;
+  }
+
+  // Sums the force on atom i over its neighbor list and adds it to f(i, 0..2).
+  // Returns {4.0 * energy, 0.5 * virial}; both sums stay zero when EVFLAG is 0.
+  template<int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  double2 force(const int &i) const
+  {
+    const int numneighs = numneigh[i];
+    const double xtmp = x(i, 0);
+    const double ytmp = x(i, 1);
+    const double ztmp = x(i, 2);
+    double fix = 0;
+    double fiy = 0;
+    double fiz = 0;
+    double energy = 0;
+    double virial = 0;
+
+    //pragma simd forces vectorization (ignoring the performance objections of the compiler)
+    //give hint to compiler that fix, fiy and fiz are used for reduction only
+
+    #ifdef USE_SIMD
+    #pragma simd reduction (+: fix,fiy,fiz,energy,virial)
+    #endif
+    for(int k = 0; k < numneighs; k++) {
+      const int j = neighbors(i, k);
+      const double delx = xtmp - x(j, 0);
+      const double dely = ytmp - x(j, 1);
+      const double delz = ztmp - x(j, 2);
+      const double rsq = delx * delx + dely * dely + delz * delz;
+
+      //if(i==0) printf("%i %i %lf %lf\n",i,j,rsq,cutforcesq);
+      // only pairs closer than the force cutoff contribute
+      if(rsq < cutforcesq) {
+        const double sr2 = 1.0 / rsq;
+        const double sr6 = sr2 * sr2 * sr2 * sigma6;
+        // scalar multiplier applied to the separation vector (delx, dely, delz)
+        const double force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
+        fix += delx * force;
+        fiy += dely * force;
+        fiz += delz * force;
+
+        if(EVFLAG) {
+          energy += sr6 * (sr6 - 1.0) * epsilon;
+          virial += delx * delx * force + dely * dely * force + delz * delz * force;
+        }
+      }
+    }
+
+    f(i, 0) += fix;
+    f(i, 1) += fiy;
+    f(i, 2) += fiz;
+
+    double2 energy_virial ;
+    energy_virial.x = 4.0 * energy ;
+    energy_virial.y = 0.5 * virial ;
+    return energy_virial;
+  }
+
+  /* init and join functions when doing the reduction to obtain energy and virial */
+
+  KOKKOS_FUNCTION
+  static void init(volatile value_type &update) {
+    update.x = update.y = 0;
+  }
+  KOKKOS_FUNCTION
+  static void join(volatile value_type &update ,
+                   const volatile value_type &source) {
+    update.x += source.x ;
+    update.y += source.y ;
+  }
+
+};
+
+
+/* Calling function */
+
+// Runs ForceFunctor over s.nlocal atoms: parallel_for when evflag is zero,
+// parallel_reduce otherwise. Fences, then returns the reduced
+// {energy, virial} pair (both zero when evflag is zero).
+double2 force(System &s,int evflag) {
+
+  ForceFunctor f(s);
+
+  double2 ev ; ev.x = 0 ; ev.y = 0 ;
+  if(!evflag)
+    Kokkos::parallel_for(s.nlocal,f);
+  else
+    Kokkos::parallel_reduce(s.nlocal,f,ev);
+
+  execution_space::fence();
+  return ev;
+}
+
diff --git a/lib/kokkos/example/md_skeleton/main.cpp b/lib/kokkos/example/md_skeleton/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..58cf76cab014fde0183d80a5dae347069a72f874
--- /dev/null
+++ b/lib/kokkos/example/md_skeleton/main.cpp
@@ -0,0 +1,205 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1.
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <cstdio>
+#include <cstring>
+#include <cstdlib>
+#include "system.h"
+
+int create_system(System &system, int nx, int ny, int nz, double rho);
+void neigh_setup(System &system);  // void, matching the definition in neighbor.cpp
+void neigh_build(System &system);  // void, matching the definition in neighbor.cpp
+double2 force(System &system,int evflag);
+
+/* simple MD Skeleton which
+ *   - constructs a simple FCC lattice,
+ *   - computes a neighborlist
+ *   - compute LJ-Force kernel a number of times
+ */
+
+int main(int argc, char** argv) {
+
+  printf("Running MD Skeleton\n");
+  /* Thread numbers for Host */
+
+  int num_threads = 1;
+  int teams = 1;
+  int device = 0; // Default device for GPU runs
+
+  /* avoid unused variable warnings */
+  (void)num_threads;
+  (void)teams;
+  (void)device;
+
+  /* Default value for number of force calculations */
+
+  int iter = 100;
+
+  /* Default value for system size (4*nx*ny*nz atoms)
+   * nx, ny and nz are set to system_size if not specified on commandline */
+
+  int system_size = 20;
+  int nx = -1;
+  int ny = -1;
+  int nz = -1;
+
+  int neighbor_size = 1; // Default bin size for neighbor list construction
+
+  double rho = 0.8442; // Number density of the system
+  double delta = 0; // Scaling factor for random offsets of atom positions
+
+  /* read in command-line arguments; argv[0] (the program name) is skipped.
+   * Every option takes a value, so the loop stops one short of argc: argv[++i] is then always a real argument. */
+
+  for(int i = 1; i + 1 < argc; i++) {
+    if((strcmp(argv[i], "-t") == 0) || (strcmp(argv[i], "--num_threads") == 0)) {
+      num_threads = atoi(argv[++i]);
+      continue;
+    }
+
+    if((strcmp(argv[i], "--teams") == 0)) {
+      teams = atoi(argv[++i]);
+      continue;
+    }
+
+    if((strcmp(argv[i], "-d") == 0) || (strcmp(argv[i], "--device") == 0)) {
+      device = atoi(argv[++i]);
+      continue;
+    }
+
+    if((strcmp(argv[i], "--delta") == 0)) {
+      delta = atof(argv[++i]);
+      continue;
+    }
+
+    if((strcmp(argv[i], "-i") == 0) || (strcmp(argv[i], "--iter") == 0)) {
+      iter = atoi(argv[++i]);
+      continue;
+    }
+
+    if((strcmp(argv[i], "-rho") == 0)) {
+      rho = atof(argv[++i]);  // rho is a double (default 0.8442): atoi would truncate it to an integer
+      continue;
+    }
+
+    if((strcmp(argv[i], "-s") == 0) || (strcmp(argv[i], "--size") == 0)) {
+      system_size = atoi(argv[++i]);
+      continue;
+    }
+
+    if((strcmp(argv[i], "-nx") == 0)) {
+      nx = atoi(argv[++i]);
+      continue;
+    }
+
+    if((strcmp(argv[i], "-ny") == 0)) {
+      ny = atoi(argv[++i]);
+      continue;
+    }
+
+    if((strcmp(argv[i], "-nz") == 0)) {
+      nz = atoi(argv[++i]);
+      continue;
+    }
+
+    if((strcmp(argv[i], "-b") == 0) || (strcmp(argv[i], "--neigh_bins") == 0)) {
+      neighbor_size = atoi(argv[++i]);
+      continue;
+    }
+  }
+
+  if( nx < 0 ) nx = system_size;
+  if( ny < 0 ) ny = system_size;
+  if( nz < 0 ) nz = system_size;
+
+  printf("-> Init Device\n");
+
+#if defined( KOKKOS_HAVE_CUDA )
+  Kokkos::HostSpace::execution_space::initialize(teams*num_threads);
+  Kokkos::Cuda::SelectDevice select_device(device);
+  Kokkos::Cuda::initialize(select_device);
+#elif defined( KOKKOS_HAVE_OPENMP )
+  Kokkos::OpenMP::initialize(teams*num_threads);
+#elif defined( KOKKOS_HAVE_PTHREAD )
+  Kokkos::Threads::initialize(teams*num_threads);
+#endif
+
+  System system;
+  system.neigh_cut = 2.8;
+  system.force_cut = 2.5;
+  system.force_cutsq = system.force_cut*system.force_cut;
+  system.delta = delta;
+
+  printf("-> Build system\n");
+  create_system(system,nx,ny,nz,rho);
+
+  printf("-> Created %i atoms and %i ghost atoms\n",system.nlocal,system.nghost);
+
+  system.nbinx = system.box.xprd/neighbor_size+1;
+  system.nbiny = system.box.yprd/neighbor_size+1;
+  system.nbinz = system.box.zprd/neighbor_size+1;
+
+
+  printf("-> Building Neighborlist\n");
+
+  neigh_setup(system);
+  neigh_build(system);
+
+  double2 ev = force(system,1);
+
+  printf("-> Calculate Energy: %f Virial: %f\n",ev.x,ev.y);
+
+  printf("-> Running %i force calculations\n",iter);
+
+  Kokkos::Timer timer;
+
+  for(int i=0;i<iter;i++) {
+    force(system,0);
+  }
+
+
+  double time = timer.seconds();
+  printf("Time: %e s for %i iterations with %i atoms\n",time,iter,system.nlocal);
+
+  execution_space::finalize();
+}
+diff --git 
a/lib/kokkos/example/md_skeleton/neighbor.cpp b/lib/kokkos/example/md_skeleton/neighbor.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2a77932946f9dec0badb133ac1d6a08d5465d240 --- /dev/null +++ b/lib/kokkos/example/md_skeleton/neighbor.cpp @@ -0,0 +1,430 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <system.h> +#include <cstdio> +#include <Kokkos_Core.hpp> + +#define SMALL 1.0e-6 +#define FACTOR 0.999 + +/* BinningFunctor puts atoms into bins of the simulation box + * Neighborlists are then created by checking only distances of atoms + * in adjacent bins. That makes neighborlist construction a O(N) operation. 
+ */
+
+struct BinningFunctor {
+  typedef t_int_2d::execution_space execution_space;
+
+  System s;
+
+  int atoms_per_bin;  // capacity of one bin: the second extent of s.bins
+
+  BinningFunctor(System _s): s(_s) {
+    atoms_per_bin = s.bins.dimension_1();
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &i) const
+  {
+    const int ibin = coord2bin(s.d_x(i, 0), s.d_x(i, 1), s.d_x(i, 2));
+    // ac is the value returned by the atomic increment of this bin's counter; it is used as the slot index
+    const int ac = Kokkos::atomic_fetch_add(&s.bincount[ibin], 1);
+    // store atom i if the slot exists; otherwise record the largest overflowing slot index in d_resize
+    if(ac < atoms_per_bin) {
+      s.bins(ibin, ac) = i;
+    } else if(s.d_resize(0) < ac) {
+      s.d_resize(0) = ac;
+    }
+  }
+  // Map a position to its flat bin index (ix varies fastest, then iy, then iz; the result is offset by 1).
+  KOKKOS_INLINE_FUNCTION
+  int coord2bin(double x, double y, double z) const
+  {
+    int ix, iy, iz;
+    // per axis: three cases -- at/above the box length, inside [0, prd), and negative coordinates
+    if(x >= s.box.xprd)
+      ix = (int)((x - s.box.xprd) * s.bininvx) + s.nbinx - s.mbinxlo;
+    else if(x >= 0.0)
+      ix = (int)(x * s.bininvx) - s.mbinxlo;
+    else
+      ix = (int)(x * s.bininvx) - s.mbinxlo - 1;
+
+    if(y >= s.box.yprd)
+      iy = (int)((y - s.box.yprd) * s.bininvy) + s.nbiny - s.mbinylo;
+    else if(y >= 0.0)
+      iy = (int)(y * s.bininvy) - s.mbinylo;
+    else
+      iy = (int)(y * s.bininvy) - s.mbinylo - 1;
+
+    if(z >= s.box.zprd)
+      iz = (int)((z - s.box.zprd) * s.bininvz) + s.nbinz - s.mbinzlo;
+    else if(z >= 0.0)
+      iz = (int)(z * s.bininvz) - s.mbinzlo;
+    else
+      iz = (int)(z * s.bininvz) - s.mbinzlo - 1;
+
+    return (iz * s.mbiny * s.mbinx + iy * s.mbinx + ix + 1);
+  }
+};
+
+/* Build the actual neighborlist: for atom i, collect every atom within neigh_cutsq from the stencil bins */
+
+struct BuildFunctor {
+
+  typedef t_int_2d::execution_space execution_space;
+
+  System s;
+
+  int maxneighs;  // capacity of one neighbor row: the second extent of s.neighbors
+  BuildFunctor(System _s): s(_s) {
+    maxneighs = s.neighbors.dimension_1();
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &i) const
+  {
+    int n = 0;  // neighbors stored so far for atom i; never exceeds maxneighs
+
+    const t_int_1d_const_um bincount_c = s.bincount;
+
+    const double xtmp = s.d_x(i, 0);
+    const double ytmp = s.d_x(i, 1);
+    const double ztmp = s.d_x(i, 2);
+
+    const int ibin = coord2bin(xtmp, ytmp, ztmp);
+
+    // loop over all bins in neighborhood (includes ibin)
+    for(int k = 0; k < s.nstencil; k++) {
+      const int jbin = ibin + s.d_stencil[k];
+
+      // get subview of jbin
+      const t_int_1d_const_um loc_bin =
+        Kokkos::subview(s.bins,jbin,Kokkos::ALL());
+
+      if(ibin == jbin)
+        for(int m = 0; m < bincount_c[jbin]; m++) {
+          const int j = loc_bin[m];
+
+          //for same bin as atom i skip j if i==j
+          if (j == i) continue;
+
+          const double delx = xtmp - s.d_x(j, 0);
+          const double dely = ytmp - s.d_x(j, 1);
+          const double delz = ztmp - s.d_x(j, 2);
+          const double rsq = delx * delx + dely * dely + delz * delz;
+
+          if(rsq <= s.neigh_cutsq && n<maxneighs) s.neighbors(i,n++) = j;
+        }
+      else {
+        for(int m = 0; m < bincount_c[jbin]; m++) {
+          const int j = loc_bin[m];
+
+          const double delx = xtmp - s.d_x(j, 0);
+          const double dely = ytmp - s.d_x(j, 1);
+          const double delz = ztmp - s.d_x(j, 2);
+          const double rsq = delx * delx + dely * dely + delz * delz;
+
+          if(rsq <= s.neigh_cutsq && n<maxneighs) s.neighbors(i,n++) = j;
+        }
+      }
+    }
+
+    s.numneigh[i] = n;
+    // a full row means neighbors may have been dropped: report n through d_resize for the host to see
+    if(n >= maxneighs) {
+      if(n >= s.d_resize(0)) s.d_resize(0) = n;
+    }
+  }
+  // Same position-to-bin mapping as BinningFunctor::coord2bin.
+  KOKKOS_INLINE_FUNCTION
+  int coord2bin(double x, double y, double z) const
+  {
+    int ix, iy, iz;
+
+    if(x >= s.box.xprd)
+      ix = (int)((x - s.box.xprd) * s.bininvx) + s.nbinx - s.mbinxlo;
+    else if(x >= 0.0)
+      ix = (int)(x * s.bininvx) - s.mbinxlo;
+    else
+      ix = (int)(x * s.bininvx) - s.mbinxlo - 1;
+
+    if(y >= s.box.yprd)
+      iy = (int)((y - s.box.yprd) * s.bininvy) + s.nbiny - s.mbinylo;
+    else if(y >= 0.0)
+      iy = (int)(y * s.bininvy) - s.mbinylo;
+    else
+      iy = (int)(y * s.bininvy) - s.mbinylo - 1;
+
+    if(z >= s.box.zprd)
+      iz = (int)((z - s.box.zprd) * s.bininvz) + s.nbinz - s.mbinzlo;
+    else if(z >= 0.0)
+      iz = (int)(z * s.bininvz) - s.mbinzlo;
+    else
+      iz = (int)(z * s.bininvz) - s.mbinzlo - 1;
+
+    return (iz * s.mbiny * s.mbinx + iy * s.mbinx + ix + 1);
+  }
+};
+
+/* Reset an array to zero: writes 0 to element i of ptr, which is treated as an int array */
+
+struct MemsetZeroFunctor {
+  typedef t_x_array::execution_space execution_space ;
+  void* ptr;
+  KOKKOS_INLINE_FUNCTION void operator()(const int i) const {
+    ((int*)ptr)[i] = 0;
+  }
+};
+
+/* Calculate distance of two bins */ 
+
+double bindist(System &s, int i, int j, int k)
+{
+  double delx, dely, delz;
+  // per axis the offset is shortened by one bin toward zero; the value returned below is a squared distance
+  if(i > 0)
+    delx = (i - 1) * s.binsizex;
+  else if(i == 0)
+    delx = 0.0;
+  else
+    delx = (i + 1) * s.binsizex;
+
+  if(j > 0)
+    dely = (j - 1) * s.binsizey;
+  else if(j == 0)
+    dely = 0.0;
+  else
+    dely = (j + 1) * s.binsizey;
+
+  if(k > 0)
+    delz = (k - 1) * s.binsizez;
+  else if(k == 0)
+    delz = 0.0;
+  else
+    delz = (k + 1) * s.binsizez;
+
+  return (delx * delx + dely * dely + delz * delz);
+}
+
+/* Setup the neighborlist construction
+ * Determine binsizes, a stencil for defining adjacency, etc.
+ */
+
+void neigh_setup(System &s) {
+
+  s.neigh_cutsq = s.neigh_cut * s.neigh_cut;
+
+  /*
+  c bins must evenly divide into box size,
+  c   becoming larger than cutneigh if necessary
+  c binsize = 1/2 of cutoff is near optimal
+
+  if (flag == 0) {
+    nbinx = 2.0 * xprd / cutneigh;
+    nbiny = 2.0 * yprd / cutneigh;
+    nbinz = 2.0 * zprd / cutneigh;
+    if (nbinx == 0) nbinx = 1;
+    if (nbiny == 0) nbiny = 1;
+    if (nbinz == 0) nbinz = 1;
+  }
+  */
+  // s.nbinx/nbiny/nbinz are read here and must already be set by the caller
+  s.binsizex = s.box.xprd / s.nbinx;
+  s.binsizey = s.box.yprd / s.nbiny;
+  s.binsizez = s.box.zprd / s.nbinz;
+  s.bininvx = 1.0 / s.binsizex;
+  s.bininvy = 1.0 / s.binsizey;
+  s.bininvz = 1.0 / s.binsizez;
+  // lowest and highest bin index per axis for the box widened by neigh_cut (plus a SMALL margin)
+  double coord = s.box.xlo - s.neigh_cut - SMALL * s.box.xprd;
+  s.mbinxlo = static_cast<int>(coord * s.bininvx);
+
+  if(coord < 0.0) s.mbinxlo = s.mbinxlo - 1;
+
+  coord = s.box.xhi + s.neigh_cut + SMALL * s.box.xprd;
+  int mbinxhi = static_cast<int>(coord * s.bininvx);
+
+  coord = s.box.ylo - s.neigh_cut - SMALL * s.box.yprd;
+  s.mbinylo = static_cast<int>(coord * s.bininvy);
+
+  if(coord < 0.0) s.mbinylo = s.mbinylo - 1;
+
+  coord = s.box.yhi + s.neigh_cut + SMALL * s.box.yprd;
+  int mbinyhi = static_cast<int>(coord * s.bininvy);
+
+  coord = s.box.zlo - s.neigh_cut - SMALL * s.box.zprd;
+  s.mbinzlo = static_cast<int>(coord * s.bininvz);
+
+  if(coord < 0.0) s.mbinzlo = s.mbinzlo - 1;
+
+  coord = s.box.zhi + s.neigh_cut + SMALL * s.box.zprd;
+  int mbinzhi = static_cast<int>(coord * s.bininvz);
+
+  /* extend bins by 1 in each direction to ensure stencil coverage */
+
+  s.mbinxlo = s.mbinxlo - 1;
+  mbinxhi = mbinxhi + 1;
+  s.mbinx = mbinxhi - s.mbinxlo + 1;
+
+  s.mbinylo = s.mbinylo - 1;
+  mbinyhi = mbinyhi + 1;
+  s.mbiny = mbinyhi - s.mbinylo + 1;
+
+  s.mbinzlo = s.mbinzlo - 1;
+  mbinzhi = mbinzhi + 1;
+  s.mbinz = mbinzhi - s.mbinzlo + 1;
+
+  /*
+  compute bin stencil of all bins whose closest corner to central bin
+    is within neighbor cutoff
+  for partial Newton (newton = 0),
+    stencil is all surrounding bins including self
+  for full Newton (newton = 1),
+    stencil is bins to the "upper right" of central bin, does NOT include self
+  next(xyz) = how far the stencil could possibly extend
+  factor < 1.0 for special case of LJ benchmark so code will create
+    correct-size stencil when there are 3 bins for every 5 lattice spacings
+  */
+
+  int nextx = static_cast<int>(s.neigh_cut * s.bininvx);
+
+  if(nextx * s.binsizex < FACTOR * s.neigh_cut) nextx++;
+
+  int nexty = static_cast<int>(s.neigh_cut * s.bininvy);
+
+  if(nexty * s.binsizey < FACTOR * s.neigh_cut) nexty++;
+
+  int nextz = static_cast<int>(s.neigh_cut * s.bininvz);
+
+  if(nextz * s.binsizez < FACTOR * s.neigh_cut) nextz++;
+  // nmax is the full (2*next+1)^3 box of offsets, an upper bound on the entries stored below
+  int nmax = (2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1);
+  s.d_stencil = t_int_1d("stencil", nmax);
+  s.h_stencil = Kokkos::create_mirror_view(s.d_stencil);
+  s.nstencil = 0;
+  int kstart = -nextz;
+  // the loops run over the whole range including (0,0,0), so the stencil built here contains the bin itself
+  for(int k = kstart; k <= nextz; k++) {
+    for(int j = -nexty; j <= nexty; j++) {
+      for(int i = -nextx; i <= nextx; i++) {
+        if(bindist(s,i, j, k) < s.neigh_cutsq) {
+          s.h_stencil(s.nstencil++) = k * s.mbiny * s.mbinx + j * s.mbinx + i;
+        }
+      }
+    }
+  }
+
+  /* Copy the stencil to the device and allocate the neighbor arrays (initially 8 atoms per bin, 80 neighbors per atom) */
+
+  Kokkos::deep_copy(s.d_stencil, s.h_stencil);
+  s.mbins = s.mbinx * s.mbiny * s.mbinz;
+  s.bincount = t_int_1d("bincount", s.mbins);
+  s.bins = t_int_2d("bins", s.mbins, 8);
+
+  s.neighbors = t_neighbors("neighbors",s.natoms,80);
+  s.numneigh = t_int_1d("numneigh",s.natoms);
+  s.d_resize = t_int_scalar("resize");
+  s.h_resize = Kokkos::create_mirror_view(s.d_resize);
+}
+
+
+/* Build the neighborlist
+ * This is a try and rerun algorithm for handling the case where the bins array
+ * and the neighbors array are not big enough. So if one is too small, it will
+ * reallocate and rerun the binning algorithm or the neighborlist construction.
+ */
+
+void neigh_build(System &s) {
+
+  /* Binning of atoms */
+
+  s.h_resize(0) = 1;
+
+  while(s.h_resize(0) > 0) {
+    s.h_resize(0) = 0;
+    Kokkos::deep_copy(s.d_resize, s.h_resize);
+
+    MemsetZeroFunctor f_zero;
+    f_zero.ptr = (void*) s.bincount.ptr_on_device();
+    Kokkos::parallel_for(s.mbins, f_zero);
+    execution_space::fence();
+
+    BinningFunctor f(s);
+    Kokkos::parallel_for(s.natoms, f);
+    execution_space::fence();
+
+    /* Check if bins was large enough; if not, reallocate and rerun */
+
+    deep_copy(s.h_resize, s.d_resize);
+
+    if(s.h_resize(0)) {
+      int atoms_per_bin = s.h_resize(0)+2;
+      s.bins = t_int_2d("bins", s.mbins, atoms_per_bin);
+    }
+  }
+
+  /* Neighborlist construction */
+
+  s.h_resize(0) = 1;
+
+  while(s.h_resize(0)) {
+    s.h_resize(0) = 0;
+
+    Kokkos::deep_copy(s.d_resize, s.h_resize);
+
+    BuildFunctor f(s);
+    Kokkos::parallel_for(s.nlocal, f);
+
+    execution_space::fence();
+
+    /* Check if neighbors was large enough; if not, reallocate (1.2x the reported size) and rerun */
+
+    deep_copy(s.h_resize, s.d_resize);
+
+    if(s.h_resize(0)) {
+      int maxneighs = s.h_resize(0) * 1.2;
+      s.neighbors = t_neighbors("neighbors", s.natoms, maxneighs);
+    }
+  }
+}
+diff --git a/lib/kokkos/example/md_skeleton/setup.cpp b/lib/kokkos/example/md_skeleton/setup.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7815a89101ce53dbe7c7b9cf51e0ab314acbd36b --- /dev/null +++ b/lib/kokkos/example/md_skeleton/setup.cpp @@ -0,0 +1,271 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <system.h>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+/* initialize atoms on fcc lattice in parallel fashion */
+
+#define MAX(a,b) (a>b?a:b)
+#define MIN(a,b) (a<b?a:b)
+
+/* Fill system.box, the atom counts and the position views; returns 0 on success, 1 if the local atom count is wrong */
+int create_system(System &system, int nx, int ny, int nz, double rho)
+{
+  /* Box Setup */
+
+  double lattice = pow((4.0 / rho), (1.0 / 3.0));
+  system.box.xprd = nx * lattice;
+  system.box.yprd = ny * lattice;
+  system.box.zprd = nz * lattice;
+  system.box.xlo = 0;
+  system.box.ylo = 0;
+  system.box.zlo = 0;
+  system.box.xhi = system.box.xprd;
+  system.box.yhi = system.box.yprd;
+  system.box.zhi = system.box.zprd;
+
+  // system.neigh_cut is read here and must already be set by the caller
+  int ghost_dist = int(system.neigh_cut/lattice) + 1;
+
+  /* total # of atoms */
+
+  system.nlocal = 4 * nx * ny * nz;
+  system.nghost = 4 * (nx + 2 * ghost_dist) *
+                  (ny + 2 * ghost_dist) *
+                  (nz + 2 * ghost_dist) -
+                  system.nlocal;
+  system.natoms = system.nlocal + system.nghost;
+
+  system.d_x = t_x_array("X",system.natoms);
+  system.h_x = Kokkos::create_mirror_view(system.d_x);
+  system.f = t_f_array("F",system.natoms);
+
+  /* determine loop bounds of lattice subsection that overlaps my sub-box
+     ensure loop bounds do not exceed nx,ny,nz */
+
+  double alat = pow((4.0 / rho), (1.0 / 3.0));
+  int ilo = static_cast<int>(system.box.xlo / (0.5 * alat) - 1);
+  int ihi = static_cast<int>(system.box.xhi / (0.5 * alat) + 1);
+  int jlo = static_cast<int>(system.box.ylo / (0.5 * alat) - 1);
+  int jhi = static_cast<int>(system.box.yhi / (0.5 * alat) + 1);
+  int klo = static_cast<int>(system.box.zlo / (0.5 * alat) - 1);
+  int khi = static_cast<int>(system.box.zhi / (0.5 * alat) + 1);
+
+  ilo = MAX(ilo, 0);
+  ihi = MIN(ihi, 2 * nx - 1);
+  jlo = MAX(jlo, 0);
+  jhi = MIN(jhi, 2 * ny - 1);
+  klo = MAX(klo, 0);
+  khi = MIN(khi, 2 * nz - 1);
+
+
+
+  /* generates positions of atoms on fcc sublattice*/
+
+  srand(3718273);
+  /* create non-ghost atoms */
+  {
+    double xtmp, ytmp, ztmp;
+    int sx = 0;
+    int sy = 0;
+    int sz = 0;
+    int ox = 0;
+    int oy = 0;
+    int oz = 0;
+    int subboxdim = 8;
+
+    int n = 0;
+    int iflag = 0;
+    // (sx,sy,sz) walks inside a subboxdim^3 sub-box, (ox,oy,oz) selects the sub-box
+    while(oz * subboxdim <= khi) {
+      const int k = oz * subboxdim + sz;
+      const int j = oy * subboxdim + sy;
+      const int i = ox * subboxdim + sx;
+
+      if(iflag) continue;
+      // only sites with even i+j+k inside the local index range receive an atom
+      if(((i + j + k) % 2 == 0) &&
+          (i >= ilo) && (i <= ihi) &&
+          (j >= jlo) && (j <= jhi) &&
+          (k >= klo) && (k <= khi)) {
+
+        const int nold = n;
+        while(nold == n) {  // redraw the random offsets until the position falls inside the box
+          xtmp = 0.5 * alat * i + system.delta/1000*(rand()%1000-500);
+          ytmp = 0.5 * alat * j + system.delta/1000*(rand()%1000-500);
+          ztmp = 0.5 * alat * k + system.delta/1000*(rand()%1000-500);
+
+          if(xtmp >= system.box.xlo && xtmp < system.box.xhi &&
+              ytmp >= system.box.ylo && ytmp < system.box.yhi &&
+              ztmp >= system.box.zlo && ztmp < system.box.zhi) {
+            system.h_x(n,0) = xtmp;
+            system.h_x(n,1) = ytmp;
+            system.h_x(n,2) = ztmp;
+            n++;
+          }
+        }
+      }
+
+      sx++;
+
+      if(sx == subboxdim) {
+        sx = 0;
+        sy++;
+      }
+
+      if(sy == subboxdim) {
+        sy = 0;
+        sz++;
+      }
+
+      if(sz == subboxdim) {
+        sz = 0;
+        ox++;
+      }
+
+      if(ox * subboxdim > ihi) {
+        ox = 0;
+        oy++;
+      }
+
+      if(oy * subboxdim > jhi) {
+        oy = 0;
+        oz++;
+      }
+    }
+
+    /* check that correct # of atoms were created */
+
+    if(system.nlocal != n) {
+      printf("Created incorrect # of atoms\n");
+
+      return 1;
+    }
+  }
+
+  /* create ghost atoms: same walk over the index range widened by 2*ghost_dist, skipping the local sites */
+
+  {
+    double xtmp, ytmp, ztmp;
+
+    int ilo_g = ilo - 2 * ghost_dist;
+    int jlo_g = jlo - 2 * ghost_dist;
+    int klo_g = klo - 2 * ghost_dist;
+    int ihi_g = ihi + 2 * ghost_dist;
+    int jhi_g = jhi + 2 * ghost_dist;
+    int khi_g = khi + 2 * ghost_dist;
+
+    int subboxdim = 8;
+    int sx = 0;
+    int sy = 0;
+    int sz = 0;
+    int ox = subboxdim * ilo_g;
+    int oy = subboxdim * jlo_g;
+    int oz = subboxdim * klo_g;
+
+    int n = system.nlocal;  // ghost atoms are stored after the local atoms
+    int iflag = 0;
+
+
+    while(oz * subboxdim <= khi_g) {
+      const int k = oz * subboxdim + sz;
+      const int j = oy * subboxdim + sy;
+      const int i = ox * subboxdim + sx;
+
+      if(iflag) continue;
+
+      if(((i + j + k) % 2 == 0) &&
+          (i >= ilo_g) && (i <= ihi_g) &&
+          (j >= jlo_g) && (j <= jhi_g) &&
+          (k >= klo_g) && (k <= khi_g) &&
+          ((i < ilo) || (i > ihi) ||
+           (j < jlo) || (j > jhi) ||
+           (k < klo) || (k > khi))
+        ) {
+
+        xtmp = 0.5 * alat * i;
+        ytmp = 0.5 * alat * j;
+        ztmp = 0.5 * alat * k;
+
+        system.h_x(n,0) = xtmp + system.delta/1000*(rand()%1000-500);;
+        system.h_x(n,1) = ytmp + system.delta/1000*(rand()%1000-500);;
+        system.h_x(n,2) = ztmp + system.delta/1000*(rand()%1000-500);;
+        n++;
+      }
+
+      sx++;
+
+      if(sx == subboxdim) {
+        sx = 0;
+        sy++;
+      }
+
+      if(sy == subboxdim) {
+        sy = 0;
+        sz++;
+      }
+
+      if(sz == subboxdim) {
+        sz = 0;
+        ox++;
+        //printf("%i %i %i // %i %i %i\n",ox,oy,oz,i,j,k);
+      }
+
+      if(ox * subboxdim > ihi_g) {
+        ox = subboxdim * ilo_g;
+        oy++;
+      }
+
+      if(oy * subboxdim > jhi_g) {
+        oy = subboxdim * jlo_g;
+        oz++;
+      }
+    }
+  }
+
+  Kokkos::deep_copy(system.d_x,system.h_x);
+  return 0;
+}
+
+diff --git a/lib/kokkos/example/md_skeleton/system.h b/lib/kokkos/example/md_skeleton/system.h new file mode 100644 index 0000000000000000000000000000000000000000..0184a119ff2d260442e624cd1f5e8a890cefe24f --- /dev/null +++ b/lib/kokkos/example/md_skeleton/system.h @@ -0,0 +1,92 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef SYSTEM_H_
+#define SYSTEM_H_
+
+#include <types.h>
+
+struct Box {
+  double xprd, yprd, zprd;  // box lengths per axis
+  double xlo, xhi;          // lower / upper box bounds per axis
+  double ylo, yhi;
+  double zlo, zhi;
+};
+// Simulation state shared by the setup, neighbor-list and force code.
+struct System {
+  Box box;
+
+  int natoms;  // nlocal + nghost
+  int nlocal;
+  int nghost;
+
+  t_x_array d_x;       // atom positions
+  t_x_array_host h_x;  // host mirror of d_x
+
+  t_f_array f;  // atom forces
+
+  t_neighbors neighbors;  // neighbor indices, one row per atom
+  t_int_1d numneigh;      // number of neighbors stored per atom
+
+  double delta;  // scaling factor for random offsets of atom positions
+
+  double neigh_cut,neigh_cutsq;  // neighbor cutoff and its square
+
+  int mbins;                    // mbinx * mbiny * mbinz
+  int nbinx,nbiny,nbinz;        // bins per axis across the box length
+  int mbinx,mbiny,mbinz;        // bins per axis of the extended bin grid
+  int mbinxlo,mbinylo,mbinzlo;  // lowest bin index per axis of the extended grid
+  double binsizex,binsizey,binsizez;
+  double bininvx,bininvy,bininvz;  // 1.0 / binsize
+
+  t_int_1d bincount;  // atoms counted per bin
+  t_int_2d bins;      // atom indices per bin
+  t_int_scalar d_resize;       // nonzero when bins or neighbors must be enlarged and the pass rerun
+  t_int_scalar_host h_resize;  // host mirror of d_resize
+  t_int_1d d_stencil;          // flat bin-index offsets of the neighborhood of a bin
+  t_int_1d_host h_stencil;     // host mirror of d_stencil
+  int nstencil;                // number of valid stencil entries
+
+  double force_cut,force_cutsq;  // force cutoff and its square
+};
+#endif
+diff --git a/lib/kokkos/example/md_skeleton/types.h b/lib/kokkos/example/md_skeleton/types.h new file mode 100644 index 0000000000000000000000000000000000000000..7f92b7cd0f8089d93c1e18e5dff3ad1508316867 --- /dev/null +++ b/lib/kokkos/example/md_skeleton/types.h @@ -0,0 +1,118 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef TYPES_H_ +#define TYPES_H_ + +/* Determine default device type and necessary includes */ + +#include <Kokkos_Core.hpp> + +typedef Kokkos::DefaultExecutionSpace execution_space ; + +#if ! 
defined( KOKKOS_HAVE_CUDA )
+  struct double2 {  // only defined without CUDA -- presumably the CUDA headers supply their own double2 (confirm)
+    double x, y;
+    KOKKOS_INLINE_FUNCTION
+    double2(double xinit, double yinit) {
+      x = xinit;
+      y = yinit;
+    }
+    KOKKOS_INLINE_FUNCTION
+    double2() {
+      x = 0.0;
+      y = 0.0;
+    }
+    KOKKOS_INLINE_FUNCTION
+    double2& operator += (const double2& src) {
+      x+=src.x;
+      y+=src.y;
+      return *this;
+    }
+    // volatile overload of the component-wise +=
+    KOKKOS_INLINE_FUNCTION
+    volatile double2& operator += (const volatile double2& src) volatile {
+      x+=src.x;
+      y+=src.y;
+      return *this;
+    }
+
+  };
+#endif
+
+#include <impl/Kokkos_Timer.hpp>
+
+/* Define types used throughout the code */
+
+//Position arrays (three doubles per atom)
+typedef Kokkos::View<double*[3], Kokkos::LayoutRight, execution_space>                                   t_x_array ;
+typedef t_x_array::HostMirror                                                                            t_x_array_host ;
+typedef Kokkos::View<const double*[3], Kokkos::LayoutRight, execution_space>                             t_x_array_const ;
+typedef Kokkos::View<const double*[3], Kokkos::LayoutRight, execution_space, Kokkos::MemoryRandomAccess >  t_x_array_randomread ;
+
+//Force array (three doubles per atom)
+typedef Kokkos::View<double*[3],  execution_space>                                                       t_f_array ;
+
+
+//Neighborlist
+typedef Kokkos::View<int**, execution_space >                                                            t_neighbors ;
+typedef Kokkos::View<const int**, execution_space >                                                      t_neighbors_const ;
+typedef Kokkos::View<int*, execution_space, Kokkos::MemoryUnmanaged >                                    t_neighbors_sub ;
+typedef Kokkos::View<const int*, execution_space, Kokkos::MemoryUnmanaged >                              t_neighbors_const_sub ;
+
+//1d int array
+typedef Kokkos::View<int*, execution_space >                                                             t_int_1d ;
+typedef t_int_1d::HostMirror                                                                             t_int_1d_host ;
+typedef Kokkos::View<const int*, execution_space >                                                       t_int_1d_const ;
+typedef Kokkos::View<int*, execution_space , Kokkos::MemoryUnmanaged>                                    t_int_1d_um ;
+typedef Kokkos::View<const int* , execution_space , Kokkos::MemoryUnmanaged>                             t_int_1d_const_um ;
+
+//2d int array
+typedef Kokkos::View<int**, Kokkos::LayoutRight, execution_space >                                       t_int_2d ;
+typedef t_int_2d::HostMirror                                                                             t_int_2d_host ;
+
+//Scalar ints (a single int held in a one-element View)
+typedef Kokkos::View<int[1], Kokkos::LayoutLeft, execution_space>                                        t_int_scalar ;
+typedef t_int_scalar::HostMirror t_int_scalar_host ; + +#endif /* TYPES_H_ */ diff --git a/lib/kokkos/example/multi_fem/BoxMeshFixture.hpp b/lib/kokkos/example/multi_fem/BoxMeshFixture.hpp new file mode 100644 index 0000000000000000000000000000000000000000..195bb4a6ae483dfc3ef07b78b17de4dea9b02226 --- /dev/null +++ b/lib/kokkos/example/multi_fem/BoxMeshFixture.hpp @@ -0,0 +1,610 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_BOXMESHFIXTURE_HPP +#define KOKKOS_BOXMESHFIXTURE_HPP + +#include <cmath> +#include <stdexcept> +#include <sstream> + +#include <Kokkos_Core.hpp> +#include <BoxMeshPartition.hpp> +#include <FEMesh.hpp> +#include <HexElement.hpp> + +//---------------------------------------------------------------------------- + +struct FixtureElementHex8 { + + static const unsigned element_node_count = 8 ; + + HybridFEM::HexElement_TensorData< element_node_count > elem_data ; + BoxBoundsLinear box_bounds ; + + FixtureElementHex8() : elem_data(), box_bounds() {} + + static void create_node_boxes_from_vertex_boxes( + const BoxType & vertex_box_global , + const std::vector< BoxType > & vertex_box_parts , + BoxType & node_box_global , + std::vector< BoxType > & node_box_parts ) + { + node_box_global = vertex_box_global ; + node_box_parts = vertex_box_parts ; + } + + void elem_to_node( const unsigned node_local , unsigned coord[] ) const + { + coord[0] += elem_data.eval_map[ node_local ][0] ; + coord[1] += elem_data.eval_map[ node_local ][1] ; + coord[2] += elem_data.eval_map[ node_local ][2] ; + } +}; + +struct FixtureElementHex27 { + static const unsigned element_node_count = 27 ; + + HybridFEM::HexElement_TensorData< element_node_count > elem_data ; + BoxBoundsQuadratic 
box_bounds ; + + FixtureElementHex27() : elem_data(), box_bounds() {} + + static void create_node_boxes_from_vertex_boxes( + const BoxType & vertex_box_global , + const std::vector< BoxType > & vertex_box_parts , + BoxType & node_box_global , + std::vector< BoxType > & node_box_parts ) + { + node_box_global = vertex_box_global ; + node_box_parts = vertex_box_parts ; + + node_box_global[0][1] = 2 * node_box_global[0][1] - 1 ; + node_box_global[1][1] = 2 * node_box_global[1][1] - 1 ; + node_box_global[2][1] = 2 * node_box_global[2][1] - 1 ; + + for ( unsigned i = 0 ; i < vertex_box_parts.size() ; ++i ) { + node_box_parts[i][0][0] = 2 * node_box_parts[i][0][0] ; + node_box_parts[i][1][0] = 2 * node_box_parts[i][1][0] ; + node_box_parts[i][2][0] = 2 * node_box_parts[i][2][0] ; + + node_box_parts[i][0][1] = + std::min( node_box_global[0][1] , 2 * node_box_parts[i][0][1] ); + node_box_parts[i][1][1] = + std::min( node_box_global[1][1] , 2 * node_box_parts[i][1][1] ); + node_box_parts[i][2][1] = + std::min( node_box_global[2][1] , 2 * node_box_parts[i][2][1] ); + } + } + + void elem_to_node( const unsigned node_local , unsigned coord[] ) const + { + coord[0] = 2 * coord[0] + elem_data.eval_map[ node_local ][0] ; + coord[1] = 2 * coord[1] + elem_data.eval_map[ node_local ][1] ; + coord[2] = 2 * coord[2] + elem_data.eval_map[ node_local ][2] ; + } +}; + +//---------------------------------------------------------------------------- + +template< typename Scalar , class Device , class ElementSpec > +struct BoxMeshFixture { + + typedef Scalar coordinate_scalar_type ; + typedef Device execution_space ; + + static const unsigned element_node_count = ElementSpec::element_node_count ; + + typedef HybridFEM::FEMesh< coordinate_scalar_type , + element_node_count , + execution_space > FEMeshType ; + + typedef typename FEMeshType::node_coords_type node_coords_type ; + typedef typename FEMeshType::elem_node_ids_type elem_node_ids_type ; + typedef typename FEMeshType::node_elem_ids_type 
node_elem_ids_type ; + + + static void verify( + const typename FEMeshType::node_coords_type::HostMirror & node_coords , + const typename FEMeshType::elem_node_ids_type::HostMirror & elem_node_ids , + const typename FEMeshType::node_elem_ids_type::HostMirror & node_elem_ids ) + { + typedef typename FEMeshType::size_type size_type ; + //typedef typename node_coords_type::value_type coords_type ; // unused + + const size_type node_count_total = node_coords.dimension_0(); + const size_type elem_count_total = elem_node_ids.dimension_0(); + + const ElementSpec element ; + + for ( size_type node_index = 0 ; + node_index < node_count_total ; ++node_index ) { + + for ( size_type + j = node_elem_ids.row_map[ node_index ] ; + j < node_elem_ids.row_map[ node_index + 1 ] ; ++j ) { + + const size_type elem_index = node_elem_ids.entries(j,0); + const size_type node_local = node_elem_ids.entries(j,1); + const size_type en_id = elem_node_ids(elem_index,node_local); + + if ( node_index != en_id ) { + std::ostringstream msg ; + msg << "BoxMeshFixture node_elem_ids error" + << " : node_index(" << node_index + << ") entry(" << j + << ") elem_index(" << elem_index + << ") node_local(" << node_local + << ") elem_node_id(" << en_id + << ")" ; + throw std::runtime_error( msg.str() ); + } + } + } + + for ( size_type elem_index = 0 ; + elem_index < elem_count_total; ++elem_index ) { + + coordinate_scalar_type elem_node_coord[ element_node_count ][3] ; + + for ( size_type nn = 0 ; nn < element_node_count ; ++nn ) { + const size_type node_index = elem_node_ids( elem_index , nn ); + + for ( size_type nc = 0 ; nc < 3 ; ++nc ) { + elem_node_coord[nn][nc] = node_coords( node_index , nc ); + } + } + + + for ( size_type nn = 0 ; nn < element_node_count ; ++nn ) { + + const unsigned ix = element.elem_data.eval_map[nn][0] ; + const unsigned iy = element.elem_data.eval_map[nn][1] ; + const unsigned iz = element.elem_data.eval_map[nn][2] ; + + if ( elem_node_coord[nn][0] != elem_node_coord[0][0] + ix 
|| + elem_node_coord[nn][1] != elem_node_coord[0][1] + iy || + elem_node_coord[nn][2] != elem_node_coord[0][2] + iz ) { + + std::ostringstream msg ; + msg << "BoxMeshFixture elem_node_coord mapping failure { " + << elem_node_coord[nn][0] << " " + << elem_node_coord[nn][1] << " " + << elem_node_coord[nn][2] << " } != { " + << elem_node_coord[ 0][0] + ix << " " + << elem_node_coord[ 0][1] + iy << " " + << elem_node_coord[ 0][2] + iz + << " }" ; + throw std::runtime_error( msg.str() ); + } + } + } + } + + //------------------------------------ + // Initialize element-node connectivity: + // Order elements that only depend on owned nodes first. + // These elements could be computed while waiting for + // received node data. + + static void layout_elements_interior_exterior( + const BoxType vertex_box_local_used , + const BoxType vertex_box_local_owned , + const BoxType node_box_local_used , + const std::vector<size_t> & node_used_id_map , + const ElementSpec element_fixture , + const size_t elem_count_interior , + const typename elem_node_ids_type::HostMirror elem_node_ids ) + { + size_t elem_index_interior = 0 ; + size_t elem_index_boundary = elem_count_interior ; + + for ( size_t iz = vertex_box_local_used[2][0] ; + iz < vertex_box_local_used[2][1] - 1 ; ++iz ) { + for ( size_t iy = vertex_box_local_used[1][0] ; + iy < vertex_box_local_used[1][1] - 1 ; ++iy ) { + for ( size_t ix = vertex_box_local_used[0][0] ; + ix < vertex_box_local_used[0][1] - 1 ; ++ix ) { + + size_t elem_index ; + + // If lower and upper vertices are owned then element is interior + if ( contain( vertex_box_local_owned, ix, iy, iz ) && + contain( vertex_box_local_owned, ix+1, iy+1, iz+1 ) ) { + elem_index = elem_index_interior++ ; + } + else { + elem_index = elem_index_boundary++ ; + } + + for ( size_t nn = 0 ; nn < element_node_count ; ++nn ) { + unsigned coord[3] = { static_cast<unsigned>(ix) , static_cast<unsigned>(iy) , static_cast<unsigned>(iz) }; + + element_fixture.elem_to_node( nn , coord 
); + + const size_t node_local_id = + box_map_id( node_box_local_used , + node_used_id_map , + coord[0] , coord[1] , coord[2] ); + + elem_node_ids( elem_index , nn ) = node_local_id ; + } + }}} + } + + //------------------------------------ + // Nested partitioning of elements by number of thread 'gangs' + + static void layout_elements_partitioned( + const BoxType vertex_box_local_used , + const BoxType /*vertex_box_local_owned*/ , + const BoxType node_box_local_used , + const std::vector<size_t> & node_used_id_map , + const ElementSpec element_fixture , + const size_t thread_gang_count , + const typename elem_node_ids_type::HostMirror elem_node_ids ) + { + std::vector< BoxType > element_box_gangs( thread_gang_count ); + + BoxType element_box_local_used = vertex_box_local_used ; + + element_box_local_used[0][1] -= 1 ; + element_box_local_used[1][1] -= 1 ; + element_box_local_used[2][1] -= 1 ; + + box_partition_rcb( element_box_local_used , element_box_gangs ); + + size_t elem_index = 0 ; + + for ( size_t ig = 0 ; ig < thread_gang_count ; ++ig ) { + + const BoxType box = element_box_gangs[ig] ; + + for ( size_t iz = box[2][0] ; iz < box[2][1] ; ++iz ) { + for ( size_t iy = box[1][0] ; iy < box[1][1] ; ++iy ) { + for ( size_t ix = box[0][0] ; ix < box[0][1] ; ++ix , ++elem_index ) { + + for ( size_t nn = 0 ; nn < element_node_count ; ++nn ) { + unsigned coord[3] = { static_cast<unsigned>(ix) , static_cast<unsigned>(iy) , static_cast<unsigned>(iz) }; + + element_fixture.elem_to_node( nn , coord ); + + const size_t node_local_id = + box_map_id( node_box_local_used , + node_used_id_map , + coord[0] , coord[1] , coord[2] ); + + elem_node_ids( elem_index , nn ) = node_local_id ; + } + }}} + } + } + + //------------------------------------ + + static FEMeshType create( const size_t proc_count , + const size_t proc_local , + const size_t gang_count , + const size_t elems_x , + const size_t elems_y , + const size_t elems_z , + const double x_coord_curve = 1 , + const double 
y_coord_curve = 1 , + const double z_coord_curve = 1 ) + { + const size_t vertices_x = elems_x + 1 ; + const size_t vertices_y = elems_y + 1 ; + const size_t vertices_z = elems_z + 1 ; + + const BoxBoundsLinear vertex_box_bounds ; + const ElementSpec element ; + + // Partition based upon vertices: + + BoxType vertex_box_global ; + std::vector< BoxType > vertex_box_parts( proc_count ); + + vertex_box_global[0][0] = 0 ; vertex_box_global[0][1] = vertices_x ; + vertex_box_global[1][0] = 0 ; vertex_box_global[1][1] = vertices_y ; + vertex_box_global[2][0] = 0 ; vertex_box_global[2][1] = vertices_z ; + + box_partition_rcb( vertex_box_global , vertex_box_parts ); + + const BoxType vertex_box_local_owned = vertex_box_parts[ proc_local ]; + + // Determine interior and used vertices: + + BoxType vertex_box_local_interior ; + BoxType vertex_box_local_used ; + + vertex_box_bounds.apply( vertex_box_global , + vertex_box_local_owned , + vertex_box_local_interior , + vertex_box_local_used ); + + // Element counts: + + const long local_elems_x = + ( vertex_box_local_used[0][1] - vertex_box_local_used[0][0] ) - 1 ; + const long local_elems_y = + ( vertex_box_local_used[1][1] - vertex_box_local_used[1][0] ) - 1 ; + const long local_elems_z = + ( vertex_box_local_used[2][1] - vertex_box_local_used[2][0] ) - 1 ; + + const size_t elem_count_total = std::max( long(0) , local_elems_x ) * + std::max( long(0) , local_elems_y ) * + std::max( long(0) , local_elems_z ); + + const long interior_elems_x = + ( vertex_box_local_owned[0][1] - vertex_box_local_owned[0][0] ) - 1 ; + const long interior_elems_y = + ( vertex_box_local_owned[1][1] - vertex_box_local_owned[1][0] ) - 1 ; + const long interior_elems_z = + ( vertex_box_local_owned[2][1] - vertex_box_local_owned[2][0] ) - 1 ; + + const size_t elem_count_interior = std::max( long(0) , interior_elems_x ) * + std::max( long(0) , interior_elems_y ) * + std::max( long(0) , interior_elems_z ); + + // Expand vertex boxes to node boxes: + + 
BoxType node_box_global ; + BoxType node_box_local_used ; + std::vector< BoxType > node_box_parts ; + + element.create_node_boxes_from_vertex_boxes( + vertex_box_global , vertex_box_parts , + node_box_global , node_box_parts ); + + // Node communication maps: + + size_t node_count_interior = 0 ; + size_t node_count_owned = 0 ; + size_t node_count_total = 0 ; + std::vector<size_t> node_used_id_map ; + std::vector<size_t> node_part_counts ; + std::vector< std::vector<size_t> > node_send_map ; + + box_partition_maps( node_box_global , + node_box_parts , + element.box_bounds , + proc_local , + node_box_local_used , + node_used_id_map , + node_count_interior , + node_count_owned , + node_count_total , + node_part_counts , + node_send_map ); + + size_t node_count_send = 0 ; + for ( size_t i = 0 ; i < node_send_map.size() ; ++i ) { + node_count_send += node_send_map[i].size(); + } + + size_t recv_msg_count = 0 ; + size_t send_msg_count = 0 ; + size_t send_count = 0 ; + + for ( size_t i = 1 ; i < proc_count ; ++i ) { + if ( node_part_counts[i] ) ++recv_msg_count ; + if ( node_send_map[i].size() ) { + ++send_msg_count ; + send_count += node_send_map[i].size(); + } + } + + // Finite element mesh: + + FEMeshType mesh ; + + if ( node_count_total ) { + mesh.node_coords = node_coords_type( "node_coords", node_count_total ); + } + + if ( elem_count_total ) { + mesh.elem_node_ids = + elem_node_ids_type( "elem_node_ids", elem_count_total ); + } + + mesh.parallel_data_map.assign( node_count_interior , + node_count_owned , + node_count_total , + recv_msg_count , + send_msg_count , + send_count ); + + typename node_coords_type::HostMirror node_coords = + Kokkos::create_mirror( mesh.node_coords ); + + typename elem_node_ids_type::HostMirror elem_node_ids = + Kokkos::create_mirror( mesh.elem_node_ids ); + + //------------------------------------ + // set node coordinates to grid location for subsequent verification + + for ( size_t iz = node_box_local_used[2][0] ; + iz < 
node_box_local_used[2][1] ; ++iz ) { + + for ( size_t iy = node_box_local_used[1][0] ; + iy < node_box_local_used[1][1] ; ++iy ) { + + for ( size_t ix = node_box_local_used[0][0] ; + ix < node_box_local_used[0][1] ; ++ix ) { + + const size_t node_local_id = + box_map_id( node_box_local_used , node_used_id_map , ix , iy , iz ); + + node_coords( node_local_id , 0 ) = ix ; + node_coords( node_local_id , 1 ) = iy ; + node_coords( node_local_id , 2 ) = iz ; + }}} + + //------------------------------------ + // Initialize element-node connectivity: + + if ( 1 < gang_count ) { + layout_elements_partitioned( vertex_box_local_used , + vertex_box_local_owned , + node_box_local_used , + node_used_id_map , + element , + gang_count , + elem_node_ids ); + } + else { + layout_elements_interior_exterior( vertex_box_local_used , + vertex_box_local_owned , + node_box_local_used , + node_used_id_map , + element , + elem_count_interior , + elem_node_ids ); + } + + //------------------------------------ + // Populate node->element connectivity: + + std::vector<size_t> node_elem_work( node_count_total , (size_t) 0 ); + + for ( size_t i = 0 ; i < elem_count_total ; ++i ) { + for ( size_t n = 0 ; n < element_node_count ; ++n ) { + ++node_elem_work[ elem_node_ids(i,n) ]; + } + } + + mesh.node_elem_ids = + Kokkos::create_staticcrsgraph< node_elem_ids_type >( "node_elem_ids" , node_elem_work ); + + typename node_elem_ids_type::HostMirror + node_elem_ids = Kokkos::create_mirror( mesh.node_elem_ids ); + + for ( size_t i = 0 ; i < node_count_total ; ++i ) { + node_elem_work[i] = node_elem_ids.row_map[i]; + } + + // Looping in element order insures the list of elements + // is sorted by element index. 
+ + for ( size_t i = 0 ; i < elem_count_total ; ++i ) { + for ( size_t n = 0 ; n < element_node_count ; ++n ) { + const unsigned nid = elem_node_ids(i, n); + const unsigned j = node_elem_work[nid] ; ++node_elem_work[nid] ; + + node_elem_ids.entries( j , 0 ) = i ; + node_elem_ids.entries( j , 1 ) = n ; + } + } + //------------------------------------ + // Verify setup with node coordinates matching grid indices. + verify( node_coords , elem_node_ids , node_elem_ids ); + + //------------------------------------ + // Scale node coordinates to problem extent with + // nonlinear mapping. + { + const double problem_extent[3] = + { static_cast<double>( vertex_box_global[0][1] - 1 ) , + static_cast<double>( vertex_box_global[1][1] - 1 ) , + static_cast<double>( vertex_box_global[2][1] - 1 ) }; + + const double grid_extent[3] = + { static_cast<double>( node_box_global[0][1] - 1 ) , + static_cast<double>( node_box_global[1][1] - 1 ) , + static_cast<double>( node_box_global[2][1] - 1 ) }; + + for ( size_t i = 0 ; i < node_count_total ; ++i ) { + const double x_unit = node_coords(i,0) / grid_extent[0] ; + const double y_unit = node_coords(i,1) / grid_extent[1] ; + const double z_unit = node_coords(i,2) / grid_extent[2] ; + + node_coords(i,0) = coordinate_scalar_type( problem_extent[0] * std::pow( x_unit , x_coord_curve ) ); + node_coords(i,1) = coordinate_scalar_type( problem_extent[1] * std::pow( y_unit , y_coord_curve ) ); + node_coords(i,2) = coordinate_scalar_type( problem_extent[2] * std::pow( z_unit , z_coord_curve ) ); + } + } + + Kokkos::deep_copy( mesh.node_coords , node_coords ); + Kokkos::deep_copy( mesh.elem_node_ids , elem_node_ids ); + Kokkos::deep_copy( mesh.node_elem_ids.entries , node_elem_ids.entries ); + + //------------------------------------ + // Communication lists: + { + recv_msg_count = 0 ; + send_msg_count = 0 ; + send_count = 0 ; + + for ( size_t i = 1 ; i < proc_count ; ++i ) { + + // Order sending starting with the local processor rank + // to try 
to smooth out the amount of messages simultaneously + // send to a particular processor. + + const int proc = ( proc_local + i ) % proc_count ; + if ( node_part_counts[i] ) { + mesh.parallel_data_map.host_recv(recv_msg_count,0) = proc ; + mesh.parallel_data_map.host_recv(recv_msg_count,1) = node_part_counts[i] ; + ++recv_msg_count ; + } + if ( node_send_map[i].size() ) { + mesh.parallel_data_map.host_send(send_msg_count,0) = proc ; + mesh.parallel_data_map.host_send(send_msg_count,1) = node_send_map[i].size() ; + for ( size_t j = 0 ; j < node_send_map[i].size() ; ++j , ++send_count ) { + mesh.parallel_data_map.host_send_item(send_count) = node_send_map[i][j] - node_count_interior ; + } + ++send_msg_count ; + } + } + } + + return mesh ; + } +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #ifndef KOKKOS_BOXMESHFIXTURE_HPP */ + + diff --git a/lib/kokkos/example/multi_fem/BoxMeshPartition.cpp b/lib/kokkos/example/multi_fem/BoxMeshPartition.cpp new file mode 100644 index 0000000000000000000000000000000000000000..17a6696fb9d934974a5c244802a93a2272e9c3da --- /dev/null +++ b/lib/kokkos/example/multi_fem/BoxMeshPartition.cpp @@ -0,0 +1,381 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <iostream> +#include <sstream> +#include <stdexcept> +#include <limits> +#include <BoxMeshPartition.hpp> + +//---------------------------------------------------------------------------- + +namespace { + +void box_partition( size_t ip , size_t up , + const BoxType & box , + BoxType * const p_box ) +{ + const size_t np = up - ip ; + + if ( 1 == np ) { + p_box[ip] = box ; + } + else { + // Choose axis with largest count: + + const size_t n0 = box[0][1] - box[0][0] ; + const size_t n1 = box[1][1] - box[1][0] ; + const size_t n2 = box[2][1] - box[2][0] ; + + const size_t axis = n2 > n1 ? ( n2 > n0 ? 2 : ( n1 > n0 ? 
1 : 0 ) ) : + ( n1 > n0 ? 1 : 0 ); + + const size_t n = box[ axis ][1] - box[ axis ][0] ; + + if ( 0 == np % 3 ) { + const size_t np_part = np / 3 ; // exact + + const size_t nbox_low = (size_t)(( (double) n ) * ( 1.0 / 3.0 )); + const size_t nbox_mid = (size_t)(( (double) n ) * ( 2.0 / 3.0 )); + + BoxType dbox_low = box ; // P = [ip,ip+np/3) + BoxType dbox_mid = box ; // P = [ip+np/3,ip+2*np/3) + BoxType dbox_upp = box ; // P = [ip+2*np/3,ip+np) + + dbox_low[ axis ][1] = box[ axis ][0] + nbox_low ; + dbox_mid[ axis ][1] = box[ axis ][0] + nbox_mid ; + + dbox_mid[ axis ][0] = dbox_low[ axis ][1]; + dbox_upp[ axis ][0] = dbox_mid[ axis ][1]; + + box_partition( ip, ip + np_part, dbox_low , p_box ); + box_partition( ip+ np_part, ip + 2*np_part, dbox_mid , p_box ); + box_partition( ip+2*np_part, up, dbox_upp , p_box ); + } + else { + const size_t np_low = np / 2 ; /* Rounded down */ + const size_t nbox_low = (size_t) + (((double)n) * ( ((double) np_low ) / ((double) np ) )); + + BoxType dbox_low = box ; + BoxType dbox_upp = box ; + + dbox_low[ axis ][1] = dbox_low[ axis ][0] + nbox_low ; + dbox_upp[ axis ][0] = dbox_low[ axis ][1]; + + box_partition( ip, ip + np_low, dbox_low , p_box ); + box_partition( ip + np_low, up, dbox_upp , p_box ); + } + } +} + +size_t box_map_offset( const BoxType & local_use , + const size_t global_i , + const size_t global_j , + const size_t global_k ) + +{ + const size_t max = std::numeric_limits<size_t>::max(); + + const size_t n[3] = + { local_use[0][1] - local_use[0][0] , + local_use[1][1] - local_use[1][0] , + local_use[2][1] - local_use[2][0] }; + + const size_t use[3] = { + ( global_i >= local_use[0][0] ? global_i - local_use[0][0] : max ) , + ( global_j >= local_use[1][0] ? global_j - local_use[1][0] : max ) , + ( global_k >= local_use[2][0] ? global_k - local_use[2][0] : max ) }; + + const size_t offset = + ( use[0] < n[0] && use[1] < n[1] && use[2] < n[2] ) ? 
+ ( use[0] + n[0] * ( use[1] + n[1] * use[2] ) ) : max ; + + if ( offset == max ) { + std::ostringstream msg ; + msg << "box_map_offset ERROR: " + << " use " << local_use + << " ( " << global_i + << " , " << global_j + << " , " << global_k + << " )" ; + throw std::runtime_error( msg.str() ); + } + + return offset ; +} + +} // namespace + +//---------------------------------------------------------------------------- + +void BoxBoundsLinear::apply( const BoxType & box_global , + const BoxType & box_part , + BoxType & box_interior , + BoxType & box_use ) const +{ + const unsigned ghost = 1 ; + + if ( 0 == count( box_part ) ) { + box_interior = box_part ; + box_use = box_part ; + } + else { + for ( size_t i = 0 ; i < 3 ; ++i ) { + + box_interior[i][0] = + ( box_part[i][0] == box_global[i][0] ) ? box_part[i][0] : ( + ( box_part[i][0] + ghost < box_part[i][1] ) ? box_part[i][0] + ghost : + box_part[i][1] ); + + box_interior[i][1] = + ( box_part[i][1] == box_global[i][1] ) ? box_part[i][1] : ( + ( box_part[i][0] + ghost < box_part[i][1] ) ? box_part[i][1] - ghost : + box_part[i][0] ); + + box_use[i][0] = + ( box_part[i][0] > ghost + box_global[i][0] ) ? box_part[i][0] - ghost : + box_global[i][0] ; + box_use[i][1] = + ( box_part[i][1] + ghost < box_global[i][1] ) ? box_part[i][1] + ghost : + box_global[i][1] ; + } + } +} + +void BoxBoundsQuadratic::apply( const BoxType & box_global , + const BoxType & box_part , + BoxType & box_interior , + BoxType & box_use ) const +{ + if ( 0 == count( box_part ) ) { + box_interior = box_part ; + box_use = box_part ; + } + else { + for ( size_t i = 0 ; i < 3 ; ++i ) { + const bool odd = ( box_part[i][0] - box_global[i][0] ) & 01 ; + + const unsigned ghost = odd ? 1 : 2 ; + + box_interior[i][0] = + ( box_part[i][0] == box_global[i][0] ) ? box_part[i][0] : ( + ( box_part[i][0] + ghost < box_part[i][1] ) ? box_part[i][0] + ghost : + box_part[i][1] ); + + box_interior[i][1] = + ( box_part[i][1] == box_global[i][1] ) ? 
box_part[i][1] : ( + ( box_part[i][0] + ghost < box_part[i][1] ) ? box_part[i][1] - ghost : + box_part[i][0] ); + + box_use[i][0] = + ( box_part[i][0] > ghost + box_global[i][0] ) ? box_part[i][0] - ghost : + box_global[i][0] ; + box_use[i][1] = + ( box_part[i][1] + ghost < box_global[i][1] ) ? box_part[i][1] + ghost : + box_global[i][1] ; + } + } +} + +//---------------------------------------------------------------------------- + +void box_partition_rcb( const BoxType & root_box , + std::vector<BoxType> & part_boxes ) +{ + const BoxBoundsLinear use_boxes ; + + const size_t part_count = part_boxes.size(); + + box_partition( 0 , part_count , root_box , & part_boxes[0] ); + + // Verify partitioning + + size_t total_cell = 0 ; + + for ( size_t i = 0 ; i < part_count ; ++i ) { + + total_cell += count( part_boxes[i] ); + + BoxType box_interior , box_use ; + + use_boxes.apply( root_box , part_boxes[i] , box_interior , box_use ); + + if ( count( box_use ) < count( part_boxes[i] ) || + count( part_boxes[i] ) < count( box_interior ) || + part_boxes[i] != intersect( part_boxes[i] , box_use ) || + box_interior != intersect( part_boxes[i] , box_interior )) { + + std::ostringstream msg ; + + msg << "box_partition_rcb ERROR : " + << "part_boxes[" << i << "] = " + << part_boxes[i] + << " use " << box_use + << " interior " << box_interior + << std::endl + << " part ^ use " << intersect( part_boxes[i] , box_use ) + << " part ^ interior " << intersect( part_boxes[i] , box_interior ); + + throw std::runtime_error( msg.str() ); + } + + for ( size_t j = i + 1 ; j < part_count ; ++j ) { + const BoxType tmp = intersect( part_boxes[i] , part_boxes[j] ); + + if ( count( tmp ) ) { + throw std::runtime_error( std::string("box partition intersection") ); + } + } + } + + if ( total_cell != count( root_box ) ) { + throw std::runtime_error( std::string("box partition count") ); + } +} + +//---------------------------------------------------------------------------- + +size_t box_map_id( const 
BoxType & local_use , + const std::vector<size_t> & local_use_id_map , + const size_t global_i , + const size_t global_j , + const size_t global_k ) + +{ + const size_t offset = + box_map_offset( local_use , global_i , global_j , global_k ); + return local_use_id_map[ offset ]; +} + +//---------------------------------------------------------------------------- + +void box_partition_maps( const BoxType & root_box , + const std::vector<BoxType> & part_boxes , + const BoxBounds & use_boxes , + const size_t my_part , + BoxType & my_use_box , + std::vector<size_t> & my_use_id_map , + size_t & my_count_interior , + size_t & my_count_owned , + size_t & my_count_uses , + std::vector<size_t> & my_part_counts , + std::vector<std::vector<size_t> > & my_send_map ) +{ + const size_t np = part_boxes.size(); + + if ( np <= my_part ) { + std::ostringstream msg ; + msg << "box_partition_maps ERROR : " + << " np(" << np << ") <= my_part(" << my_part << ")" ; + throw std::runtime_error( msg.str() ); + } + + const BoxType my_owned_box = part_boxes[my_part]; + BoxType my_interior_box ; + + + use_boxes.apply( root_box, my_owned_box, my_interior_box, my_use_box ); + + my_count_interior = count( my_interior_box ); + my_count_owned = count( my_owned_box ); + my_count_uses = count( my_use_box ); + + my_use_id_map.assign( my_count_uses , std::numeric_limits<size_t>::max() ); + + // Order ids as { owned-interior , owned-parallel , received_{(p+i)%np} } + + size_t offset_interior = 0 ; + size_t offset_parallel = my_count_interior ; + + for ( size_t iz = my_owned_box[2][0] ; iz < my_owned_box[2][1] ; ++iz ) { + for ( size_t iy = my_owned_box[1][0] ; iy < my_owned_box[1][1] ; ++iy ) { + for ( size_t ix = my_owned_box[0][0] ; ix < my_owned_box[0][1] ; ++ix ) { + const size_t offset = box_map_offset( my_use_box , ix , iy , iz ); + if ( contain( my_interior_box , ix , iy , iz ) ) { + my_use_id_map[ offset ] = offset_interior++ ; + } + else { + my_use_id_map[ offset ] = offset_parallel++ ; + } + 
}}} + + + my_part_counts.assign( np , (size_t) 0 ); + my_send_map.assign( np , std::vector<size_t>() ); + + my_part_counts[0] = my_count_owned ; + + for ( size_t i = 1 ; i < np ; ++i ) { + + const size_t ip = ( my_part + i ) % np ; + + const BoxType p_owned_box = part_boxes[ip]; + BoxType p_use_box , p_interior_box ; + use_boxes.apply( root_box, p_owned_box, p_interior_box, p_use_box ); + + const BoxType recv_box = intersect( my_use_box , p_owned_box ); + const BoxType send_box = intersect( my_owned_box , p_use_box ); + + if ( 0 != ( my_part_counts[i] = count( recv_box ) ) ) { + for ( size_t iz = recv_box[2][0] ; iz < recv_box[2][1] ; ++iz ) { + for ( size_t iy = recv_box[1][0] ; iy < recv_box[1][1] ; ++iy ) { + for ( size_t ix = recv_box[0][0] ; ix < recv_box[0][1] ; ++ix ) { + const size_t offset = box_map_offset( my_use_box , ix , iy , iz ); + my_use_id_map[ offset ] = offset_parallel++ ; + }}} + } + + if ( 0 != count( send_box ) ) { + for ( size_t iz = send_box[2][0] ; iz < send_box[2][1] ; ++iz ) { + for ( size_t iy = send_box[1][0] ; iy < send_box[1][1] ; ++iy ) { + for ( size_t ix = send_box[0][0] ; ix < send_box[0][1] ; ++ix ) { + const size_t offset = box_map_offset( my_use_box , ix , iy , iz ); + + my_send_map[ i ].push_back( my_use_id_map[ offset ] ); + }}} + } + } +} + + diff --git a/lib/kokkos/example/multi_fem/BoxMeshPartition.hpp b/lib/kokkos/example/multi_fem/BoxMeshPartition.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f2aa6f57cc2b8f5b3b64682dddfb2f94dba7e0d1 --- /dev/null +++ b/lib/kokkos/example/multi_fem/BoxMeshPartition.hpp @@ -0,0 +1,210 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
#ifndef BOXMESHPARTITION_HPP
#define BOXMESHPARTITION_HPP

#include <cstddef>
#include <algorithm>   // std::max , std::min used by intersect()
#include <utility>
#include <vector>
#include <iostream>

//----------------------------------------------------------------------------

/** \brief  Axis-aligned box of integer ordinates in three dimensions.
 *
 *  data[d][0] and data[d][1] are the lower and upper bound in dimension d.
 *  count() and contain() treat each dimension as the half-open range
 *  [ data[d][0] , data[d][1] ).
 */
struct BoxType {
  size_t data[3][2] ;

  typedef size_t range_type[2] ;

  inline
  const range_type & operator[]( size_t i ) const { return data[i]; }

  inline
  range_type & operator[]( size_t i ) { return data[i]; }

  /** \brief  True when all six bounds are equal. */
  inline
  bool operator == ( const BoxType & rhs ) const
  {
    // Each bound is compared with the same bound of rhs.
    return data[0][0] == rhs.data[0][0] && data[0][1] == rhs.data[0][1] &&
           data[1][0] == rhs.data[1][0] && data[1][1] == rhs.data[1][1] &&
           data[2][0] == rhs.data[2][0] && data[2][1] == rhs.data[2][1] ;
  }

  /** \brief  Exact negation of operator==. */
  inline
  bool operator != ( const BoxType & rhs ) const
  { return ! ( *this == rhs ); }
};

/** \brief  Number of ordinate triples inside the box.
 *
 *  A dimension whose upper bound does not exceed its lower bound
 *  contributes zero, so an empty or inverted box counts as 0.
 */
inline
size_t count( const BoxType & b )
{
  size_t n = 1 ;
  for ( size_t i = 0 ; i < 3 ; ++i ) {
    n *= b[i][1] > b[i][0] ? b[i][1] - b[i][0] : 0 ;
  }
  return n ;
}

/** \brief  True when ( i , j , k ) lies inside the half-open box. */
inline
bool contain( const BoxType & b , size_t i , size_t j , size_t k )
{
  return b[0][0] <= i && i < b[0][1] &&
         b[1][0] <= j && j < b[1][1] &&
         b[2][0] <= k && k < b[2][1] ;
}

/** \brief  Per-dimension intersection of two boxes.
 *
 *  When the boxes do not overlap in some dimension the result has
 *  lower > upper (or lower == upper) there; count() reports it as empty.
 */
inline
BoxType intersect( const BoxType & x , const BoxType & y )
{
  BoxType z ;
  for ( size_t i = 0 ; i < 3 ; ++i ) {
    z[i][0] = std::max( x[i][0] , y[i][0] );
    z[i][1] = std::min( x[i][1] , y[i][1] );
  }

  return z ;
}

/** \brief  Print as "{ x0 x1 , y0 y1 , z0 z1 }". */
inline
std::ostream & operator << ( std::ostream & s , const BoxType & box )
{
  s << "{ "
    << box[0][0] << " " << box[0][1] << " , "
    << box[1][0] << " " << box[1][1] << " , "
    << box[2][0] << " " << box[2][1] << " }" ;
  return s ;
}

//----------------------------------------------------------------------------

/** \brief  Interface for deriving the interior and use (ghosted) boxes
 *          of one part.
 *
 *  box_global and box_part are inputs; box_interior and box_use are
 *  written by the implementation.
 */
class BoxBounds {
public:
  virtual
  void apply( const BoxType & box_global ,
              const BoxType & box_part ,
                    BoxType & box_interior ,
                    BoxType & box_use ) const = 0 ;

  virtual ~BoxBounds() {}
  BoxBounds() {}
};

class BoxBoundsLinear : public BoxBounds
{
public:
  /** \brief  Default bounds to one layer of ghosting */
  virtual
  void apply( const BoxType & box_global ,
              const BoxType & box_part ,
                    BoxType & box_interior ,
                    BoxType & box_use ) const ;

  virtual ~BoxBoundsLinear() {}
  BoxBoundsLinear() {}
};

class BoxBoundsQuadratic : public BoxBounds {
public:
  /** \brief  Quadratic mesh: even ordinates have two layers,
   *          odd ordinates have one layer.
   */
  virtual
  void apply( const BoxType & box_global ,
              const BoxType & box_part ,
                    BoxType & box_interior ,
                    BoxType & box_use ) const ;

  virtual ~BoxBoundsQuadratic() {}
  BoxBoundsQuadratic() {}
};

//----------------------------------------------------------------------------
/* Partition box into part_boxes.size() sub-boxes */

void box_partition_rcb( const BoxType        & root_box ,
                        std::vector<BoxType> & part_boxes );

//----------------------------------------------------------------------------
/* Determine local id layout and communication maps for partitioned boxes.
 *
 *  Local ids are laid out as follows:
 *    { [ owned-interior ids not sent ] ,
 *      [ owned-boundary ids to be sent to other processes ] ,
 *      [ received ids from processor ( my_part + 1 ) % part_count ]
 *      [ received ids from processor ( my_part + 2 ) % part_count ]
 *      [ received ids from processor ( my_part + 3 ) % part_count ]
 *      ... };
 *
 *  This layout allows
 *  (1) received data to be copied into a contiguous block of memory
 *  (2) send data to be extracted from a contiguous block of memory.
 */
void box_partition_maps(
  const BoxType              & root_box ,          // [in] Global box
  const std::vector<BoxType> & part_boxes ,        // [in] Partitioned boxes
  const BoxBounds            & use_boxes ,         // [in] Ghost boundaries
  const size_t                 my_part ,           // [in] My local part
  BoxType                    & my_use_box ,        // [out] My used box with ghost
  std::vector<size_t>        & my_use_id_map ,     // [out] Local ordering map
  size_t                     & my_count_interior , // [out] How many interior
  size_t                     & my_count_owned ,    // [out] How many owned
  size_t                     & my_count_uses ,     // [out] How many used
  std::vector<size_t>        & my_part_counts ,    // [out] Partitioning of my_use_id_map
  std::vector<std::vector<size_t> > & my_send_map ); // [out] Send id map

/*  Mapping of cartesian coordinate to local id */
size_t box_map_id( const BoxType             & my_use_box ,
                   const std::vector<size_t> & my_use_id_map ,
                   const size_t global_i ,
                   const size_t global_j ,
                   const size_t global_k );

//----------------------------------------------------------------------------

#endif /* #ifndef BOXMESHPARTITION_HPP */
v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
/** Timings gathered by one call of run(), plus the number of steps taken.
 *  All values start at zero.
 */
struct PerformanceData {
  double mesh_time ;
  double init_time ;
  double internal_force_time ;
  double central_diff ;
  double comm_time ;
  size_t number_of_steps ;

  PerformanceData()
    : mesh_time( 0 )
    , init_time( 0 )
    , internal_force_time( 0 )
    , central_diff( 0 )
    , comm_time( 0 )
    , number_of_steps( 0 )
    {}

  /** Keep, for each timing, the smaller of this object's value and rhs's.
   *  number_of_steps is left untouched.
   */
  void best( const PerformanceData & rhs )
  {
    keep_smaller( mesh_time           , rhs.mesh_time );
    keep_smaller( init_time           , rhs.init_time );
    keep_smaller( internal_force_time , rhs.internal_force_time );
    keep_smaller( central_diff        , rhs.central_diff );
    keep_smaller( comm_time           , rhs.comm_time );
  }

private:

  // Overwrite 'mine' only when 'other' is strictly smaller.
  static void keep_smaller( double & mine , const double other )
  {
    if ( other < mine ) { mine = other ; }
  }
};
/** Run 'steps' explicit time steps on 'mesh' and return the accumulated
 *  timings.
 *
 *  \param mesh           finite element mesh; its parallel_data_map supplies
 *                        the machine handle and the node counts used below
 *  \param global_max_x   used as the x boundary value (x_bc) passed to
 *                        nodal_step and for the sample output position
 *  \param global_max_y   used only to select nodes for sample output
 *  \param global_max_z   used only to select nodes for sample output
 *  \param steps          number of time steps to execute
 *  \param print_sample   0: no output; 1 or 2: print selected displacements
 *                        every 100th step
 *
 *  Each timing is reduced with comm::max over 'machine' before being stored
 *  or accumulated in the returned PerformanceData.
 */
template< typename Scalar , class FixtureType >
PerformanceData run( const typename FixtureType::FEMeshType & mesh ,
                     const int global_max_x ,
                     const int global_max_y ,
                     const int global_max_z ,
                     const int steps ,
                     const int print_sample )
{
  typedef Scalar                              scalar_type ;
  typedef FixtureType                         fixture_type ;
  typedef typename fixture_type::execution_space  execution_space ;
  //typedef typename fixture_type::FEMeshType   mesh_type ; // unused

  enum { ElementNodeCount = fixture_type::element_node_count };

  // Number of state slots; next_state is advanced modulo this value.
  const int NumStates = 2;

  const int total_num_steps = steps ;

  const Scalar user_dt = 5.0e-6;
  //const Scalar  end_time = 0.0050;

  // element block parameters
  const Scalar  lin_bulk_visc = 0.0;
  const Scalar  quad_bulk_visc = 0.0;

  // const Scalar  lin_bulk_visc = 0.06;
  // const Scalar  quad_bulk_visc = 1.2;
  // const Scalar  hg_stiffness = 0.0;
  // const Scalar  hg_viscosity = 0.0;
  // const Scalar  hg_stiffness = 0.03;
  // const Scalar  hg_viscosity = 0.001;

  // material properties
  const Scalar youngs_modulus=1.0e6;
  const Scalar poissons_ratio=0.0;
  const Scalar  density = 8.0e-4;

  const comm::Machine machine = mesh.parallel_data_map.machine ;

  PerformanceData perf_data ;

  // Timer starts here; its first reading below becomes init_time.
  Kokkos::Timer wall_clock ;

  //------------------------------------
  // Generate fields

  typedef Fields< scalar_type , execution_space > fields_type ;

  fields_type mesh_fields( mesh ,
                           lin_bulk_visc ,
                           quad_bulk_visc ,
                           youngs_modulus ,
                           poissons_ratio ,
                           density );

  // Host mirrors used for the initial condition and for sample output.
  typename fields_type::node_coords_type::HostMirror
    model_coords_h = Kokkos::create_mirror( mesh_fields.model_coords );

  typename fields_type::geom_state_array_type::HostMirror
    displacement_h = Kokkos::create_mirror( mesh_fields.displacement );

  typename fields_type::geom_state_array_type::HostMirror
    velocity_h = Kokkos::create_mirror( mesh_fields.velocity );

  Kokkos::deep_copy( model_coords_h , mesh_fields.model_coords );

  //------------------------------------
  // Initialization

  initialize_element<Scalar,execution_space>::apply( mesh_fields );
  initialize_node<   Scalar,execution_space>::apply( mesh_fields );

  const Scalar x_bc = global_max_x ;

  // Initial condition on velocity to initiate a pulse along the X axis
  {
    const unsigned X = 0;
    for (int inode = 0; inode< mesh_fields.num_nodes; ++inode) {
      // Only nodes whose model X coordinate is exactly zero are set,
      // in both state slots.
      if ( model_coords_h(inode,X) == 0) {
        velocity_h(inode,X,0) = 1.0e3;
        velocity_h(inode,X,1) = 1.0e3;
      }
    }
  }

  // NOTE(review): velocity_h is never copied from the device before this
  // write-back, so the unset entries are whatever create_mirror produced;
  // presumably zero-initialized - confirm.
  Kokkos::deep_copy( mesh_fields.velocity , velocity_h );

  //--------------------------------------------------------------------------
  // We will call a sequence of functions.  These functions have been
  // grouped into several functors to balance the number of global memory
  // accesses versus requiring too many registers or too much L1 cache.
  // Global memory accesses have read/write cost and memory subsystem
  // contention cost.
  //--------------------------------------------------------------------------

  perf_data.init_time = comm::max( machine , wall_clock.seconds() );

  // Parameters required for the internal force computations.

  int current_state  = 0;
  int previous_state = 0;
  int next_state     = 0;

  perf_data.number_of_steps = total_num_steps ;

#if defined( KOKKOS_HAVE_MPI )

  typedef typename
    fields_type::geom_state_array_type::value_type  comm_value_type ;

  // Six values per node: see the buffer layout comment in the step loop.
  const unsigned comm_value_count = 6 ;

  Kokkos::AsyncExchange< comm_value_type , execution_space ,
                         Kokkos::ParallelDataMap >
    comm_exchange( mesh.parallel_data_map , comm_value_count );

#endif

  for (int step = 0; step < total_num_steps; ++step) {

    wall_clock.reset();

    //------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_MPI )
    {
      // Communicate "send" nodes' displacement and velocity next_state
      // to the ghosted nodes.
      // buffer packages: { { dx , dy , dz , vx , vy , vz }_node }

      pack_state< Scalar , execution_space >
        ::apply( comm_exchange.buffer() ,
                 mesh.parallel_data_map.count_interior ,
                 mesh.parallel_data_map.count_send ,
                 mesh_fields , next_state );

      comm_exchange.setup();

      comm_exchange.send_receive();

      unpack_state< Scalar , execution_space >
        ::apply( mesh_fields , next_state ,
                 comm_exchange.buffer() ,
                 mesh.parallel_data_map.count_owned ,
                 mesh.parallel_data_map.count_receive );

      execution_space::fence();
    }
#endif

    // Accumulated on every step, also when the MPI section is compiled out.
    perf_data.comm_time += comm::max( machine , wall_clock.seconds() );

    //------------------------------------------------------------------------
    // rotate the states

    previous_state = current_state;
    current_state = next_state;
    ++next_state;
    next_state %= NumStates;

    wall_clock.reset();

    // First kernel 'grad_hgop' combines two functions:
    // gradient, velocity gradient
    grad< Scalar , execution_space >::apply( mesh_fields ,
                                             current_state ,
                                             previous_state );

    // Combine tensor decomposition and rotation functions.
    decomp_rotate< Scalar , execution_space >::apply( mesh_fields ,
                                                      current_state ,
                                                      previous_state );

    internal_force< Scalar , execution_space >::apply( mesh_fields ,
                                                       user_dt ,
                                                       current_state );

    // Fence before reading the timer so the kernels above are included.
    execution_space::fence();

    perf_data.internal_force_time +=
      comm::max( machine , wall_clock.seconds() );

    wall_clock.reset();

    // Assembly of elements' contributions to nodal force into
    // a nodal force vector.  Update the accelerations, velocities,
    // displacements.
    // The same pattern can be used for matrix-free residual computations.
    nodal_step< Scalar , execution_space >::apply( mesh_fields ,
                                                   x_bc ,
                                                   current_state,
                                                   next_state );
    execution_space::fence();

    perf_data.central_diff +=
      comm::max( machine , wall_clock.seconds() );

    // Optional sample output every 100th step (including step 0).
    if ( print_sample && 0 == step % 100 ) {
      Kokkos::deep_copy( displacement_h , mesh_fields.displacement );
      Kokkos::deep_copy( velocity_h , mesh_fields.velocity );

      if ( 1 == print_sample ) {

        // X displacement of owned nodes on the line y == 0 , z == 0.
        std::cout << "step " << step
                  << " : displacement(*,0,0) =" ;
        for ( int i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
          if ( model_coords_h(i,1) == 0 && model_coords_h(i,2) == 0 ) {
            std::cout << " " << displacement_h(i,0,next_state);
          }
        }
        std::cout << std::endl ;

        // X displacement of owned nodes on the line
        // y == global_max_y , z == global_max_z (within tol).
        const float tol = 1.0e-6 ;
        const int yb = global_max_y ;
        const int zb = global_max_z ;
        std::cout << "step " << step
                  << " : displacement(*," << yb << "," << zb << ") =" ;
        for ( int i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
          if ( fabs( model_coords_h(i,1) - yb ) < tol &&
               fabs( model_coords_h(i,2) - zb ) < tol ) {
            std::cout << " " << displacement_h(i,0,next_state);
          }
        }
        std::cout << std::endl ;
      }
      else if ( 2 == print_sample ) {

        // Full displacement vector of the owned node(s) located at the
        // integer midpoint of the global extents (within tol).
        const float tol = 1.0e-6 ;
        const int xb = global_max_x / 2 ;
        const int yb = global_max_y / 2 ;
        const int zb = global_max_z / 2 ;

        for ( int i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
          if ( fabs( model_coords_h(i,0) - xb ) < tol &&
               fabs( model_coords_h(i,1) - yb ) < tol &&
               fabs( model_coords_h(i,2) - zb ) < tol ) {
            std::cout << "step " << step
                      << " : displacement("
                      << xb << "," << yb << "," << zb << ") = {"
                      << std::setprecision(6)
                      << " " << displacement_h(i,0,next_state)
                      << std::setprecision(2)
                      << " " << displacement_h(i,1,next_state)
                      << std::setprecision(2)
                      << " " << displacement_h(i,2,next_state)
                      << " }" << std::endl ;
          }
        }
      }
    }
  }

  return perf_data ;
}
+ const int gang_count , + const int elem_count_beg , + const int elem_count_end , + const int runs ) +{ + typedef Scalar scalar_type ; + typedef Device execution_space ; + typedef double coordinate_scalar_type ; + typedef FixtureElementHex8 fixture_element_type ; + + typedef BoxMeshFixture< coordinate_scalar_type , + execution_space , + fixture_element_type > fixture_type ; + + typedef typename fixture_type::FEMeshType mesh_type ; + + const size_t proc_count = comm::size( machine ); + const size_t proc_rank = comm::rank( machine ); + + const int space = 15 ; + const int steps = 1000 ; + const int print_sample = 0 ; + + if ( comm::rank( machine ) == 0 ) { + + std::cout << std::endl ; + std::cout << "\"MiniExplicitDynamics with Kokkos " << label + << " time_steps(" << steps << ")" + << "\"" << std::endl; + std::cout << std::left << std::setw(space) << "\"Element\" , "; + std::cout << std::left << std::setw(space) << "\"Node\" , "; + std::cout << std::left << std::setw(space) << "\"Initialize\" , "; + std::cout << std::left << std::setw(space) << "\"ElemForce\" , "; + std::cout << std::left << std::setw(space) << "\"NodeUpdate\" , "; + std::cout << std::left << std::setw(space) << "\"NodeComm\" , "; + std::cout << std::left << std::setw(space) << "\"Time/Elem\" , "; + std::cout << std::left << std::setw(space) << "\"Time/Node\""; + + std::cout << std::endl; + + std::cout << std::left << std::setw(space) << "\"count\" , "; + std::cout << std::left << std::setw(space) << "\"count\" , "; + std::cout << std::left << std::setw(space) << "\"microsec\" , "; + std::cout << std::left << std::setw(space) << "\"microsec\" , "; + std::cout << std::left << std::setw(space) << "\"microsec\" , "; + std::cout << std::left << std::setw(space) << "\"microsec\" , "; + std::cout << std::left << std::setw(space) << "\"microsec\" , "; + std::cout << std::left << std::setw(space) << "\"microsec\""; + + std::cout << std::endl; + } + + for(int i = elem_count_beg ; i < elem_count_end ; i *= 2 
) + { + const int iz = std::max( 1 , (int) cbrt( ((double) i) / 2.0 ) ); + const int iy = iz + 1 ; + const int ix = 2 * iy ; + const int nelem = ix * iy * iz ; + const int nnode = ( ix + 1 ) * ( iy + 1 ) * ( iz + 1 ); + + mesh_type mesh = + fixture_type::create( proc_count , proc_rank , gang_count , + ix , iy , iz ); + + mesh.parallel_data_map.machine = machine ; + + PerformanceData perf , best ; + + for(int j = 0; j < runs; j++){ + + perf = run<scalar_type,fixture_type>(mesh,ix,iy,iz,steps,print_sample); + + if( j == 0 ) { + best = perf ; + } + else { + best.best( perf ); + } + } + + if ( comm::rank( machine ) == 0 ) { + double time_per_element = + ( best.internal_force_time ) / ( nelem * perf.number_of_steps ); + double time_per_node = + ( best.comm_time + best.central_diff ) / ( nnode * perf.number_of_steps ); + + std::cout << std::setw(space-3) << nelem << " , " + << std::setw(space-3) << nnode << " , " + << std::setw(space-3) << best.number_of_steps << " , " + << std::setw(space-3) << best.init_time * 1000000 << " , " + << std::setw(space-3) + << ( best.internal_force_time * 1000000 ) / best.number_of_steps << " , " + << std::setw(space-3) + << ( best.central_diff * 1000000 ) / best.number_of_steps << " , " + << std::setw(space-3) + << ( best.comm_time * 1000000 ) / best.number_of_steps << " , " + << std::setw(space-3) << time_per_element * 1000000 << " , " + << std::setw(space-3) << time_per_node * 1000000 + << std::endl ; + } + } +} + + +} // namespace Explicit + +#endif /* #ifndef EXPLICIT_DRIVER_HPP */ diff --git a/lib/kokkos/example/multi_fem/ExplicitFunctors.hpp b/lib/kokkos/example/multi_fem/ExplicitFunctors.hpp new file mode 100644 index 0000000000000000000000000000000000000000..feea82244f2a57571034a87e31a537c530b0062e --- /dev/null +++ b/lib/kokkos/example/multi_fem/ExplicitFunctors.hpp @@ -0,0 +1,1471 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
/** Constants, array types and all mesh/compute arrays of the explicit
 *  dynamics example, bundled so that functors can copy the views they need.
 *
 *  Every data member is const and is set once by the constructor.
 *  The member declaration order matters: num_nodes, num_nodes_owned and
 *  num_elements are declared first because the constructor's initializer
 *  list uses them to size the arrays declared after them.
 */
template<typename Scalar , class Device >
struct Fields {

  static const int NumStates     = 2 ;
  static const int SpatialDim    = 3 ;
  static const int ElemNodeCount = 8 ;

  // Indices for full 3x3 tensor:

  static const int K_F_XX = 0 ;
  static const int K_F_YY = 1 ;
  static const int K_F_ZZ = 2 ;
  static const int K_F_XY = 3 ;
  static const int K_F_YZ = 4 ;
  static const int K_F_ZX = 5 ;
  static const int K_F_YX = 6 ;
  static const int K_F_ZY = 7 ;
  static const int K_F_XZ = 8 ;

  //  Indexes into a 3 by 3 symmetric tensor stored as a length 6 vector
  //  (each off-diagonal pair, e.g. XY and YX, shares one index)

  static const int K_S_XX = 0 ;
  static const int K_S_YY = 1 ;
  static const int K_S_ZZ = 2 ;
  static const int K_S_XY = 3 ;
  static const int K_S_YZ = 4 ;
  static const int K_S_ZX = 5 ;
  static const int K_S_YX = 3 ;
  static const int K_S_ZY = 4 ;
  static const int K_S_XZ = 5 ;

  //  Indexes into a 3 by 3 skew symmetric tensor stored as a length 3 vector

  static const int K_V_XY = 0 ;
  static const int K_V_YZ = 1 ;
  static const int K_V_ZX = 2 ;


  typedef Device                           execution_space ;
  typedef typename execution_space::size_type  size_type ;

  typedef HybridFEM::FEMesh<double,ElemNodeCount,execution_space>  FEMesh ;

  typedef typename FEMesh::node_coords_type    node_coords_type ;
  typedef typename FEMesh::elem_node_ids_type  elem_node_ids_type ;
  typedef typename FEMesh::node_elem_ids_type  node_elem_ids_type ;
  typedef typename Kokkos::ParallelDataMap   parallel_data_map ;

  // NOTE(review): the per-state nodal array uses double where every other
  // array type below uses Scalar - confirm this is intentional.
  typedef Kokkos::View< double[][ SpatialDim ][ NumStates ] , execution_space > geom_state_array_type ;
  typedef Kokkos::View< Scalar[][ SpatialDim ] , execution_space > geom_array_type ;
  typedef Kokkos::View< Scalar[] ,               execution_space > array_type ;
  typedef Kokkos::View< Scalar ,                 execution_space > scalar_type ;

  typedef Kokkos::View< Scalar[][  6 ] ,    execution_space >  elem_sym_tensor_type ;
  typedef Kokkos::View< Scalar[][  9 ] ,    execution_space >  elem_tensor_type ;
  typedef Kokkos::View< Scalar[][  9 ][ NumStates ] , execution_space >  elem_tensor_state_type ;
  typedef Kokkos::View< Scalar[][ SpatialDim ][ ElemNodeCount ] , execution_space > elem_node_geom_type ;

  // Parameters:
  const int num_nodes ;        // count_owned + count_receive
  const int num_nodes_owned ;  // count_owned
  const int num_elements ;     // first extent of mesh.elem_node_ids

  const Scalar  lin_bulk_visc;
  const Scalar  quad_bulk_visc;
  const Scalar  two_mu;        // youngs_modulus / ( 1 + poissons_ratio )
  const Scalar  bulk_modulus;  // youngs_modulus / ( 3 * ( 1 - 2 * poissons_ratio ) )
  const Scalar  density;

  // Mesh:
  const elem_node_ids_type  elem_node_connectivity ;
  const node_elem_ids_type  node_elem_connectivity ;
  const node_coords_type    model_coords ;

  // Compute:
  const scalar_type                dt ;
  const scalar_type                prev_dt ;
  const geom_state_array_type      displacement ;
  const geom_state_array_type      velocity ;
  const geom_array_type            acceleration ;
  const geom_array_type            internal_force ;
  const array_type                 nodal_mass ;
  const array_type                 elem_mass ;
  const array_type                 internal_energy ;
  const elem_sym_tensor_type       stress_new ;
  const elem_tensor_state_type     rotation ;
  const elem_node_geom_type        element_force ;
  const elem_tensor_type           vel_grad ;
  const elem_sym_tensor_type       stretch ;
  const elem_sym_tensor_type       rot_stretch ;

  /** Take the counts and connectivity from 'mesh', derive the material
   *  constants and allocate the compute arrays.
   *
   *  displacement and velocity are sized by num_nodes (owned + received);
   *  the other nodal arrays are sized by num_nodes_owned; element arrays
   *  are sized by num_elements.
   */
  Fields(
    const FEMesh & mesh,
    Scalar arg_lin_bulk_visc,
    Scalar arg_quad_bulk_visc,
    Scalar youngs_modulus,
    Scalar poissons_ratio,
    Scalar arg_density )
    : num_nodes(       mesh.parallel_data_map.count_owned +
                       mesh.parallel_data_map.count_receive )
    , num_nodes_owned( mesh.parallel_data_map.count_owned )
    , num_elements(    mesh.elem_node_ids.dimension_0() )
    , lin_bulk_visc(  arg_lin_bulk_visc )
    , quad_bulk_visc( arg_quad_bulk_visc )
    , two_mu(youngs_modulus/(1.0+poissons_ratio))
    , bulk_modulus(youngs_modulus/(3*(1.0-2.0*poissons_ratio)))
    , density(arg_density)

    // mesh

    , elem_node_connectivity( mesh.elem_node_ids ) // ( num_elements , ElemNodeCount )
    , node_elem_connectivity( mesh.node_elem_ids ) // ( num_nodes , ... )
    , model_coords(  mesh.node_coords )            // ( num_nodes , 3 )

    // compute with input/output

    , dt(              "dt" )
    , prev_dt(         "prev_dt" )
    , displacement(    "displacement" ,   num_nodes )
    , velocity(        "velocity" ,       num_nodes )
    , acceleration(    "acceleration" ,   num_nodes_owned )
    , internal_force(  "internal_force" , num_nodes_owned )
    , nodal_mass(      "nodal_mass" ,     num_nodes_owned )
    , elem_mass(       "elem_mass" ,       num_elements )
    , internal_energy( "internal_energy" , num_elements )
    , stress_new(      "stress_new" ,      num_elements )

    // temporary arrays

    , rotation(      "rotation" ,  num_elements )
    , element_force( "element_force" , num_elements )
    , vel_grad(      "vel_grad" ,  num_elements )
    , stretch(       "stretch" , num_elements )
    , rot_stretch(   "rot_stretch" , num_elements )
  { }
};
bulk_modulus(youngs_modulus/(3*(1.0-2.0*poissons_ratio))) + , density(arg_density) + + // mesh + + , elem_node_connectivity( mesh.elem_node_ids ) // ( num_elements , ElemNodeCount ) + , node_elem_connectivity( mesh.node_elem_ids ) // ( num_nodes , ... ) + , model_coords( mesh.node_coords ) // ( num_nodes , 3 ) + + // compute with input/output + + , dt( "dt" ) + , prev_dt( "prev_dt" ) + , displacement( "displacement" , num_nodes ) + , velocity( "velocity" , num_nodes ) + , acceleration( "acceleration" , num_nodes_owned ) + , internal_force( "internal_force" , num_nodes_owned ) + , nodal_mass( "nodal_mass" , num_nodes_owned ) + , elem_mass( "elem_mass" , num_elements ) + , internal_energy( "internal_energy" , num_elements ) + , stress_new( "stress_new" , num_elements ) + + // temporary arrays + + , rotation( "rotation" , num_elements ) + , element_force( "element_force" , num_elements ) + , vel_grad( "vel_grad" , num_elements ) + , stretch( "stretch" , num_elements ) + , rot_stretch( "rot_stretch" , num_elements ) + { } +}; + + +//---------------------------------------------------------------------------- + +template< typename Scalar , class DeviceType > +KOKKOS_INLINE_FUNCTION +Scalar dot8( const Scalar * a , const Scalar * b ) +{ return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3] + + a[4] * b[4] + a[5] * b[5] + a[6] * b[6] + a[7] * b[7] ; } + +template< typename Scalar , class DeviceType > +KOKKOS_INLINE_FUNCTION +void comp_grad( const Scalar * const x , + const Scalar * const y , + const Scalar * const z, + Scalar * const grad_x , + Scalar * const grad_y , + Scalar * const grad_z ) +{ + // calc X difference vectors + + Scalar R42=(x[3] - x[1]); + Scalar R52=(x[4] - x[1]); + Scalar R54=(x[4] - x[3]); + + Scalar R63=(x[5] - x[2]); + Scalar R83=(x[7] - x[2]); + Scalar R86=(x[7] - x[5]); + + Scalar R31=(x[2] - x[0]); + Scalar R61=(x[5] - x[0]); + Scalar R74=(x[6] - x[3]); + + Scalar R72=(x[6] - x[1]); + Scalar R75=(x[6] - x[4]); + Scalar R81=(x[7] - x[0]); 
+ + Scalar t1=(R63 + R54); + Scalar t2=(R61 + R74); + Scalar t3=(R72 + R81); + + Scalar t4 =(R86 + R42); + Scalar t5 =(R83 + R52); + Scalar t6 =(R75 + R31); + + // Calculate Y gradient from X and Z data + + grad_y[0] = (z[1] * t1) - (z[2] * R42) - (z[3] * t5) + (z[4] * t4) + (z[5] * R52) - (z[7] * R54); + grad_y[1] = (z[2] * t2) + (z[3] * R31) - (z[0] * t1) - (z[5] * t6) + (z[6] * R63) - (z[4] * R61); + grad_y[2] = (z[3] * t3) + (z[0] * R42) - (z[1] * t2) - (z[6] * t4) + (z[7] * R74) - (z[5] * R72); + grad_y[3] = (z[0] * t5) - (z[1] * R31) - (z[2] * t3) + (z[7] * t6) + (z[4] * R81) - (z[6] * R83); + grad_y[4] = (z[5] * t3) + (z[6] * R86) - (z[7] * t2) - (z[0] * t4) - (z[3] * R81) + (z[1] * R61); + grad_y[5] = (z[6] * t5) - (z[4] * t3) - (z[7] * R75) + (z[1] * t6) - (z[0] * R52) + (z[2] * R72); + grad_y[6] = (z[7] * t1) - (z[5] * t5) - (z[4] * R86) + (z[2] * t4) - (z[1] * R63) + (z[3] * R83); + grad_y[7] = (z[4] * t2) - (z[6] * t1) + (z[5] * R75) - (z[3] * t6) - (z[2] * R74) + (z[0] * R54); + + // calc Z difference vectors + + R42=(z[3] - z[1]); + R52=(z[4] - z[1]); + R54=(z[4] - z[3]); + + R63=(z[5] - z[2]); + R83=(z[7] - z[2]); + R86=(z[7] - z[5]); + + R31=(z[2] - z[0]); + R61=(z[5] - z[0]); + R74=(z[6] - z[3]); + + R72=(z[6] - z[1]); + R75=(z[6] - z[4]); + R81=(z[7] - z[0]); + + t1=(R63 + R54); + t2=(R61 + R74); + t3=(R72 + R81); + + t4 =(R86 + R42); + t5 =(R83 + R52); + t6 =(R75 + R31); + + // Calculate X gradient from Y and Z data + + grad_x[0] = (y[1] * t1) - (y[2] * R42) - (y[3] * t5) + (y[4] * t4) + (y[5] * R52) - (y[7] * R54); + grad_x[1] = (y[2] * t2) + (y[3] * R31) - (y[0] * t1) - (y[5] * t6) + (y[6] * R63) - (y[4] * R61); + grad_x[2] = (y[3] * t3) + (y[0] * R42) - (y[1] * t2) - (y[6] * t4) + (y[7] * R74) - (y[5] * R72); + grad_x[3] = (y[0] * t5) - (y[1] * R31) - (y[2] * t3) + (y[7] * t6) + (y[4] * R81) - (y[6] * R83); + grad_x[4] = (y[5] * t3) + (y[6] * R86) - (y[7] * t2) - (y[0] * t4) - (y[3] * R81) + (y[1] * R61); + grad_x[5] = (y[6] * t5) - (y[4] * 
t3) - (y[7] * R75) + (y[1] * t6) - (y[0] * R52) + (y[2] * R72); + grad_x[6] = (y[7] * t1) - (y[5] * t5) - (y[4] * R86) + (y[2] * t4) - (y[1] * R63) + (y[3] * R83); + grad_x[7] = (y[4] * t2) - (y[6] * t1) + (y[5] * R75) - (y[3] * t6) - (y[2] * R74) + (y[0] * R54); + + // calc Y difference vectors + + R42=(y[3] - y[1]); + R52=(y[4] - y[1]); + R54=(y[4] - y[3]); + + R63=(y[5] - y[2]); + R83=(y[7] - y[2]); + R86=(y[7] - y[5]); + + R31=(y[2] - y[0]); + R61=(y[5] - y[0]); + R74=(y[6] - y[3]); + + R72=(y[6] - y[1]); + R75=(y[6] - y[4]); + R81=(y[7] - y[0]); + + t1=(R63 + R54); + t2=(R61 + R74); + t3=(R72 + R81); + + t4 =(R86 + R42); + t5 =(R83 + R52); + t6 =(R75 + R31); + + // Calculate Z gradient from X and Y data + + grad_z[0] = (x[1] * t1) - (x[2] * R42) - (x[3] * t5) + (x[4] * t4) + (x[5] * R52) - (x[7] * R54); + grad_z[1] = (x[2] * t2) + (x[3] * R31) - (x[0] * t1) - (x[5] * t6) + (x[6] * R63) - (x[4] * R61); + grad_z[2] = (x[3] * t3) + (x[0] * R42) - (x[1] * t2) - (x[6] * t4) + (x[7] * R74) - (x[5] * R72); + grad_z[3] = (x[0] * t5) - (x[1] * R31) - (x[2] * t3) + (x[7] * t6) + (x[4] * R81) - (x[6] * R83); + grad_z[4] = (x[5] * t3) + (x[6] * R86) - (x[7] * t2) - (x[0] * t4) - (x[3] * R81) + (x[1] * R61); + grad_z[5] = (x[6] * t5) - (x[4] * t3) - (x[7] * R75) + (x[1] * t6) - (x[0] * R52) + (x[2] * R72); + grad_z[6] = (x[7] * t1) - (x[5] * t5) - (x[4] * R86) + (x[2] * t4) - (x[1] * R63) + (x[3] * R83); + grad_z[7] = (x[4] * t2) - (x[6] * t1) + (x[5] * R75) - (x[3] * t6) - (x[2] * R74) + (x[0] * R54); +} + +//---------------------------------------------------------------------------- + +template< typename Scalar , class DeviceType > +struct initialize_element +{ + typedef DeviceType execution_space ; + + typedef Explicit::Fields< Scalar , execution_space > Fields ; + + typename Fields::elem_node_ids_type elem_node_connectivity ; + typename Fields::node_coords_type model_coords ; + typename Fields::elem_sym_tensor_type stretch ; + typename Fields::elem_tensor_state_type 
rotation ; + typename Fields::array_type elem_mass ; + + const Scalar density ; + + initialize_element( const Fields & mesh_fields ) + : elem_node_connectivity( mesh_fields.elem_node_connectivity ) + , model_coords( mesh_fields.model_coords ) + , stretch( mesh_fields.stretch ) + , rotation( mesh_fields.rotation ) + , elem_mass( mesh_fields.elem_mass ) + , density( mesh_fields.density ) + {} + + KOKKOS_INLINE_FUNCTION + void operator()( int ielem )const + { + const int K_XX = 0 ; + const int K_YY = 1 ; + const int K_ZZ = 2 ; + const Scalar ONE12TH = 1.0 / 12.0 ; + + Scalar x[ Fields::ElemNodeCount ]; + Scalar y[ Fields::ElemNodeCount ]; + Scalar z[ Fields::ElemNodeCount ]; + Scalar grad_x[ Fields::ElemNodeCount ]; + Scalar grad_y[ Fields::ElemNodeCount ]; + Scalar grad_z[ Fields::ElemNodeCount ]; + + for ( int i = 0 ; i < Fields::ElemNodeCount ; ++i ) { + const int n = elem_node_connectivity( ielem , i ); + + x[i] = model_coords( n , 0 ); + y[i] = model_coords( n , 1 ); + z[i] = model_coords( n , 2 ); + } + + comp_grad<Scalar,execution_space>( x, y, z, grad_x, grad_y, grad_z); + + stretch(ielem,K_XX) = 1 ; + stretch(ielem,K_YY) = 1 ; + stretch(ielem,K_ZZ) = 1 ; + + rotation(ielem,K_XX,0) = 1 ; + rotation(ielem,K_YY,0) = 1 ; + rotation(ielem,K_ZZ,0) = 1 ; + + rotation(ielem,K_XX,1) = 1 ; + rotation(ielem,K_YY,1) = 1 ; + rotation(ielem,K_ZZ,1) = 1 ; + + elem_mass(ielem) = ONE12TH * density * + dot8<Scalar,execution_space>( x , grad_x ); + } + + static void apply( const Fields & mesh_fields ) + { + initialize_element op( mesh_fields ); + Kokkos::parallel_for( mesh_fields.num_elements , op ); + } +}; + + +template<typename Scalar , class DeviceType > +struct initialize_node +{ + typedef DeviceType execution_space ; + + typedef Explicit::Fields< Scalar , execution_space > Fields ; + + typename Fields::node_elem_ids_type node_elem_connectivity ; + typename Fields::array_type nodal_mass ; + typename Fields::array_type elem_mass ; + + static const int ElemNodeCount = 
Fields::ElemNodeCount ; + + initialize_node( const Fields & mesh_fields ) + : node_elem_connectivity( mesh_fields.node_elem_connectivity ) + , nodal_mass( mesh_fields.nodal_mass ) + , elem_mass( mesh_fields.elem_mass ) + {} + + + KOKKOS_INLINE_FUNCTION + void operator()( int inode )const + { + const int begin = node_elem_connectivity.row_map[inode]; + const int end = node_elem_connectivity.row_map[inode+1]; + + Scalar node_mass = 0; + + for(int i = begin; i != end; ++i) { + const int elem_id = node_elem_connectivity.entries( i , 0 ); + node_mass += elem_mass(elem_id); + } + + nodal_mass(inode) = node_mass / ElemNodeCount ; + } + + static void apply( const Fields & mesh_fields ) + { + initialize_node op( mesh_fields ); + Kokkos::parallel_for( mesh_fields.num_nodes_owned , op ); + } +}; + +//---------------------------------------------------------------------------- + + +template<typename Scalar, class DeviceType > +struct grad +{ + typedef DeviceType execution_space ; + + typedef Explicit::Fields< Scalar , execution_space > Fields ; + + static const int ElemNodeCount = Fields::ElemNodeCount ; + + static const int K_F_XX = Fields::K_F_XX ; + static const int K_F_YY = Fields::K_F_YY ; + static const int K_F_ZZ = Fields::K_F_ZZ ; + static const int K_F_XY = Fields::K_F_XY ; + static const int K_F_YZ = Fields::K_F_YZ ; + static const int K_F_ZX = Fields::K_F_ZX ; + static const int K_F_YX = Fields::K_F_YX ; + static const int K_F_ZY = Fields::K_F_ZY ; + static const int K_F_XZ = Fields::K_F_XZ ; + + // Global arrays used by this functor. 
+ + const typename Fields::elem_node_ids_type elem_node_connectivity ; + const typename Fields::node_coords_type model_coords ; + const typename Fields::geom_state_array_type displacement ; + const typename Fields::geom_state_array_type velocity ; + const typename Fields::elem_tensor_type vel_grad ; + const typename Fields::scalar_type dt ; + + const int current_state; + const int previous_state; + + // Constructor on the Host to populate this device functor. + // All array view copies are shallow. + grad( const Fields & fields, + const int arg_current_state, + const int arg_previous_state) + : elem_node_connectivity( fields.elem_node_connectivity) + , model_coords( fields.model_coords) + , displacement( fields.displacement) + , velocity( fields.velocity) + , vel_grad( fields.vel_grad) + , dt( fields.dt) + , current_state(arg_current_state) + , previous_state(arg_previous_state) + { } + + //-------------------------------------------------------------------------- + + // Calculate Velocity Gradients + KOKKOS_INLINE_FUNCTION + void v_grad( int ielem, + Scalar * vx, Scalar * vy, Scalar * vz, + Scalar * grad_x, Scalar * grad_y, Scalar * grad_z, + Scalar inv_vol) const + { + const int K_F_XX = Fields::K_F_XX ; + const int K_F_YY = Fields::K_F_YY ; + const int K_F_ZZ = Fields::K_F_ZZ ; + const int K_F_XY = Fields::K_F_XY ; + const int K_F_YZ = Fields::K_F_YZ ; + const int K_F_ZX = Fields::K_F_ZX ; + const int K_F_YX = Fields::K_F_YX ; + const int K_F_ZY = Fields::K_F_ZY ; + const int K_F_XZ = Fields::K_F_XZ ; + + vel_grad(ielem, K_F_XX) = inv_vol * dot8<Scalar,execution_space>( vx , grad_x ); + vel_grad(ielem, K_F_YX) = inv_vol * dot8<Scalar,execution_space>( vy , grad_x ); + vel_grad(ielem, K_F_ZX) = inv_vol * dot8<Scalar,execution_space>( vz , grad_x ); + + vel_grad(ielem, K_F_XY) = inv_vol * dot8<Scalar,execution_space>( vx , grad_y ); + vel_grad(ielem, K_F_YY) = inv_vol * dot8<Scalar,execution_space>( vy , grad_y ); + vel_grad(ielem, K_F_ZY) = inv_vol * 
dot8<Scalar,execution_space>( vz , grad_y ); + + vel_grad(ielem, K_F_XZ) = inv_vol * dot8<Scalar,execution_space>( vx , grad_z ); + vel_grad(ielem, K_F_YZ) = inv_vol * dot8<Scalar,execution_space>( vy , grad_z ); + vel_grad(ielem, K_F_ZZ) = inv_vol * dot8<Scalar,execution_space>( vz , grad_z ); + } + + //-------------------------------------------------------------------------- + // Functor operator() which calls the three member functions. + + + KOKKOS_INLINE_FUNCTION + void operator()( int ielem )const + { + const int X = 0 ; + const int Y = 1 ; + const int Z = 2 ; + const Scalar dt_scale = -0.5 * *dt; + + // declare and reuse local data for frequently accessed data to + // reduce global memory reads and writes. + + Scalar x[8], y[8], z[8]; + Scalar vx[8], vy[8], vz[8]; + Scalar grad_x[8], grad_y[8], grad_z[8]; + + // Read global velocity once and use many times + // via local registers / L1 cache. + // store the velocity information in local memory before using, + // so it can be returned for other functions to use + + // Read global coordinates and velocity once and use many times + // via local registers / L1 cache. 
+ // load X coordinate information and move by half time step + + for ( int i = 0 ; i < ElemNodeCount ; ++i ) { + const int n = elem_node_connectivity( ielem , i ); + + vx[i] = velocity( n , X , current_state ); + vy[i] = velocity( n , Y , current_state ); + vz[i] = velocity( n , Z , current_state ); + + x[i] = model_coords( n , X ) + + displacement( n , X , current_state ) + + dt_scale * vx[i]; + + y[i] = model_coords( n , Y ) + + displacement( n , Y , current_state ) + + dt_scale * vy[i]; + + z[i] = model_coords( n , Z ) + + displacement( n , Z , current_state ) + + dt_scale * vz[i]; + } + + comp_grad<Scalar,execution_space>( x, y, z, grad_x, grad_y, grad_z); + + // Calculate hexahedral volume from x model_coords and gradient information + + const Scalar inv_vol = 1.0 / dot8<Scalar,execution_space>( x , grad_x ); + + v_grad(ielem, vx, vy, vz, grad_x, grad_y, grad_z, inv_vol); + } + + static void apply( const Fields & fields , + const int arg_current_state , + const int arg_previous_state ) + { + grad op( fields, arg_current_state , arg_previous_state ); + Kokkos::parallel_for( fields.num_elements , op ); + } +}; + +//---------------------------------------------------------------------------- + +template<typename Scalar, class DeviceType > +struct decomp_rotate +{ + typedef DeviceType execution_space ; + + typedef Explicit::Fields< Scalar , execution_space > Fields ; + + static const int ElemNodeCount = Fields::ElemNodeCount ; + + static const int K_F_XX = Fields::K_F_XX ; + static const int K_F_YY = Fields::K_F_YY ; + static const int K_F_ZZ = Fields::K_F_ZZ ; + static const int K_F_XY = Fields::K_F_XY ; + static const int K_F_YZ = Fields::K_F_YZ ; + static const int K_F_ZX = Fields::K_F_ZX ; + static const int K_F_YX = Fields::K_F_YX ; + static const int K_F_ZY = Fields::K_F_ZY ; + static const int K_F_XZ = Fields::K_F_XZ ; + + static const int K_S_XX = Fields::K_S_XX ; + static const int K_S_YY = Fields::K_S_YY ; + static const int K_S_ZZ = Fields::K_S_ZZ ; + 
static const int K_S_XY = Fields::K_S_XY ;
  static const int K_S_YZ = Fields::K_S_YZ ;
  static const int K_S_ZX = Fields::K_S_ZX ;
  static const int K_S_YX = Fields::K_S_YX ;
  static const int K_S_ZY = Fields::K_S_ZY ;
  static const int K_S_XZ = Fields::K_S_XZ ;

  static const int K_V_XY = Fields::K_V_XY ;
  static const int K_V_YZ = Fields::K_V_YZ ;
  static const int K_V_ZX = Fields::K_V_ZX ;

  // Global arrays used by this functor.

  const typename Fields::elem_tensor_state_type     rotation ;
  const typename Fields::elem_tensor_type           vel_grad ;
  const typename Fields::elem_sym_tensor_type       stretch ;
  const typename Fields::elem_sym_tensor_type       rot_stretch ;
  const typename Fields::scalar_type                dt_value ;

  const int current_state;
  const int previous_state;

  // Copies the field handles used by this functor and records which
  // rotation state is read (previous) and which is written (current).
  decomp_rotate( const Fields & mesh_fields ,
                 const int arg_current_state,
                 const int arg_previous_state)
    : rotation(    mesh_fields.rotation )
    , vel_grad(    mesh_fields.vel_grad )
    , stretch(     mesh_fields.stretch )
    , rot_stretch( mesh_fields.rot_stretch )
    , dt_value(    mesh_fields.dt)
    , current_state( arg_current_state)
    , previous_state(arg_previous_state)
    {}

  // Runs the functor over all mesh_fields.num_elements elements.
  static void apply( const Fields & mesh_fields ,
                     const int arg_current_state ,
                     const int arg_previous_state )
  {
    decomp_rotate op( mesh_fields , arg_current_state , arg_previous_state );
    Kokkos::parallel_for( mesh_fields.num_elements , op );
  }


  // Loads the element's velocity gradient into v_gr (9 entries) and writes
  // its symmetric part into str_ten (6 entries).
  KOKKOS_INLINE_FUNCTION
  void additive_decomp(int ielem, Scalar * v_gr, Scalar * str_ten) const
  {
    //  In addition to calculating stretching_tensor,
    //  use this as an opportunity to load global
    //  variables into a local space

    for ( int i = 0 ; i < 9 ; ++i ) {
      v_gr[i] = vel_grad( ielem , i );
    }

    //
    //  Symmetric part
    //
    str_ten[K_S_XX] = v_gr[K_F_XX];
    str_ten[K_S_YY] = v_gr[K_F_YY];
    str_ten[K_S_ZZ] = v_gr[K_F_ZZ];
    str_ten[K_S_XY] = 0.5*(v_gr[K_F_XY] + v_gr[K_F_YX]);
    str_ten[K_S_YZ] = 0.5*(v_gr[K_F_YZ] + v_gr[K_F_ZY]);
    str_ten[K_S_ZX] = 0.5*(v_gr[K_F_ZX] + v_gr[K_F_XZ]);
  }

  // Computes the rotation rates, advances the rotation tensor from
  // previous_state to current_state (stored to the rotation view), and
  // advances the local copy of the stretch in str.
  //
  //   v_gr    (in)  : velocity gradient, 9 entries
  //   str_ten (in)  : stretching tensor, 6 entries
  //   str     (out) : stretch loaded from the stretch view, then updated
  //                   in place; not written back here
  //   vort    (out) : skew-symmetric part of v_gr, 3 entries
  //   rot_old (out) : rotation at previous_state, 9 entries
  //   rot_new (out) : rotation at current_state, 9 entries
  KOKKOS_INLINE_FUNCTION
  void polar_decomp(int ielem, Scalar * v_gr, Scalar * str_ten, Scalar * str, Scalar * vort, Scalar * rot_old, Scalar * rot_new)const
  {
    const Scalar dt = *dt_value;
    const Scalar dt_half = 0.5 * dt;

    //  Skew Symmetric part
    vort[K_V_XY] = 0.5*(v_gr[K_F_XY] - v_gr[K_F_YX]);
    vort[K_V_YZ] = 0.5*(v_gr[K_F_YZ] - v_gr[K_F_ZY]);
    vort[K_V_ZX] = 0.5*(v_gr[K_F_ZX] - v_gr[K_F_XZ]);

    //  calculate the rates of rotation via gauss elimination.
    for ( int i = 0 ; i < 6 ; ++i ) {
      str[i] = stretch(ielem, i);
    }

    // Right-hand sides of the 3x3 system solved below.
    Scalar z1 = str_ten[K_S_XY] * str[K_S_ZX] -
                str_ten[K_S_ZX] * str[K_S_XY] +
                str_ten[K_S_YY] * str[K_S_YZ] -
                str_ten[K_S_YZ] * str[K_S_YY] +
                str_ten[K_S_YZ] * str[K_S_ZZ] -
                str_ten[K_S_ZZ] * str[K_S_YZ];

    Scalar z2 = str_ten[K_S_ZX] * str[K_S_XX] -
                str_ten[K_S_XX] * str[K_S_ZX] +
                str_ten[K_S_YZ] * str[K_S_XY] -
                str_ten[K_S_XY] * str[K_S_YZ] +
                str_ten[K_S_ZZ] * str[K_S_ZX] -
                str_ten[K_S_ZX] * str[K_S_ZZ];

    Scalar z3 = str_ten[K_S_XX] * str[K_S_XY] -
                str_ten[K_S_XY] * str[K_S_XX] +
                str_ten[K_S_XY] * str[K_S_YY] -
                str_ten[K_S_YY] * str[K_S_XY] +
                str_ten[K_S_ZX] * str[K_S_YZ] -
                str_ten[K_S_YZ] * str[K_S_ZX];

    //  forward elimination
    const Scalar a1inv  = 1.0 / (str[K_S_YY] + str[K_S_ZZ]);

    const Scalar a4BYa1 = -1 * str[K_S_XY] * a1inv;

    const Scalar a2inv  = 1.0 / (str[K_S_ZZ] + str[K_S_XX] + str[K_S_XY] * a4BYa1);

    const Scalar a5 =  -str[K_S_YZ] + str[K_S_ZX] * a4BYa1;

    z2 -= z1 * a4BYa1;
    Scalar a6BYa1 = -1 * str[K_S_ZX] * a1inv;
    const Scalar a5BYa2 = a5 * a2inv;
    z3 -= z1 * a6BYa1 - z2 * a5BYa2;

    //  backward substitution -
    z3 /= (str[K_S_XX] + str[K_S_YY] + str[K_S_ZX] * a6BYa1 + a5 * a5BYa2);
    z2 = (z2 - a5 * z3) * a2inv;
    z1 = (z1*a1inv - a6BYa1 * z3 -a4BYa1 * z2);

    //  calculate rotation rates - recall that spin_rate is an asymmetric tensor,
    //  so compute spin rate vector as dual of spin rate tensor,
    //  i.e   w_i = e_ijk * spin_rate_jk
    z1 += vort[K_V_YZ];
    z2 += vort[K_V_ZX];
    z3 += vort[K_V_XY];

    //  update rotation tensor:
    //  1) premultiply old rotation tensor to get right-hand side.

    for ( int i = 0 ; i < 9 ; ++i ) {
      rot_old[i] = rotation(ielem, i, previous_state);
    }

    Scalar r_XX = rot_old[K_F_XX] + dt_half*( z3 * rot_old[K_F_YX] - z2 * rot_old[K_F_ZX] );
    Scalar r_YX = rot_old[K_F_YX] + dt_half*( z1 * rot_old[K_F_ZX] - z3 * rot_old[K_F_XX] );
    Scalar r_ZX = rot_old[K_F_ZX] + dt_half*( z2 * rot_old[K_F_XX] - z1 * rot_old[K_F_YX] );
    Scalar r_XY = rot_old[K_F_XY] + dt_half*( z3 * rot_old[K_F_YY] - z2 * rot_old[K_F_ZY] );
    Scalar r_YY = rot_old[K_F_YY] + dt_half*( z1 * rot_old[K_F_ZY] - z3 * rot_old[K_F_XY] );
    Scalar r_ZY = rot_old[K_F_ZY] + dt_half*( z2 * rot_old[K_F_XY] - z1 * rot_old[K_F_YY] );
    Scalar r_XZ = rot_old[K_F_XZ] + dt_half*( z3 * rot_old[K_F_YZ] - z2 * rot_old[K_F_ZZ] );
    Scalar r_YZ = rot_old[K_F_YZ] + dt_half*( z1 * rot_old[K_F_ZZ] - z3 * rot_old[K_F_XZ] );
    Scalar r_ZZ = rot_old[K_F_ZZ] + dt_half*( z2 * rot_old[K_F_XZ] - z1 * rot_old[K_F_YZ] );


    //  2) solve for new rotation tensor via gauss elimination.
    //  forward elimination -
    Scalar a12 = - dt_half * z3;
    Scalar a13 =   dt_half * z2;
    Scalar b32 = - dt_half * z1;
    Scalar a22inv = 1.0 / (1.0 + a12 * a12);

    Scalar a13a12 = a13*a12;
    Scalar a23 = b32 + a13a12;
    r_YX += r_XX * a12;
    r_YY += r_XY * a12;
    r_YZ += r_XZ * a12;


    b32 = (b32 - a13a12) * a22inv;
    r_ZX += r_XX * a13 + r_YX * b32;
    r_ZY += r_XY * a13 + r_YY * b32;
    r_ZZ += r_XZ * a13 + r_YZ * b32;


    //  backward substitution -
    const Scalar a33inv = 1.0 / (1.0 + a13 * a13 + a23 * b32);

    rot_new[K_F_ZX] = r_ZX * a33inv;
    rot_new[K_F_ZY] = r_ZY * a33inv;
    rot_new[K_F_ZZ] = r_ZZ * a33inv;
    rot_new[K_F_YX] = ( r_YX - rot_new[K_F_ZX] * a23 ) * a22inv;
    rot_new[K_F_YY] = ( r_YY - rot_new[K_F_ZY] * a23 ) * a22inv;
    rot_new[K_F_YZ] = ( r_YZ - rot_new[K_F_ZZ] * a23 ) * a22inv;
    rot_new[K_F_XX] = r_XX - rot_new[K_F_ZX] * a13 - rot_new[K_F_YX] * a12;
    rot_new[K_F_XY] = r_XY - rot_new[K_F_ZY] * a13 - rot_new[K_F_YY] * a12;
    rot_new[K_F_XZ] = r_XZ - rot_new[K_F_ZZ] * a13 - rot_new[K_F_YZ] * a12;

    for ( int i = 0 ; i < 9 ; ++i ) {
      rotation(ielem, i, current_state) = rot_new[i] ;
    }

    //  update stretch tensor in the new configuration -
    const Scalar a1 = str_ten[K_S_XY] + vort[K_V_XY];
    const Scalar a2 = str_ten[K_S_YZ] + vort[K_V_YZ];
    const Scalar a3 = str_ten[K_S_ZX] + vort[K_V_ZX];
    const Scalar b1 = str_ten[K_S_ZX] - vort[K_V_ZX];
    const Scalar b2 = str_ten[K_S_XY] - vort[K_V_XY];
    const Scalar b3 = str_ten[K_S_YZ] - vort[K_V_YZ];

    // Snapshot of the stretch before the update, so every component below
    // is advanced from the same old values.
    const Scalar s_XX = str[K_S_XX];
    const Scalar s_YY = str[K_S_YY];
    const Scalar s_ZZ = str[K_S_ZZ];
    const Scalar s_XY = str[K_S_XY];
    const Scalar s_YZ = str[K_S_YZ];
    const Scalar s_ZX = str[K_S_ZX];

    str[K_S_XX] += dt * (str_ten[K_S_XX] * s_XX + ( a1 + z3 ) * s_XY + ( b1 - z2 ) * s_ZX);
    str[K_S_YY] += dt * (str_ten[K_S_YY] * s_YY + ( a2 + z1 ) * s_YZ + ( b2 - z3 ) * s_XY);
    str[K_S_ZZ] += dt * (str_ten[K_S_ZZ] * s_ZZ + ( a3 + z2 ) * s_ZX + ( b3 - z1 ) * s_YZ);
    str[K_S_XY] += dt * (str_ten[K_S_XX] * s_XY + ( a1 ) * s_YY + ( b1 ) * s_YZ - z3 * s_XX + z1 * s_ZX);
    str[K_S_YZ] += dt * (str_ten[K_S_YY] * s_YZ + ( a2 ) * s_ZZ + ( b2 ) * s_ZX - z1 * s_YY + z2 * s_XY);
    str[K_S_ZX] += dt * (str_ten[K_S_ZZ] * s_ZX + ( a3 ) * s_XX + ( b3 ) * s_XY - z2 * s_ZZ + z3 * s_YZ);

  }


  // Forms rot_str = rot_new^T * str_ten * rot_new (six stored components),
  // stores it to rot_stretch, and writes the updated stretch in str back to
  // the stretch view.
  //
  // NOTE(review): str_ten has six entries but is indexed with K_S_YX,
  // K_S_ZY and K_S_XZ as well; this assumes Fields maps those onto the
  // same slots as K_S_XY, K_S_YZ and K_S_ZX - confirm in Fields.
  KOKKOS_INLINE_FUNCTION
  void rotate_tensor(int ielem, Scalar * str_ten, Scalar * str, Scalar * rot_new)const {

    Scalar t[9];
    Scalar rot_str[6]; // Rotated stretch

    // t = str_ten * rot_new, stored column by column.
    t[0] = str_ten[K_S_XX]*rot_new[K_F_XX] +
           str_ten[K_S_XY]*rot_new[K_F_YX] +
           str_ten[K_S_XZ]*rot_new[K_F_ZX];

    t[1] = str_ten[K_S_YX]*rot_new[K_F_XX] +
           str_ten[K_S_YY]*rot_new[K_F_YX] +
           str_ten[K_S_YZ]*rot_new[K_F_ZX];

    t[2] = str_ten[K_S_ZX]*rot_new[K_F_XX] +
           str_ten[K_S_ZY]*rot_new[K_F_YX] +
           str_ten[K_S_ZZ]*rot_new[K_F_ZX];

    t[3] = str_ten[K_S_XX]*rot_new[K_F_XY] +
           str_ten[K_S_XY]*rot_new[K_F_YY] +
           str_ten[K_S_XZ]*rot_new[K_F_ZY];

    t[4] = str_ten[K_S_YX]*rot_new[K_F_XY] +
           str_ten[K_S_YY]*rot_new[K_F_YY] +
           str_ten[K_S_YZ]*rot_new[K_F_ZY];

    t[5] = str_ten[K_S_ZX]*rot_new[K_F_XY] +
           str_ten[K_S_ZY]*rot_new[K_F_YY] +
           str_ten[K_S_ZZ]*rot_new[K_F_ZY];

    t[6] = str_ten[K_S_XX]*rot_new[K_F_XZ] +
           str_ten[K_S_XY]*rot_new[K_F_YZ] +
           str_ten[K_S_XZ]*rot_new[K_F_ZZ];

    t[7] = str_ten[K_S_YX]*rot_new[K_F_XZ] +
           str_ten[K_S_YY]*rot_new[K_F_YZ] +
           str_ten[K_S_YZ]*rot_new[K_F_ZZ];

    t[8] = str_ten[K_S_ZX]*rot_new[K_F_XZ] +
           str_ten[K_S_ZY]*rot_new[K_F_YZ] +
           str_ten[K_S_ZZ]*rot_new[K_F_ZZ];


    rot_str[ K_S_XX ] = rot_new[K_F_XX] * t[0] +
                        rot_new[K_F_YX] * t[1] +
                        rot_new[K_F_ZX] * t[2];
    rot_str[ K_S_YY ] = rot_new[K_F_XY] * t[3] +
                        rot_new[K_F_YY] * t[4] +
                        rot_new[K_F_ZY] * t[5];
    rot_str[ K_S_ZZ ] = rot_new[K_F_XZ] * t[6] +
                        rot_new[K_F_YZ] * t[7] +
                        rot_new[K_F_ZZ] * t[8];

    rot_str[ K_S_XY ] = rot_new[K_F_XX] * t[3] +
                        rot_new[K_F_YX] * t[4] +
                        rot_new[K_F_ZX] * t[5];
    rot_str[ K_S_YZ ] = rot_new[K_F_XY] * t[6] +
                        rot_new[K_F_YY] * t[7] +
                        rot_new[K_F_ZY] * t[8];
    rot_str[ K_S_ZX ] = rot_new[K_F_XZ] * t[0] +
                        rot_new[K_F_YZ] * t[1] +
                        rot_new[K_F_ZZ] * t[2];

    for ( int i = 0 ; i < 6 ; ++i ) {
      rot_stretch(ielem, i) = rot_str[i] ;
    }

    for ( int i = 0 ; i < 6 ; ++i ) {
      stretch(ielem, i) = str[i] ;
    }
  }

  // Per-element entry point: additive_decomp, then polar_decomp, then
  // rotate_tensor, sharing the local scratch arrays declared here.
  KOKKOS_INLINE_FUNCTION
  void operator()( int ielem )const {

    //   Local scratch space to avoid multiple
    //   accesses to global memory.
    Scalar str_ten[6]; // Stretching tensor
    Scalar str[6];     // Stretch
    Scalar rot_old[9]; // Rotation old
    Scalar rot_new[9]; // Rotation new
    Scalar vort[3];    // Vorticity
    Scalar v_gr[9];    // Velocity gradient

    additive_decomp(ielem, v_gr, str_ten);

    polar_decomp(ielem, v_gr, str_ten, str, vort, rot_old, rot_new);

    rotate_tensor(ielem, str_ten, str, rot_new);
  }
};

//----------------------------------------------------------------------------

template<typename Scalar, class DeviceType >
struct internal_force
{
  typedef DeviceType execution_space ;

  typedef Explicit::Fields< Scalar , execution_space > Fields ;

  static const int ElemNodeCount = Fields::ElemNodeCount ;

  static const int K_F_XX = Fields::K_F_XX ;
  static const int K_F_YY = Fields::K_F_YY ;
  static const int K_F_ZZ = Fields::K_F_ZZ ;
  static const int K_F_XY = Fields::K_F_XY ;
  static const int K_F_YZ = Fields::K_F_YZ ;
  static const int K_F_ZX = Fields::K_F_ZX ;
  static const int K_F_YX = Fields::K_F_YX ;
  static const int K_F_ZY = Fields::K_F_ZY ;
  static const int K_F_XZ = Fields::K_F_XZ ;

  static const int K_S_XX = Fields::K_S_XX ;
  static const int K_S_YY = Fields::K_S_YY ;
  static const int K_S_ZZ = Fields::K_S_ZZ ;
  static const int K_S_XY = Fields::K_S_XY ;
  static const int K_S_YZ = Fields::K_S_YZ ;
  static const int K_S_ZX = Fields::K_S_ZX ;
  static const int K_S_YX = Fields::K_S_YX ;
  static const int K_S_ZY = Fields::K_S_ZY ;
  static const int K_S_XZ = Fields::K_S_XZ ;

//--------------------------------------------------------------------------
  // Reduction:
  //
  // The reduced value is a time step: every element proposes one in
  // operator() and the smallest proposal is kept.

  typedef Scalar value_type;

  // Starts the reduction from a value larger than any expected time step.
  KOKKOS_INLINE_FUNCTION
  static void init(value_type &update) {
    update = 1.0e32;
  }

  // Combines two partial results by keeping the smaller one.
  KOKKOS_INLINE_FUNCTION
  static void join( volatile value_type & update,
                    const volatile value_type & source )
  {
    update = update < source ? update : source;
  }

  // Final serial processing of reduction value:
  // the old *dt is saved to *prev_dt and *dt receives the reduced value.
  KOKKOS_INLINE_FUNCTION
  void final( value_type & result ) const
  {
    *prev_dt = *dt ;
    *dt = result ;
  };

  //--------------------------------------------------------------------------

  // Global arrays used by this functor.

  const typename Fields::elem_node_ids_type      elem_node_connectivity ;
  const typename Fields::node_coords_type        model_coords ;
  const typename Fields::scalar_type             dt ;
  const typename Fields::scalar_type             prev_dt ;
  const typename Fields::geom_state_array_type   displacement ;
  const typename Fields::geom_state_array_type   velocity ;
  const typename Fields::array_type              elem_mass ;
  const typename Fields::array_type              internal_energy ;
  const typename Fields::elem_sym_tensor_type    stress_new ;
  const typename Fields::elem_node_geom_type     element_force ;
  const typename Fields::elem_tensor_state_type  rotation ;
  const typename Fields::elem_sym_tensor_type    rot_stretch ;

  const Scalar     two_mu;
  const Scalar     bulk_modulus;
  const Scalar     lin_bulk_visc;
  const Scalar     quad_bulk_visc;
  const Scalar     user_dt;          // if > 0, overrides the computed time step
  const int        current_state;

  // Copies the field handles and material constants from mesh_fields and
  // records the user-supplied time step and the state index to read.
  internal_force( const Fields & mesh_fields,
                  const Scalar arg_user_dt,
                  const int arg_current_state )
    : elem_node_connectivity( mesh_fields.elem_node_connectivity )
    , model_coords(           mesh_fields.model_coords )
    , dt(                     mesh_fields.dt )
    , prev_dt(                mesh_fields.prev_dt )
    , displacement(           mesh_fields.displacement )
    , velocity(               mesh_fields.velocity )
    , elem_mass(              mesh_fields.elem_mass )
    , internal_energy(        mesh_fields.internal_energy )
    , stress_new(             mesh_fields.stress_new )
    , element_force(          mesh_fields.element_force )
    , rotation(               mesh_fields.rotation )
    , rot_stretch(            mesh_fields.rot_stretch )
    , two_mu(                 mesh_fields.two_mu )
    , bulk_modulus(           mesh_fields.bulk_modulus )
    , lin_bulk_visc(          mesh_fields.lin_bulk_visc )
    , quad_bulk_visc(         mesh_fields.quad_bulk_visc )
    , user_dt(                arg_user_dt )
    , current_state(          arg_current_state )
  {}

  // Runs the reduction over all mesh_fields.num_elements elements; the
  // reduced value is consumed by final().
  static void apply( const Fields & mesh_fields ,
                     const Scalar arg_user_dt,
                     const int arg_current_state )
  {
    internal_force  op_force( mesh_fields , arg_user_dt , arg_current_state );

    Kokkos::parallel_reduce( mesh_fields.num_elements, op_force );
  }

  //--------------------------------------------------------------------------

  // Loads the element's rotation at current_state and forms
  // rot_stress = r_n * s_n * r_n^T (six stored components).
  //
  // NOTE(review): s_n has six entries but is indexed with K_S_YX, K_S_ZY
  // and K_S_XZ as well; this assumes Fields maps those onto the same slots
  // as K_S_XY, K_S_YZ and K_S_ZX - confirm in Fields.
  KOKKOS_INLINE_FUNCTION
  void rotate_tensor_backward(int ielem ,
    const Scalar * const s_n ,
    Scalar * const rot_stress )const
  {
    const int rot_state = current_state ; // 1 ;

    //   t : temporary variables
    //   s_n : stress_new in local memory space
    //   r_n : rotation_new in local memory space
    Scalar t[9], r_n[9];

    r_n[0] = rotation(ielem, 0, rot_state );
    r_n[1] = rotation(ielem, 1, rot_state );
    r_n[2] = rotation(ielem, 2, rot_state );
    r_n[3] = rotation(ielem, 3, rot_state );
    r_n[4] = rotation(ielem, 4, rot_state );
    r_n[5] = rotation(ielem, 5, rot_state );
    r_n[6] = rotation(ielem, 6, rot_state );
    r_n[7] = rotation(ielem, 7, rot_state );
    r_n[8] = rotation(ielem, 8, rot_state );

    t[0] = s_n[K_S_XX]*r_n[K_F_XX]+ s_n[K_S_XY]*r_n[K_F_XY]+ s_n[K_S_XZ]*r_n[K_F_XZ];
    t[1] = s_n[K_S_YX]*r_n[K_F_XX]+ s_n[K_S_YY]*r_n[K_F_XY]+ s_n[K_S_YZ]*r_n[K_F_XZ];
    t[2] = s_n[K_S_ZX]*r_n[K_F_XX]+ s_n[K_S_ZY]*r_n[K_F_XY]+ s_n[K_S_ZZ]*r_n[K_F_XZ];
    t[3] = s_n[K_S_XX]*r_n[K_F_YX]+ s_n[K_S_XY]*r_n[K_F_YY]+ s_n[K_S_XZ]*r_n[K_F_YZ];
    t[4] = s_n[K_S_YX]*r_n[K_F_YX]+ s_n[K_S_YY]*r_n[K_F_YY]+ s_n[K_S_YZ]*r_n[K_F_YZ];
    t[5] = s_n[K_S_ZX]*r_n[K_F_YX]+ s_n[K_S_ZY]*r_n[K_F_YY]+ s_n[K_S_ZZ]*r_n[K_F_YZ];
    t[6] = s_n[K_S_XX]*r_n[K_F_ZX]+ s_n[K_S_XY]*r_n[K_F_ZY]+ s_n[K_S_XZ]*r_n[K_F_ZZ];
    t[7] = s_n[K_S_YX]*r_n[K_F_ZX]+ s_n[K_S_YY]*r_n[K_F_ZY]+ s_n[K_S_YZ]*r_n[K_F_ZZ];
    t[8] = s_n[K_S_ZX]*r_n[K_F_ZX]+ s_n[K_S_ZY]*r_n[K_F_ZY]+ s_n[K_S_ZZ]*r_n[K_F_ZZ];

    rot_stress[ K_S_XX ] = r_n[K_F_XX]*t[0] + r_n[K_F_XY]*t[1] + r_n[K_F_XZ]*t[2];
    rot_stress[ K_S_YY ] = r_n[K_F_YX]*t[3] + r_n[K_F_YY]*t[4] + r_n[K_F_YZ]*t[5];
    rot_stress[ K_S_ZZ ] = r_n[K_F_ZX]*t[6] + r_n[K_F_ZY]*t[7] + r_n[K_F_ZZ]*t[8];

    rot_stress[ K_S_XY ] = r_n[K_F_XX]*t[3] + r_n[K_F_XY]*t[4] + r_n[K_F_XZ]*t[5];
    rot_stress[ K_S_YZ ] = r_n[K_F_YX]*t[6] + r_n[K_F_YY]*t[7] + r_n[K_F_YZ]*t[8];
    rot_stress[ K_S_ZX ] = r_n[K_F_ZX]*t[0] + r_n[K_F_ZY]*t[1] + r_n[K_F_ZZ]*t[2];
  }

  //--------------------------------------------------------------------------

  // For each of the element's 8 nodes, stores the force
  // f = total_stress12th * grad into element_force(ielem, 0..2, inode) and
  // accumulates f . v; the accumulated sum overwrites
  // internal_energy(ielem).
  KOKKOS_INLINE_FUNCTION
  void comp_force(int ielem,
     const Scalar * const vx ,
     const Scalar * const vy ,
     const Scalar * const vz ,
     const Scalar * const grad_x ,
     const Scalar * const grad_y ,
     const Scalar * const grad_z ,
     Scalar * total_stress12th ) const
  {
    Scalar internal_energy_inc = 0 ;

    for(int inode = 0; inode < 8; ++inode) {

      const Scalar fx =
        total_stress12th[K_S_XX] * grad_x[inode] +
        total_stress12th[K_S_XY] * grad_y[inode] +
        total_stress12th[K_S_XZ] * grad_z[inode] ;

      element_force(ielem, 0, inode) = fx ;

      const Scalar fy =
        total_stress12th[K_S_YX] * grad_x[inode] +
        total_stress12th[K_S_YY] * grad_y[inode] +
        total_stress12th[K_S_YZ] * grad_z[inode] ;

      element_force(ielem, 1, inode) = fy ;

      const Scalar fz =
        total_stress12th[K_S_ZX] * grad_x[inode] +
        total_stress12th[K_S_ZY] * grad_y[inode] +
        total_stress12th[K_S_ZZ] * grad_z[inode] ;

      element_force(ielem, 2, inode) = fz ;

      internal_energy_inc +=
        fx * vx[inode] +
        fy * vy[inode] +
        fz * vz[inode] ;
    }

    internal_energy(ielem) = internal_energy_inc ;
  }

  //----------------------------------------------------------------------------

  // Advances stress_new(ielem, 0..5) in place by *dt times a rate built
  // from rot_stretch, two_mu and bulk_modulus, and returns the updated
  // values in s_n.
  KOKKOS_INLINE_FUNCTION
  void get_stress(int ielem , Scalar * const s_n ) const
  {
    const int kxx = 0;
    const int kyy = 1;
    const int kzz = 2;
    const int kxy = 3;
    const int kyz = 4;
    const int kzx = 5;

    // Mean of the three diagonal rot_stretch components.
    const Scalar e = (rot_stretch(ielem,kxx)+rot_stretch(ielem,kyy)+rot_stretch(ielem,kzz))/3.0;

    s_n[kxx] = stress_new(ielem,kxx) += *dt * (two_mu * (rot_stretch(ielem,kxx)-e)+3*bulk_modulus*e);
    s_n[kyy] = stress_new(ielem,kyy) += *dt * (two_mu * (rot_stretch(ielem,kyy)-e)+3*bulk_modulus*e);
    s_n[kzz] = stress_new(ielem,kzz) += *dt * (two_mu * (rot_stretch(ielem,kzz)-e)+3*bulk_modulus*e);

    s_n[kxy] = stress_new(ielem,kxy) += *dt * two_mu * rot_stretch(ielem,kxy);
    s_n[kyz] = stress_new(ielem,kyz) += *dt * two_mu * rot_stretch(ielem,kyz);
    s_n[kzx] = stress_new(ielem,kzx) += *dt * two_mu * rot_stretch(ielem,kzx);
  }

  //----------------------------------------------------------------------------


  // Per-element work item of the reduction.
  //
  // Gathers current positions and velocities, computes the gradient
  // operator, proposes a time step (min-reduced into update), updates the
  // stress, rotates it, and stores the nodal forces and the element's
  // internal energy value via comp_force().
  KOKKOS_INLINE_FUNCTION
  void operator()( int ielem, value_type & update )const
  {
    const Scalar ONE12TH = 1.0 / 12.0 ;

    Scalar x[8], y[8], z[8] ;
    Scalar vx[8], vy[8], vz[8];
    Scalar grad_x[8], grad_y[8], grad_z[8];

    // Position and velocity:

    for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
      const int n = elem_node_connectivity(ielem,i);

      x[i] = model_coords(n, 0) + displacement(n, 0, current_state) ;
      y[i] = model_coords(n, 1) + displacement(n, 1, current_state) ;
      z[i] = model_coords(n, 2) + displacement(n, 2, current_state) ;

      vx[i] = velocity(n, 0, current_state);
      vy[i] = velocity(n, 1, current_state);
      vz[i] = velocity(n, 2, current_state);
    }

    // Gradient:

    comp_grad<Scalar,execution_space>( x , y , z , grad_x , grad_y , grad_z );


    const Scalar mid_vol = dot8<Scalar,execution_space>( x , grad_x );

    const Scalar shr = two_mu ;
    const Scalar dil = bulk_modulus + ((2.0*shr)/3.0);

    const Scalar aspect = 6.0 * mid_vol /
                          ( dot8<Scalar,execution_space>( grad_x , grad_x ) +
                            dot8<Scalar,execution_space>( grad_y , grad_y ) +
                            dot8<Scalar,execution_space>( grad_z , grad_z ) );

    const Scalar dtrial = sqrt(elem_mass(ielem) * aspect / dil);
    const Scalar traced = (rot_stretch(ielem, 0) + rot_stretch(ielem, 1) + rot_stretch(ielem, 2));

    // The quadratic viscosity term is only added when traced is negative.
    const Scalar eps = traced < 0 ? (lin_bulk_visc - quad_bulk_visc * traced * dtrial) : lin_bulk_visc ;

    const Scalar bulkq = eps * dil * dtrial * traced;

    Scalar cur_time_step = dtrial * ( sqrt( 1.0 + eps * eps) - eps);

    // force fixed time step if input

    cur_time_step = user_dt > 0 ? user_dt : cur_time_step;

    update = update < cur_time_step ? update : cur_time_step;


    Scalar s_n[ 6 ];

    get_stress( ielem, s_n );

    Scalar total_stress12th[6];

    // Get rotated stress:

    rotate_tensor_backward(ielem, s_n , total_stress12th );

    // bulkq is added to the three diagonal components only; everything is
    // scaled by 1/12.
    total_stress12th[0] = ONE12TH*( total_stress12th[ 0 ] + bulkq );
    total_stress12th[1] = ONE12TH*( total_stress12th[ 1 ] + bulkq );
    total_stress12th[2] = ONE12TH*( total_stress12th[ 2 ] + bulkq );
    total_stress12th[3] = ONE12TH*( total_stress12th[ 3 ] );
    total_stress12th[4] = ONE12TH*( total_stress12th[ 4 ] );
    total_stress12th[5] = ONE12TH*( total_stress12th[ 5 ] );

    comp_force(ielem, vx, vy, vz,
                      grad_x, grad_y, grad_z, total_stress12th);
  }
};

//----------------------------------------------------------------------------

template<typename Scalar, class DeviceType >
struct nodal_step
{
  typedef DeviceType execution_space ;
  typedef typename execution_space::size_type  size_type;

  typedef Explicit::Fields< Scalar , execution_space > Fields ;

  const typename Fields::scalar_type            dt ;
  const typename Fields::scalar_type            prev_dt ;
  const typename Fields::node_elem_ids_type     node_elem_connectivity ;
  const typename Fields::node_coords_type       model_coords ;
  const typename Fields::array_type             nodal_mass ;
  const typename Fields::geom_state_array_type  displacement ;
  const typename Fields::geom_state_array_type  velocity ;
  const typename Fields::geom_array_type        acceleration ;
const typename Fields::geom_array_type internal_force ; + const typename Fields::elem_node_geom_type element_force ; + + const Scalar x_bc; + const int current_state; + const int next_state; + + + nodal_step( const Fields & mesh_fields , + const Scalar arg_x_bc, + const int arg_current_state, + const int arg_next_state) + : dt( mesh_fields.dt ) + , prev_dt( mesh_fields.prev_dt ) + , node_elem_connectivity( mesh_fields.node_elem_connectivity ) + , model_coords( mesh_fields.model_coords ) + , nodal_mass( mesh_fields.nodal_mass ) + , displacement( mesh_fields.displacement ) + , velocity( mesh_fields.velocity ) + , acceleration( mesh_fields.acceleration ) + , internal_force( mesh_fields.internal_force ) + , element_force( mesh_fields.element_force ) + , x_bc( arg_x_bc ) + , current_state( arg_current_state ) + , next_state( arg_next_state ) + { + //std::cout << "finish_step dt: " << dt << std::endl; + //std::cout << "finish_step prev_dt: " << prev_dt << std::endl; + } + + static void apply( const Fields & mesh_fields , + const Scalar arg_x_bc , + const int arg_current_state , + const int arg_next_state ) + { + nodal_step op( mesh_fields, arg_x_bc, arg_current_state, arg_next_state ); + + // Only update the owned nodes: + + Kokkos::parallel_for( mesh_fields.num_nodes_owned , op ); + } + + KOKKOS_INLINE_FUNCTION + void operator()(int inode) const + { + // Getting count as per 'CSR-like' data structure + const int begin = node_elem_connectivity.row_map[inode]; + const int end = node_elem_connectivity.row_map[inode+1]; + + double local_force[] = {0.0, 0.0, 0.0}; + + // Gather-sum internal force from + // each element that a node is attached to. 
+ + for ( int i = begin; i < end ; ++i ){ + + // node_elem_offset is a cumulative structure, so + // node_elem_offset(inode) should be the index where + // a particular row's elem_IDs begin + const int nelem = node_elem_connectivity.entries( i, 0); + + // find the row in an element's stiffness matrix + // that corresponds to inode + const int elem_node_index = node_elem_connectivity.entries( i, 1); + + local_force[0] += element_force(nelem, 0, elem_node_index); + local_force[1] += element_force(nelem, 1, elem_node_index); + local_force[2] += element_force(nelem, 2, elem_node_index); + } + + internal_force(inode, 0) = local_force[0]; + internal_force(inode, 1) = local_force[1]; + internal_force(inode, 2) = local_force[2]; + + // Acceleration: + + Scalar v_new[3]; + Scalar a_current[3]; + + const Scalar tol = 1.0e-7; + + // If not on the boundary then: a = F / m + if ( tol < fabs(model_coords(inode,0)-x_bc) ) { + + const Scalar m = nodal_mass( inode ); + + acceleration(inode,0) = a_current[0] = -local_force[0] / m ; + acceleration(inode,1) = a_current[1] = -local_force[1] / m ; + acceleration(inode,2) = a_current[2] = -local_force[2] / m ; + } + else { //enforce fixed BC + acceleration(inode,0) = a_current[0] = 0; + acceleration(inode,1) = a_current[1] = 0; + acceleration(inode,2) = a_current[2] = 0; + } + + // Central difference time integration: + + const Scalar dt_disp = *dt ; + const Scalar dt_vel = ( *dt + *prev_dt ) / 2.0 ; + + velocity(inode,0,next_state) = v_new[0] = + velocity(inode,0,current_state) + dt_vel * a_current[0]; + + velocity(inode,1,next_state) = v_new[1] = + velocity(inode,1,current_state) + dt_vel * a_current[1]; + + velocity(inode,2,next_state) = v_new[2] = + velocity(inode,2,current_state) + dt_vel * a_current[2]; + + displacement(inode,0,next_state) = + displacement(inode,0,current_state) + dt_disp * v_new[0]; + + displacement(inode,1,next_state) = + displacement(inode,1,current_state) + dt_disp * v_new[1]; + + 
displacement(inode,2,next_state) = + displacement(inode,2,current_state) + dt_disp * v_new[2]; + } +}; + +//---------------------------------------------------------------------------- + +template< typename Scalar , class DeviceType > +struct pack_state +{ + typedef DeviceType execution_space ; + typedef typename execution_space::size_type size_type ; + + typedef Explicit::Fields< Scalar , execution_space > Fields ; + + typedef typename Fields::geom_state_array_type::value_type value_type ; + typedef Kokkos::View< value_type* , execution_space > buffer_type ; + + static const unsigned value_count = 6 ; + + const typename Fields::geom_state_array_type displacement ; + const typename Fields::geom_state_array_type velocity ; + const buffer_type output ; + const size_type inode_base ; + const size_type state_next ; + + pack_state( const buffer_type & arg_output , + const Fields & mesh_fields , + const size_type arg_begin , + const size_type arg_state ) + : displacement( mesh_fields.displacement ) + , velocity( mesh_fields.velocity ) + , output( arg_output ) + , inode_base( arg_begin ) + , state_next( arg_state ) + {} + + static void apply( const buffer_type & arg_output , + const size_type arg_begin , + const size_type arg_count , + const Fields & mesh_fields , + const size_type arg_state ) + { + pack_state op( arg_output , mesh_fields , arg_begin , arg_state ); + + Kokkos::parallel_for( arg_count , op ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type i ) const + { + const size_type inode = inode_base + i ; + + size_type j = i * value_count ; + + output[j++] = displacement( inode , 0 , state_next ); + output[j++] = displacement( inode , 1 , state_next ); + output[j++] = displacement( inode , 2 , state_next ); + output[j++] = velocity( inode , 0 , state_next ); + output[j++] = velocity( inode , 1 , state_next ); + output[j++] = velocity( inode , 2 , state_next ); + } +}; + +template< typename Scalar , class DeviceType > +struct unpack_state +{ + 
typedef DeviceType execution_space ; + typedef typename execution_space::size_type size_type ; + + typedef Explicit::Fields< Scalar , execution_space > Fields ; + + typedef typename Fields::geom_state_array_type::value_type value_type ; + typedef Kokkos::View< value_type* , execution_space > buffer_type ; + + static const unsigned value_count = 6 ; + + const typename Fields::geom_state_array_type displacement ; + const typename Fields::geom_state_array_type velocity ; + const buffer_type input ; + const size_type inode_base ; + const size_type state_next ; + + unpack_state( const buffer_type & arg_input , + const Fields & mesh_fields , + const size_type arg_begin , + const size_type arg_state ) + : displacement( mesh_fields.displacement ) + , velocity( mesh_fields.velocity ) + , input( arg_input ) + , inode_base( arg_begin ) + , state_next( arg_state ) + {} + + static void apply( const Fields & mesh_fields , + const size_type arg_state , + const buffer_type & arg_input , + const size_type arg_begin , + const size_type arg_count ) + { + unpack_state op( arg_input , mesh_fields , arg_begin , arg_state ); + + Kokkos::parallel_for( arg_count , op ); + } + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type i ) const + { + const size_type inode = inode_base + i ; + + size_type j = i * value_count ; + + displacement( inode , 0 , state_next ) = input[j++] ; + displacement( inode , 1 , state_next ) = input[j++] ; + displacement( inode , 2 , state_next ) = input[j++] ; + velocity( inode , 0 , state_next ) = input[j++] ; + velocity( inode , 1 , state_next ) = input[j++] ; + velocity( inode , 2 , state_next ) = input[j++] ; + } +}; + +} /* namespace Explicit */ + +#endif /* #ifndef KOKKOS_EXPLICITFUNCTORS_HPP */ + + diff --git a/lib/kokkos/example/multi_fem/FEMesh.hpp b/lib/kokkos/example/multi_fem/FEMesh.hpp new file mode 100644 index 0000000000000000000000000000000000000000..33468e2fbd7a3739f92bdb9473b0ae01b60b1311 --- /dev/null +++ 
b/lib/kokkos/example/multi_fem/FEMesh.hpp @@ -0,0 +1,86 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
/** \brief  Finite element mesh fixture for hybrid parallel performance tests.
 *
 *  Plain aggregate bundling the mesh views with the parallel data map.
 *
 *  \tparam CoordScalarType  scalar type of the node coordinates
 *  \tparam ElemNodeCount    number of nodes per element (fixed at compile time)
 *  \tparam Device           Kokkos device; supplies size_type and is passed
 *                           to every view / graph below
 */
template< typename CoordScalarType , unsigned ElemNodeCount , class Device >
struct FEMesh {

  typedef typename Device::size_type size_type ;

  // Compile-time copy of the ElemNodeCount template argument.
  static const size_type element_node_count = ElemNodeCount ;

  // Rank-2 view with a runtime leading extent and three values per row
  // (presumably one row per node -- verify against the mesh builder).
  typedef Kokkos::View< CoordScalarType*[3] , Device > node_coords_type ;
  // Rank-2 view with ElemNodeCount ids per row (presumably one row per element).
  typedef Kokkos::View< size_type*[ElemNodeCount], Device > elem_node_ids_type ;
  // CRS graph whose entries are pairs of size_type.
  // NOTE(review): looks like (element id, local node index) pairs per node,
  // as consumed by the nodal gather functors -- confirm.
  typedef Kokkos::StaticCrsGraph< size_type[2] , Device > node_elem_ids_type ;

  node_coords_type node_coords ;
  elem_node_ids_type elem_node_ids ;
  node_elem_ids_type node_elem_ids ;
  Kokkos::ParallelDataMap parallel_data_map ;
};
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
#ifndef ELEMENTHEX_HPP
#define ELEMENTHEX_HPP

namespace HybridFEM {

template< unsigned NodeCount >
class HexElement_TensorData ;

template< unsigned NodeCount , class Device >
class HexElement_TensorEval ;

//----------------------------------------------------------------------------
/** \brief  Evaluate Hex element on interval [-1,1]^3
 *
 *  Tensor-product data for the 8-node (linear) hexahedron: the two 1D basis
 *  functions and their derivatives sampled at the two 1D integration
 *  points, the 1D weights, and the map from a node / integration-point
 *  ordinal to its (x,y,z) 1D indices.
 */
template<>
class HexElement_TensorData< 8 > {
public:

  static const unsigned element_node_count    = 8 ;
  static const unsigned spatial_dimension     = 3 ;
  static const unsigned integration_count_1d  = 2 ;
  static const unsigned function_count_1d     = 2 ;

  // Indexed [ integration point ][ function ].  Both counts are equal, so
  // the array type is the same as with the extents swapped; the extents are
  // spelled in the order in which the tables are actually filled and read.
  float values_1d [ integration_count_1d ][ function_count_1d ];
  float derivs_1d [ integration_count_1d ][ function_count_1d ];
  float weights_1d[ integration_count_1d ];

  // Columns 0..2 hold the x,y,z 1D indices; column 3 is padding (always 0).
  unsigned char eval_map[ element_node_count ][4] ;

  /** \brief  Value of 1D basis function `jf` at `x`; 0 for an unknown `jf`. */
  static float eval_value_1d( const unsigned jf , const float x )
  {
    return 0 == jf ? 0.5 * ( 1.0 - x ) : (
           1 == jf ? 0.5 * ( 1.0 + x ) : 0 );
  }

  /** \brief  Derivative of 1D basis function `jf` (constant for linear functions). */
  static float eval_deriv_1d( const unsigned jf , const float )
  {
    return 0 == jf ? -0.5 : (
           1 == jf ?  0.5 : 0 );
  }

  HexElement_TensorData()
  {
    const unsigned char tmp_map[ element_node_count ][ spatial_dimension ] =
      { { 0 , 0 , 0 },
        { 1 , 0 , 0 },
        { 1 , 1 , 0 },
        { 0 , 1 , 0 },
        { 0 , 0 , 1 },
        { 1 , 0 , 1 },
        { 1 , 1 , 1 },
        { 0 , 1 , 1 } };

    weights_1d[0] = 1 ;
    weights_1d[1] = 1 ;

    // Two-point Gauss-Legendre abscissae, +-1/sqrt(3).
    const float points_1d[ integration_count_1d ] =
      { -0.577350269 , 0.577350269 };

    for ( unsigned i = 0 ; i < element_node_count ; ++i ) {
      eval_map[i][0] = tmp_map[i][0];
      eval_map[i][1] = tmp_map[i][1];
      eval_map[i][2] = tmp_map[i][2];
      eval_map[i][3] = 0 ; // padding: keep the object fully initialized
    }

    for ( unsigned xp = 0 ; xp < integration_count_1d ; ++xp ) {
    for ( unsigned xf = 0 ; xf < function_count_1d ; ++xf ) {
      values_1d[xp][xf] = eval_value_1d( xf , points_1d[xp] );
      derivs_1d[xp][xf] = eval_deriv_1d( xf , points_1d[xp] );
    }}
  }
};

//----------------------------------------------------------------------------
/** \brief  Tensor-product data for the 27-node (quadratic) hexahedron on
 *          [-1,1]^3: three 1D basis functions sampled at three 1D
 *          integration points.
 */
template<>
class HexElement_TensorData< 27 > {
public:

  static const unsigned element_node_count    = 27 ;
  static const unsigned spatial_dimension     = 3 ;
  static const unsigned integration_count_1d  = 3 ;
  static const unsigned function_count_1d     = 3 ;

  // Indexed [ integration point ][ function ]; see the 8-node specialization.
  float values_1d [ integration_count_1d ][ function_count_1d ];
  float derivs_1d [ integration_count_1d ][ function_count_1d ];
  float weights_1d[ integration_count_1d ];

  // Columns 0..2 hold the x,y,z 1D indices; column 3 is padding (always 0).
  unsigned char eval_map[ element_node_count ][4] ;

  // Storage =
  //   sizeof(float) * 9 +      values_1d
  //   sizeof(float) * 9 +      derivs_1d
  //   sizeof(float) * 3 +      weights_1d
  //   sizeof(char) * 27 * 4    eval_map (including the padding column)

  /** \brief  Value of 1D quadratic basis function `jf` at `p`; 0 for an unknown `jf`. */
  static float eval_value_1d( const unsigned jf , const float p )
  {
    return 0 == jf ? 0.5 * p * ( p - 1 ) : (
           1 == jf ? 1.0 - p * p : (
           2 == jf ? 0.5 * p * ( p + 1 ) : 0 ));
  }

  /** \brief  Derivative of 1D quadratic basis function `jf` at `p`. */
  static float eval_deriv_1d( const unsigned jf , const float p )
  {
    return 0 == jf ? p - 0.5 : (
           1 == jf ? -2.0 * p : (
           2 == jf ? p + 0.5 : 0 ));
  }

  HexElement_TensorData()
  {
    const unsigned char tmp_map[ element_node_count ][ spatial_dimension ] =
      { { 0 , 0 , 0 },
        { 2 , 0 , 0 },
        { 2 , 2 , 0 },
        { 0 , 2 , 0 },
        { 0 , 0 , 2 },
        { 2 , 0 , 2 },
        { 2 , 2 , 2 },
        { 0 , 2 , 2 },
        { 1 , 0 , 0 },
        { 2 , 1 , 0 },
        { 1 , 2 , 0 },
        { 0 , 1 , 0 },
        { 0 , 0 , 1 },
        { 2 , 0 , 1 },
        { 2 , 2 , 1 },
        { 0 , 2 , 1 },
        { 1 , 0 , 2 },
        { 2 , 1 , 2 },
        { 1 , 2 , 2 },
        { 0 , 1 , 2 },
        { 1 , 1 , 1 },
        { 1 , 1 , 0 },
        { 1 , 1 , 2 },
        { 0 , 1 , 1 },
        { 2 , 1 , 1 },
        { 1 , 0 , 1 },
        { 1 , 2 , 1 } };

    // Interval [-1,1]: three-point Gauss-Legendre weights 5/9, 8/9, 5/9 ...

    weights_1d[0] = 0.555555556 ;
    weights_1d[1] = 0.888888889 ;
    weights_1d[2] = 0.555555556 ;

    // ... and abscissae -sqrt(3/5), 0, +sqrt(3/5).
    const float points_1d[3] = { -0.774596669 ,
                                  0.000000000 ,
                                  0.774596669 };

    for ( unsigned i = 0 ; i < element_node_count ; ++i ) {
      eval_map[i][0] = tmp_map[i][0];
      eval_map[i][1] = tmp_map[i][1];
      eval_map[i][2] = tmp_map[i][2];
      eval_map[i][3] = 0 ; // padding: keep the object fully initialized
    }

    for ( unsigned xp = 0 ; xp < integration_count_1d ; ++xp ) {
    for ( unsigned xf = 0 ; xf < function_count_1d ; ++xf ) {
      values_1d[xp][xf] = eval_value_1d( xf , points_1d[xp] );
      derivs_1d[xp][xf] = eval_deriv_1d( xf , points_1d[xp] );
    }}
  }
};

//----------------------------------------------------------------------------
/** \brief  Fully expanded element data built from HexElement_TensorData.
 *
 *  For every integration point `ip` and basis function `jf` the 3D weight,
 *  value and gradient are the products of the corresponding 1D tables.
 *  The same eval_map is used to decode both the integration point ordinal
 *  and the function ordinal, so integration_count == function_count.
 */
template< unsigned NodeCount >
class HexElement_Data {
public:
  static const unsigned spatial_dimension   = 3 ;
  static const unsigned element_node_count  = NodeCount ;
  static const unsigned integration_count   = NodeCount ;
  static const unsigned function_count      = NodeCount ;

  float weights[   integration_count ] ;
  float values[    integration_count ][ function_count ];
  float gradients[ integration_count ][ spatial_dimension ][ function_count ];

  HexElement_Data()
  {
    HexElement_TensorData< NodeCount > tensor_data ;

    for ( unsigned ip = 0 ; ip < integration_count ; ++ip ) {

      const unsigned ipx = tensor_data.eval_map[ip][0] ;
      const unsigned ipy = tensor_data.eval_map[ip][1] ;
      const unsigned ipz = tensor_data.eval_map[ip][2] ;

      weights[ip] = tensor_data.weights_1d[ ipx ] *
                    tensor_data.weights_1d[ ipy ] *
                    tensor_data.weights_1d[ ipz ] ;

      for ( unsigned jf = 0 ; jf < function_count ; ++jf ) {

        const unsigned jfx = tensor_data.eval_map[jf][0] ;
        const unsigned jfy = tensor_data.eval_map[jf][1] ;
        const unsigned jfz = tensor_data.eval_map[jf][2] ;

        values[ip][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
                         tensor_data.values_1d[ ipy ][ jfy ] *
                         tensor_data.values_1d[ ipz ][ jfz ] ;

        // Gradient component d differentiates the 1D factor of direction d only.
        gradients[ip][0][jf] = tensor_data.derivs_1d[ ipx ][ jfx ] *
                               tensor_data.values_1d[ ipy ][ jfy ] *
                               tensor_data.values_1d[ ipz ][ jfz ] ;

        gradients[ip][1][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
                               tensor_data.derivs_1d[ ipy ][ jfy ] *
                               tensor_data.values_1d[ ipz ][ jfz ] ;

        gradients[ip][2][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
                               tensor_data.values_1d[ ipy ][ jfy ] *
                               tensor_data.derivs_1d[ ipz ][ jfz ] ;
      }
    }
  }
};

//----------------------------------------------------------------------------

} /* namespace HybridFEM */

#endif /* #ifndef ELEMENTHEX_HPP */
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
  /// Dot product of two length-8 arrays (one value per hex node).
  ///
  /// Both pointers must reference at least eight elements.  The products
  /// and the left-to-right sum are evaluated in the usual arithmetic type of
  /// ScalarA * ScalarB; only the final result is converted to double.
  template< typename ScalarA , typename ScalarB >
  KOKKOS_INLINE_FUNCTION static
  double dot8( const ScalarA * const a , const ScalarB * const b )
  { return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3] +
           a[4] * b[4] + a[5] * b[5] + a[6] * b[6] + a[7] * b[7] ; }
  /// Compute one component of the 8-node gradient operator.
  ///
  /// Reads eight values from each of `x` and `z` and writes eight values to
  /// `grad_y`.  The Rij temporaries are differences x[i-1] - x[j-1]
  /// (one-based node numbers in the name, zero-based array indices); the
  /// t1..t6 temporaries are sums of two such differences.
  /// All intermediates are stored in ScalarCompact.
  template< class ScalarPrecise ,
            class ScalarCompact >
  KOKKOS_INLINE_FUNCTION static
  void grad( const ScalarPrecise x[] ,
             const ScalarPrecise z[] ,
             ScalarCompact grad_y[] )
  {
    const ScalarCompact R42=(x[3] - x[1]);
    const ScalarCompact R52=(x[4] - x[1]);
    const ScalarCompact R54=(x[4] - x[3]);

    const ScalarCompact R63=(x[5] - x[2]);
    const ScalarCompact R83=(x[7] - x[2]);
    const ScalarCompact R86=(x[7] - x[5]);

    const ScalarCompact R31=(x[2] - x[0]);
    const ScalarCompact R61=(x[5] - x[0]);
    const ScalarCompact R74=(x[6] - x[3]);

    const ScalarCompact R72=(x[6] - x[1]);
    const ScalarCompact R75=(x[6] - x[4]);
    const ScalarCompact R81=(x[7] - x[0]);

    const ScalarCompact t1=(R63 + R54);
    const ScalarCompact t2=(R61 + R74);
    const ScalarCompact t3=(R72 + R81);

    const ScalarCompact t4 =(R86 + R42);
    const ScalarCompact t5 =(R83 + R52);
    const ScalarCompact t6 =(R75 + R31);

    //  Calculate Y gradient from X and Z data

    grad_y[0] = (z[1] * t1) - (z[2] * R42) - (z[3] * t5) + (z[4] * t4) + (z[5] * R52) - (z[7] * R54);
    grad_y[1] = (z[2] * t2) + (z[3] * R31) - (z[0] * t1) - (z[5] * t6) + (z[6] * R63) - (z[4] * R61);
    grad_y[2] = (z[3] * t3) + (z[0] * R42) - (z[1] * t2) - (z[6] * t4) + (z[7] * R74) - (z[5] * R72);
    grad_y[3] = (z[0] * t5) - (z[1] * R31) - (z[2] * t3) + (z[7] * t6) + (z[4] * R81) - (z[6] * R83);
    grad_y[4] = (z[5] * t3) + (z[6] * R86) - (z[7] * t2) - (z[0] * t4) - (z[3] * R81) + (z[1] * R61);
    grad_y[5] = (z[6] * t5) - (z[4] * t3) - (z[7] * R75) + (z[1] * t6) - (z[0] * R52) + (z[2] * R72);
    grad_y[6] = (z[7] * t1) - (z[5] * t5) - (z[4] * R86) + (z[2] * t4) - (z[1] * R63) + (z[3] * R83);
    grad_y[7] = (z[4] * t2) - (z[6] * t1) + (z[5] * R75) - (z[3] * t6) - (z[2] * R74) + (z[0] * R54);
  }

  /// Compute all three gradient components from the nodal coordinates.
  ///
  /// Each component is obtained from the two-array overload above by
  /// passing the other two coordinate arrays in cyclic order:
  /// (x,z) -> grad_y, (z,y) -> grad_x, (y,x) -> grad_z.
  /// Every array holds eight values.
  template< class ScalarPrecise ,
            class ScalarCompact >
  static KOKKOS_INLINE_FUNCTION
  void grad( const ScalarPrecise x[] ,
             const ScalarPrecise y[] ,
             const ScalarPrecise z[] ,
             ScalarCompact grad_x[] ,
             ScalarCompact grad_y[] ,
             ScalarCompact grad_z[] )
  {
    grad( x , z , grad_y );
    grad( z , y , grad_x );
    grad( y , x , grad_z );
  }
  /// Advance the stretch and rotation tensors of one element by `dt`.
  ///
  /// \param dt       step size
  /// \param v_gr     full 3x3 velocity gradient (K_F_* layout, 9 values), read only
  /// \param stretch  symmetric stretch (K_S_* layout, 6 values), read and updated in place
  /// \param str_ten  receives the symmetric part of `v_gr` (K_S_* layout, 6 values)
  /// \param rot      full 3x3 rotation (K_F_* layout, 9 values).  Although
  ///                 tagged OUT in the signature, the old values are read to
  ///                 build the right-hand side, so it is effectively INOUT.
  ///
  /// Steps, in the order the code performs them:
  ///   1. split `v_gr` into symmetric (`str_ten`) and skew (`vort`) parts;
  ///   2. solve a 3x3 system built from `stretch` for (z1,z2,z3) and add the
  ///      vorticity to obtain the rotation rates;
  ///   3. update `rot` by solving a second 3x3 system with dt/2 coefficients;
  ///   4. update `stretch` explicitly using the values it had on entry.
  /// The statement order inside the elimination blocks matters: z1/z2 and
  /// r_* are overwritten while later lines still read them.
  template< class ScalarPrecise ,
            class ScalarCompact >
  KOKKOS_INLINE_FUNCTION static
  void polar_decomp( const float dt ,
                     const ScalarCompact v_gr[] ,
                     ScalarPrecise stretch[] /* INOUT */ ,
                     ScalarCompact str_ten[] /* OUT */ ,
                     ScalarCompact rot[] /* OUT */ )
  {
    const float dt_half = 0.5 * dt;

    ScalarCompact vort[ K_V_SIZE ]; // Vorticity

    // Symmetric part
    str_ten[K_S_XX] = v_gr[K_F_XX];
    str_ten[K_S_YY] = v_gr[K_F_YY];
    str_ten[K_S_ZZ] = v_gr[K_F_ZZ];
    str_ten[K_S_XY] = 0.5 * ( v_gr[K_F_XY] + v_gr[K_F_YX] );
    str_ten[K_S_YZ] = 0.5 * ( v_gr[K_F_YZ] + v_gr[K_F_ZY] );
    str_ten[K_S_ZX] = 0.5 * ( v_gr[K_F_ZX] + v_gr[K_F_XZ] );

    // Skew Symmetric part
    vort[K_V_XY] = 0.5 * ( v_gr[K_F_XY] - v_gr[K_F_YX] );
    vort[K_V_YZ] = 0.5 * ( v_gr[K_F_YZ] - v_gr[K_F_ZY] );
    vort[K_V_ZX] = 0.5 * ( v_gr[K_F_ZX] - v_gr[K_F_XZ] );

    // calculate the rates of rotation via gauss elimination.
    // Right-hand side: z1, z2, z3 are built from products of str_ten and
    // stretch entries before any elimination takes place.

    ScalarCompact z1 = str_ten[K_S_XY] * stretch[K_S_ZX] -
                       str_ten[K_S_ZX] * stretch[K_S_XY] +
                       str_ten[K_S_YY] * stretch[K_S_YZ] -
                       str_ten[K_S_YZ] * stretch[K_S_YY] +
                       str_ten[K_S_YZ] * stretch[K_S_ZZ] -
                       str_ten[K_S_ZZ] * stretch[K_S_YZ];

    ScalarCompact z2 = str_ten[K_S_ZX] * stretch[K_S_XX] -
                       str_ten[K_S_XX] * stretch[K_S_ZX] +
                       str_ten[K_S_YZ] * stretch[K_S_XY] -
                       str_ten[K_S_XY] * stretch[K_S_YZ] +
                       str_ten[K_S_ZZ] * stretch[K_S_ZX] -
                       str_ten[K_S_ZX] * stretch[K_S_ZZ];

    ScalarCompact z3 = str_ten[K_S_XX] * stretch[K_S_XY] -
                       str_ten[K_S_XY] * stretch[K_S_XX] +
                       str_ten[K_S_XY] * stretch[K_S_YY] -
                       str_ten[K_S_YY] * stretch[K_S_XY] +
                       str_ten[K_S_ZX] * stretch[K_S_YZ] -
                       str_ten[K_S_YZ] * stretch[K_S_ZX];

    {
      // forward elimination
      // No pivoting: the code divides by (stretch_YY + stretch_ZZ) and by the
      // reduced second pivot without checking for zero.

      const ScalarCompact a1inv = 1.0 / (stretch[K_S_YY] + stretch[K_S_ZZ]);
      const ScalarCompact a4BYa1 = -1 * stretch[K_S_XY] * a1inv;
      const ScalarCompact a2inv = 1.0 / (stretch[K_S_ZZ] + stretch[K_S_XX] + stretch[K_S_XY] * a4BYa1);

      const ScalarCompact a5 = -stretch[K_S_YZ] + stretch[K_S_ZX] * a4BYa1;

      z2 -= z1 * a4BYa1;
      const ScalarCompact a6BYa1 = -1 * stretch[K_S_ZX] * a1inv;
      const ScalarCompact a5BYa2 = a5 * a2inv;
      // NOTE(review): textbook elimination of a symmetric 3x3 system would
      // subtract the z2 * a5BYa2 term here (and a5 * a5BYa2 in the pivot
      // below); as written both enter with a plus sign.  Confirm the
      // intended sign convention before touching this.
      z3 -= z1 * a6BYa1 - z2 * a5BYa2;

      // backward substitution -

      z3 /= (stretch[K_S_XX] + stretch[K_S_YY] + stretch[K_S_ZX] * a6BYa1 + a5 * a5BYa2);
      z2 = (z2 - a5 * z3) * a2inv;
      z1 = (z1*a1inv - a6BYa1 * z3 -a4BYa1 * z2);
    }

    // calculate rotation rates - recall that spin_rate is an asymmetric tensor,
    // so compute spin rate vector as dual of spin rate tensor,
    // i.e w_i = e_ijk * spin_rate_jk

    z1 += vort[K_V_YZ];
    z2 += vort[K_V_ZX];
    z3 += vort[K_V_XY];

    {
      // update rotation tensor:
      // 1) premultiply old rotation tensor to get right-hand side.
      //    Every r_* reads the rotation values that were passed in.

      ScalarCompact r_XX = rot[K_F_XX] + dt_half*( z3 * rot[K_F_YX] - z2 * rot[K_F_ZX] );
      ScalarCompact r_YX = rot[K_F_YX] + dt_half*( z1 * rot[K_F_ZX] - z3 * rot[K_F_XX] );
      ScalarCompact r_ZX = rot[K_F_ZX] + dt_half*( z2 * rot[K_F_XX] - z1 * rot[K_F_YX] );
      ScalarCompact r_XY = rot[K_F_XY] + dt_half*( z3 * rot[K_F_YY] - z2 * rot[K_F_ZY] );
      ScalarCompact r_YY = rot[K_F_YY] + dt_half*( z1 * rot[K_F_ZY] - z3 * rot[K_F_XY] );
      ScalarCompact r_ZY = rot[K_F_ZY] + dt_half*( z2 * rot[K_F_XY] - z1 * rot[K_F_YY] );
      ScalarCompact r_XZ = rot[K_F_XZ] + dt_half*( z3 * rot[K_F_YZ] - z2 * rot[K_F_ZZ] );
      ScalarCompact r_YZ = rot[K_F_YZ] + dt_half*( z1 * rot[K_F_ZZ] - z3 * rot[K_F_XZ] );
      ScalarCompact r_ZZ = rot[K_F_ZZ] + dt_half*( z2 * rot[K_F_XZ] - z1 * rot[K_F_YZ] );


      // 2) solve for new rotation tensor via gauss elimination.
      // forward elimination -

      const ScalarCompact a12 = - dt_half * z3;
      const ScalarCompact a13 = dt_half * z2;
      ScalarCompact b32 = - dt_half * z1;
      const ScalarCompact a22inv = 1.0 / (1.0 + a12 * a12);

      const ScalarCompact a13a12 = a13*a12;
      // a23 must capture b32 before b32 is rescaled a few lines below.
      const ScalarCompact a23 = b32 + a13a12;

      r_YX += r_XX * a12;
      r_YY += r_XY * a12;
      r_YZ += r_XZ * a12;

      b32 = (b32 - a13a12) * a22inv;

      // Uses the already-updated r_Y* values from the lines above.
      r_ZX += r_XX * a13 + r_YX * b32;
      r_ZY += r_XY * a13 + r_YY * b32;
      r_ZZ += r_XZ * a13 + r_YZ * b32;

      // backward substitution -
      // Rows are written bottom-up (Z, then Y, then X) because each row
      // reads the freshly stored rows below it.

      const ScalarCompact a33inv = 1.0 / (1.0 + a13 * a13 + a23 * b32);

      rot[K_F_ZX] = r_ZX * a33inv;
      rot[K_F_ZY] = r_ZY * a33inv;
      rot[K_F_ZZ] = r_ZZ * a33inv;
      rot[K_F_YX] = ( r_YX - rot[K_F_ZX] * a23 ) * a22inv;
      rot[K_F_YY] = ( r_YY - rot[K_F_ZY] * a23 ) * a22inv;
      rot[K_F_YZ] = ( r_YZ - rot[K_F_ZZ] * a23 ) * a22inv;
      rot[K_F_XX] = r_XX - rot[K_F_ZX] * a13 - rot[K_F_YX] * a12;
      rot[K_F_XY] = r_XY - rot[K_F_ZY] * a13 - rot[K_F_YY] * a12;
      rot[K_F_XZ] = r_XZ - rot[K_F_ZZ] * a13 - rot[K_F_YZ] * a12;
    }

    // update stretch tensor in the new configuration -

    const ScalarCompact a1 = str_ten[K_S_XY] + vort[K_V_XY];
    const ScalarCompact a2 = str_ten[K_S_YZ] + vort[K_V_YZ];
    const ScalarCompact a3 = str_ten[K_S_ZX] + vort[K_V_ZX];
    const ScalarCompact b1 = str_ten[K_S_ZX] - vort[K_V_ZX];
    const ScalarCompact b2 = str_ten[K_S_XY] - vort[K_V_XY];
    const ScalarCompact b3 = str_ten[K_S_YZ] - vort[K_V_YZ];

    // Snapshot of the incoming stretch so that every update below uses the
    // old values, regardless of the order in which entries are written.
    const ScalarCompact s_XX = stretch[K_S_XX];
    const ScalarCompact s_YY = stretch[K_S_YY];
    const ScalarCompact s_ZZ = stretch[K_S_ZZ];
    const ScalarCompact s_XY = stretch[K_S_XY];
    const ScalarCompact s_YZ = stretch[K_S_YZ];
    const ScalarCompact s_ZX = stretch[K_S_ZX];

    stretch[K_S_XX] += dt * (str_ten[K_S_XX] * s_XX + ( a1 + z3 ) * s_XY + ( b1 - z2 ) * s_ZX);
    stretch[K_S_YY] += dt * (str_ten[K_S_YY] * s_YY + ( a2 + z1 ) * s_YZ + ( b2 - z3 ) * s_XY);
    stretch[K_S_ZZ] += dt * (str_ten[K_S_ZZ] * s_ZZ + ( a3 + z2 ) * s_ZX + ( b3 - z1 ) * s_YZ);
    stretch[K_S_XY] += dt * (str_ten[K_S_XX] * s_XY + ( a1 ) * s_YY + ( b1 ) * s_YZ - z3 * s_XX + z1 * s_ZX);
    stretch[K_S_YZ] += dt * (str_ten[K_S_YY] * s_YZ + ( a2 ) * s_ZZ + ( b2 ) * s_ZX - z1 * s_YY + z2 * s_XY);
    stretch[K_S_ZX] += dt * (str_ten[K_S_ZZ] * s_ZX + ( a3 ) * s_XX + ( b3 ) * s_XY - z2 * s_ZZ + z3 * s_YZ);
  }
  /// Rotate a symmetric tensor: rot_str = R^T * S * R.
  ///
  /// \param str_ten  symmetric tensor S, six values in K_S_* layout
  /// \param rot      full 3x3 tensor R, nine values in K_F_* layout
  /// \param rot_str  receives the six independent entries of the result
  ///
  /// `t` holds the intermediate product S * R stored column by column:
  /// t[0..2] is the column multiplying R's X column, t[3..5] the Y column,
  /// t[6..8] the Z column.  Only the six entries of the symmetric result
  /// that are stored are computed.
  template< typename ScalarCompact >
  static KOKKOS_INLINE_FUNCTION
  void rotate_tensor( const ScalarCompact str_ten[] ,
                      const ScalarCompact rot[] ,
                      ScalarCompact rot_str[] )
  {
    ScalarCompact t[9];

    t[0] = str_ten[K_S_XX]*rot[K_F_XX] + str_ten[K_S_XY]*rot[K_F_YX] + str_ten[K_S_XZ]*rot[K_F_ZX];
    t[1] = str_ten[K_S_YX]*rot[K_F_XX] + str_ten[K_S_YY]*rot[K_F_YX] + str_ten[K_S_YZ]*rot[K_F_ZX];
    t[2] = str_ten[K_S_ZX]*rot[K_F_XX] + str_ten[K_S_ZY]*rot[K_F_YX] + str_ten[K_S_ZZ]*rot[K_F_ZX];

    t[3] = str_ten[K_S_XX]*rot[K_F_XY] + str_ten[K_S_XY]*rot[K_F_YY] + str_ten[K_S_XZ]*rot[K_F_ZY];
    t[4] = str_ten[K_S_YX]*rot[K_F_XY] + str_ten[K_S_YY]*rot[K_F_YY] + str_ten[K_S_YZ]*rot[K_F_ZY];
    t[5] = str_ten[K_S_ZX]*rot[K_F_XY] + str_ten[K_S_ZY]*rot[K_F_YY] + str_ten[K_S_ZZ]*rot[K_F_ZY];

    t[6] = str_ten[K_S_XX]*rot[K_F_XZ] + str_ten[K_S_XY]*rot[K_F_YZ] + str_ten[K_S_XZ]*rot[K_F_ZZ];
    t[7] = str_ten[K_S_YX]*rot[K_F_XZ] + str_ten[K_S_YY]*rot[K_F_YZ] + str_ten[K_S_YZ]*rot[K_F_ZZ];
    t[8] = str_ten[K_S_ZX]*rot[K_F_XZ] + str_ten[K_S_ZY]*rot[K_F_YZ] + str_ten[K_S_ZZ]*rot[K_F_ZZ];


    // Left-multiply by R^T: each entry is a column of R dotted with a column of t.
    rot_str[ K_S_XX ] = rot[K_F_XX] * t[0] + rot[K_F_YX] * t[1] + rot[K_F_ZX] * t[2];
    rot_str[ K_S_YY ] = rot[K_F_XY] * t[3] + rot[K_F_YY] * t[4] + rot[K_F_ZY] * t[5];
    rot_str[ K_S_ZZ ] = rot[K_F_XZ] * t[6] + rot[K_F_YZ] * t[7] + rot[K_F_ZZ] * t[8];

    rot_str[ K_S_XY ] = rot[K_F_XX] * t[3] + rot[K_F_YX] * t[4] + rot[K_F_ZX] * t[5];
    rot_str[ K_S_YZ ] = rot[K_F_XY] * t[6] + rot[K_F_YY] * t[7] + rot[K_F_ZY] * t[8];
    rot_str[ K_S_ZX ] = rot[K_F_XZ] * t[0] + rot[K_F_YZ] * t[1] + rot[K_F_ZZ] * t[2];
  }
str_ten[K_S_ZX]*rot[K_F_XY] + str_ten[K_S_ZY]*rot[K_F_YY] + str_ten[K_S_ZZ]*rot[K_F_ZY]; + + t[6] = str_ten[K_S_XX]*rot[K_F_XZ] + str_ten[K_S_XY]*rot[K_F_YZ] + str_ten[K_S_XZ]*rot[K_F_ZZ]; + t[7] = str_ten[K_S_YX]*rot[K_F_XZ] + str_ten[K_S_YY]*rot[K_F_YZ] + str_ten[K_S_YZ]*rot[K_F_ZZ]; + t[8] = str_ten[K_S_ZX]*rot[K_F_XZ] + str_ten[K_S_ZY]*rot[K_F_YZ] + str_ten[K_S_ZZ]*rot[K_F_ZZ]; + + + rot_str[ K_S_XX ] = rot[K_F_XX] * t[0] + rot[K_F_YX] * t[1] + rot[K_F_ZX] * t[2]; + rot_str[ K_S_YY ] = rot[K_F_XY] * t[3] + rot[K_F_YY] * t[4] + rot[K_F_ZY] * t[5]; + rot_str[ K_S_ZZ ] = rot[K_F_XZ] * t[6] + rot[K_F_YZ] * t[7] + rot[K_F_ZZ] * t[8]; + + rot_str[ K_S_XY ] = rot[K_F_XX] * t[3] + rot[K_F_YX] * t[4] + rot[K_F_ZX] * t[5]; + rot_str[ K_S_YZ ] = rot[K_F_XY] * t[6] + rot[K_F_YY] * t[7] + rot[K_F_ZY] * t[8]; + rot_str[ K_S_ZX ] = rot[K_F_XZ] * t[0] + rot[K_F_YZ] * t[1] + rot[K_F_ZZ] * t[2]; + } + + //-------------------------------------------------------------------------- + + template< class ScalarPrecise , + class ScalarCompact > + static KOKKOS_INLINE_FUNCTION + void rotate_tensor_backward( const ScalarPrecise stress[] , + const ScalarCompact rot[] , + ScalarCompact rot_stress[] ) + { + ScalarCompact t[9] ; + + t[0] = stress[K_S_XX]*rot[K_F_XX]+ stress[K_S_XY]*rot[K_F_XY]+ stress[K_S_XZ]*rot[K_F_XZ]; + t[1] = stress[K_S_YX]*rot[K_F_XX]+ stress[K_S_YY]*rot[K_F_XY]+ stress[K_S_YZ]*rot[K_F_XZ]; + t[2] = stress[K_S_ZX]*rot[K_F_XX]+ stress[K_S_ZY]*rot[K_F_XY]+ stress[K_S_ZZ]*rot[K_F_XZ]; + t[3] = stress[K_S_XX]*rot[K_F_YX]+ stress[K_S_XY]*rot[K_F_YY]+ stress[K_S_XZ]*rot[K_F_YZ]; + t[4] = stress[K_S_YX]*rot[K_F_YX]+ stress[K_S_YY]*rot[K_F_YY]+ stress[K_S_YZ]*rot[K_F_YZ]; + t[5] = stress[K_S_ZX]*rot[K_F_YX]+ stress[K_S_ZY]*rot[K_F_YY]+ stress[K_S_ZZ]*rot[K_F_YZ]; + t[6] = stress[K_S_XX]*rot[K_F_ZX]+ stress[K_S_XY]*rot[K_F_ZY]+ stress[K_S_XZ]*rot[K_F_ZZ]; + t[7] = stress[K_S_YX]*rot[K_F_ZX]+ stress[K_S_YY]*rot[K_F_ZY]+ stress[K_S_YZ]*rot[K_F_ZZ]; + t[8] = 
stress[K_S_ZX]*rot[K_F_ZX]+ stress[K_S_ZY]*rot[K_F_ZY]+ stress[K_S_ZZ]*rot[K_F_ZZ]; + + rot_stress[ K_S_XX ] = rot[K_F_XX]*t[0] + rot[K_F_XY]*t[1] + rot[K_F_XZ]*t[2]; + rot_stress[ K_S_YY ] = rot[K_F_YX]*t[3] + rot[K_F_YY]*t[4] + rot[K_F_YZ]*t[5]; + rot_stress[ K_S_ZZ ] = rot[K_F_ZX]*t[6] + rot[K_F_ZY]*t[7] + rot[K_F_ZZ]*t[8]; + + rot_stress[ K_S_XY ] = rot[K_F_XX]*t[3] + rot[K_F_XY]*t[4] + rot[K_F_XZ]*t[5]; + rot_stress[ K_S_YZ ] = rot[K_F_YX]*t[6] + rot[K_F_YY]*t[7] + rot[K_F_YZ]*t[8]; + rot_stress[ K_S_ZX ] = rot[K_F_ZX]*t[0] + rot[K_F_ZY]*t[1] + rot[K_F_ZZ]*t[2]; + } + + //-------------------------------------------------------------------------- + + template< class ScalarPrecise , + class ScalarCompact > + KOKKOS_INLINE_FUNCTION static + void update_stress( const float dt , + const float two_mu , + const float bulk_modulus , + const ScalarCompact rot_str[] , + ScalarPrecise stress[] ) + { + const ScalarCompact e = rot_str[ K_S_XX ] + rot_str[ K_S_YY ] + rot_str[ K_S_ZZ ] ; + const ScalarCompact eb = e * bulk_modulus ; + const ScalarCompact e3 = e / 3.0 ; + + stress[K_S_XX] += dt * ( two_mu * ( rot_str[K_S_XX] - e3 ) + eb ); + stress[K_S_YY] += dt * ( two_mu * ( rot_str[K_S_YY] - e3 ) + eb ); + stress[K_S_ZZ] += dt * ( two_mu * ( rot_str[K_S_ZZ] - e3 ) + eb ); + + stress[K_S_XY] += dt * two_mu * rot_str[K_S_XY]; + stress[K_S_YZ] += dt * two_mu * rot_str[K_S_YZ]; + stress[K_S_ZX] += dt * two_mu * rot_str[K_S_ZX]; + } + + //-------------------------------------------------------------------------- + + template< class ScalarPrecise , + class ScalarCompact > + static KOKKOS_INLINE_FUNCTION + void comp_force( const ScalarPrecise vx[] , + const ScalarPrecise vy[] , + const ScalarPrecise vz[] , + const ScalarCompact grad_x[] , + const ScalarCompact grad_y[] , + const ScalarCompact grad_z[] , + const ScalarCompact total_stress12th[] , + ScalarCompact force[][ SpatialDim ] , + ScalarCompact & energy ) + { + ScalarPrecise internal_energy = 0 ; + + for ( unsigned inode = 
0; inode < ElemNodeCount ; ++inode ) { + + force[inode][0] = total_stress12th[K_S_XX] * grad_x[inode] + + total_stress12th[K_S_XY] * grad_y[inode] + + total_stress12th[K_S_XZ] * grad_z[inode] ; + + force[inode][1] = total_stress12th[K_S_YX] * grad_x[inode] + + total_stress12th[K_S_YY] * grad_y[inode] + + total_stress12th[K_S_YZ] * grad_z[inode] ; + + force[inode][2] = total_stress12th[K_S_ZX] * grad_x[inode] + + total_stress12th[K_S_ZY] * grad_y[inode] + + total_stress12th[K_S_ZZ] * grad_z[inode] ; + + internal_energy += force[inode][0] * vx[inode] + + force[inode][1] * vy[inode] + + force[inode][2] * vz[inode] ; + } + + energy = internal_energy ; + } + + //-------------------------------------------------------------------------- +}; + +} // namespace Explicit + +#endif /* #ifndef KOKKOS_HEXEXPLICITFUNCTIONS_HPP */ + diff --git a/lib/kokkos/example/multi_fem/Implicit.hpp b/lib/kokkos/example/multi_fem/Implicit.hpp new file mode 100644 index 0000000000000000000000000000000000000000..53f602f11ae3fe6e0a61bf29ded6ad8464f653b0 --- /dev/null +++ b/lib/kokkos/example/multi_fem/Implicit.hpp @@ -0,0 +1,341 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
// Timings collected for one pass of the implicit example.
// run() stores timer readings in seconds; driver() scales them by 1000
// and prints them under "millisec" column headings.
struct PerformanceData {
  double mesh_time ;
  double graph_time ;
  double elem_time ;
  double matrix_gather_fill_time ;
  double matrix_boundary_condition_time ;
  double cg_iteration_time ;

  // Every timing starts at zero.
  PerformanceData()
  {
    mesh_time                      = 0 ;
    graph_time                     = 0 ;
    elem_time                      = 0 ;
    matrix_gather_fill_time        = 0 ;
    matrix_boundary_condition_time = 0 ;
    cg_iteration_time              = 0 ;
  }

  // Field by field, keep the smaller of the stored timing and the one in 'rhs'.
  void best( const PerformanceData & rhs )
  {
    keep_smaller( mesh_time                      , rhs.mesh_time );
    keep_smaller( graph_time                     , rhs.graph_time );
    keep_smaller( elem_time                      , rhs.elem_time );
    keep_smaller( matrix_gather_fill_time        , rhs.matrix_gather_fill_time );
    keep_smaller( matrix_boundary_condition_time , rhs.matrix_boundary_condition_time );
    keep_smaller( cg_iteration_time              , rhs.cg_iteration_time );
  }

private:

  // Overwrite 'current' only when 'candidate' is strictly smaller.
  static void keep_smaller( double & current , const double candidate )
  {
    if ( candidate < current ) { current = candidate ; }
  }
};
execution_space::size_type size_type ; // unused + + typedef typename fixture_type::FEMeshType mesh_type ; + typedef typename fixture_type::coordinate_scalar_type coordinate_scalar_type ; + + enum { ElementNodeCount = fixture_type::element_node_count }; + + const comm::Machine machine = mesh.parallel_data_map.machine ; + + const size_t element_count = mesh.elem_node_ids.dimension_0(); + + const size_t iteration_limit = 200 ; + const double residual_tolerance = 1e-14 ; + + size_t iteration_count = 0 ; + double residual_norm = 0 ; + + PerformanceData perf_data ; + + //------------------------------------ + // Sparse linear system types: + + typedef Kokkos::View< scalar_type* , execution_space > vector_type ; + typedef Kokkos::CrsMatrix< scalar_type , execution_space > matrix_type ; + typedef typename matrix_type::graph_type matrix_graph_type ; + typedef typename matrix_type::coefficients_type matrix_coefficients_type ; + + typedef GraphFactory< matrix_graph_type , mesh_type > graph_factory ; + + //------------------------------------ + // Problem setup types: + + typedef ElementComputation< scalar_type , scalar_type , execution_space > ElementFunctor ; + typedef DirichletBoundary< scalar_type , scalar_type , execution_space > BoundaryFunctor ; + + typedef typename ElementFunctor::elem_matrices_type elem_matrices_type ; + typedef typename ElementFunctor::elem_vectors_type elem_vectors_type ; + + typedef GatherFill< matrix_type , + mesh_type , + elem_matrices_type , + elem_vectors_type > GatherFillFunctor ; + + //------------------------------------ + + const scalar_type elem_coeff_K = 2 ; + const scalar_type elem_load_Q = 1 ; + + matrix_type linsys_matrix ; + vector_type linsys_rhs ; + vector_type linsys_solution ; + + typename graph_factory::element_map_type element_map ; + + Kokkos::Timer wall_clock ; + + //------------------------------------ + // Generate sparse matrix graph and element->graph map. 
+ + graph_factory::create( mesh , linsys_matrix.graph , element_map ); + + execution_space::fence(); + perf_data.graph_time = comm::max( machine , wall_clock.seconds() ); + + //------------------------------------ + // Allocate linear system coefficients and rhs: + + const size_t local_owned_length = + linsys_matrix.graph.row_map.dimension_0() - 1 ; + + linsys_matrix.coefficients = + matrix_coefficients_type( "coeff" , linsys_matrix.graph.entries.dimension_0() ); + + linsys_rhs = vector_type( "rhs" , local_owned_length ); + linsys_solution = vector_type( "solution" , local_owned_length ); + + //------------------------------------ + // Fill linear system + { + elem_matrices_type elem_matrices ; + elem_vectors_type elem_vectors ; + + if ( element_count ) { + elem_matrices = elem_matrices_type( std::string("elem_matrices"), element_count ); + elem_vectors = elem_vectors_type ( std::string("elem_vectors"), element_count ); + } + + //------------------------------------ + // Compute element matrices and vectors: + + wall_clock.reset(); + + ElementFunctor::apply( mesh , + elem_matrices , elem_vectors , + elem_coeff_K , elem_load_Q ); + + execution_space::fence(); + perf_data.elem_time = comm::max( machine , wall_clock.seconds() ); + + //------------------------------------ + // Fill linear system coefficients: + + wall_clock.reset(); + + GatherFillFunctor::apply( linsys_matrix , linsys_rhs , + mesh , element_map , elem_matrices , elem_vectors ); + + execution_space::fence(); + perf_data.matrix_gather_fill_time = comm::max( machine , wall_clock.seconds() ); + + // Apply boundary conditions: + + wall_clock.reset(); + + BoundaryFunctor::apply( linsys_matrix , linsys_rhs , mesh , + 0 , global_max_z , 0 , global_max_z ); + + execution_space::fence(); + perf_data.matrix_boundary_condition_time = comm::max( machine , wall_clock.seconds() ); + } + + //------------------------------------ + // Solve linear sytem + + cgsolve( mesh.parallel_data_map , + linsys_matrix , linsys_rhs 
, linsys_solution , + iteration_count , residual_norm , + perf_data.cg_iteration_time , + iteration_limit , residual_tolerance ); + + //------------------------------------ + + if ( print_sample ) { + + typename mesh_type::node_coords_type::HostMirror coords_h = + Kokkos::create_mirror( mesh.node_coords ); + + typename vector_type::HostMirror X_h = + Kokkos::create_mirror( linsys_solution ); + + Kokkos::deep_copy( coords_h , mesh.node_coords ); + Kokkos::deep_copy( X_h , linsys_solution ); + + for ( size_t i = 0 ; i < mesh.parallel_data_map.count_owned ; ++i ) { + const coordinate_scalar_type x = coords_h(i,0); + const coordinate_scalar_type y = coords_h(i,1); + const coordinate_scalar_type z = coords_h(i,2); + + if ( x <= 0 && y <= 0 ) { + std::cout << " node( " << x << " " << y << " " << z << " ) = " + << X_h(i) << std::endl ; + } + } + } + + return perf_data ; +} + +//---------------------------------------------------------------------------- + +template< typename Scalar , class Device > +void driver( const char * const label , + comm::Machine machine , + const int gang_count , + const int elem_count_beg , + const int elem_count_end , + const int runs ) +{ + typedef Scalar scalar_type ; + typedef Device execution_space ; + typedef double coordinate_scalar_type ; + typedef FixtureElementHex8 fixture_element_type ; + + typedef BoxMeshFixture< coordinate_scalar_type , + execution_space , + fixture_element_type > fixture_type ; + + typedef typename fixture_type::FEMeshType mesh_type ; + + const size_t proc_count = comm::size( machine ); + const size_t proc_rank = comm::rank( machine ); + + if ( elem_count_beg == 0 || elem_count_end == 0 || runs == 0 ) return ; + + if ( comm::rank( machine ) == 0 ) { + std::cout << std::endl ; + std::cout << "\"Kokkos::HybridFE::Implicit " << label << "\"" << std::endl; + std::cout << "\"Size\" , \"Graphing\" , \"Element\" , \"Fill\" , \"Boundary\" , \"CG-Iter\"" << std::endl + << "\"elems\" , \"millisec\" , \"millisec\" , 
\"millisec\" , \"millisec\" , \"millisec\"" << std::endl ; + } + + for(int i = elem_count_beg ; i < elem_count_end ; i *= 2 ) + { + const int ix = std::max( 1 , (int) cbrt( ((double) i) / 2.0 ) ); + const int iy = ix + 1 ; + const int iz = 2 * iy ; + const int n = ix * iy * iz ; + + mesh_type mesh = + fixture_type::create( proc_count , proc_rank , gang_count , + ix , iy , iz ); + + mesh.parallel_data_map.machine = machine ; + + PerformanceData perf_data , perf_best ; + + for(int j = 0; j < runs; j++){ + + perf_data = run<scalar_type,fixture_type>(mesh,ix,iy,iz, false ); + + if( j == 0 ) { + perf_best = perf_data ; + } + else { + perf_best.best( perf_data ); + } + } + + if ( comm::rank( machine ) == 0 ) { + + std::cout << std::setw(8) << n << " , " + << std::setw(10) << perf_best.graph_time * 1000 << " , " + << std::setw(10) << perf_best.elem_time * 1000 << " , " + << std::setw(10) << perf_best.matrix_gather_fill_time * 1000 << " , " + << std::setw(10) << perf_best.matrix_boundary_condition_time * 1000 << " , " + << std::setw(10) << perf_best.cg_iteration_time * 1000 + << std::endl ; + } + } +} + +//---------------------------------------------------------------------------- + +} /* namespace Implicit */ +} /* namespace HybridFEM */ + + +#endif /* #ifndef HYBRIDFEM_IMPLICIT_HPP */ + diff --git a/lib/kokkos/example/multi_fem/ImplicitFunctors.hpp b/lib/kokkos/example/multi_fem/ImplicitFunctors.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9d9aa771636c7ec9af064b9346ffed7f01344a2f --- /dev/null +++ b/lib/kokkos/example/multi_fem/ImplicitFunctors.hpp @@ -0,0 +1,585 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
//----------------------------------------------------------------------------
// Gauss-Legendre integration rules on the reference interval [-1,1] and
// their tensor product on the cube [-1,1]^3.
//
//   TensorIntegration< Scalar , 1 , N >      N-point 1-D rule, N = 1, 2, 3
//   TensorIntegration< Scalar , 3 , Order >  Order^3-point rule on the cube
//
// Each rule holds its points in 'pts' and the matching weights in 'wts'.
// Nodes and weights are written at full double precision so the rules are
// exact to round-off when Scalar is double (the 1-D weights sum to 2).

template< typename Scalar , unsigned Dim , unsigned N >
struct TensorIntegration ;

// One-point rule: the midpoint with weight 2.
template<typename Scalar >
struct TensorIntegration<Scalar,1,1> {
  Scalar pts[1] ;
  Scalar wts[1] ;

  TensorIntegration() { pts[0] = 0 ; wts[0] = 2 ; }
};

// Two-point rule: points at -/+ 1/sqrt(3), unit weights.
template<typename Scalar >
struct TensorIntegration<Scalar,1,2>
{
  Scalar pts[2] ;
  Scalar wts[2] ;

  TensorIntegration()
  {
    const Scalar x2 = 0.57735026918962576451 ; // 1 / sqrt(3)
    pts[0] = -x2; wts[0] = 1.0;
    pts[1] =  x2; wts[1] = 1.0;
  }
};

// Three-point rule: points at 0 and -/+ sqrt(3/5), weights 8/9 and 5/9.
template<typename Scalar >
struct TensorIntegration<Scalar,1,3>
{
  Scalar pts[3] ;
  Scalar wts[3] ;

  TensorIntegration()
  {
    const Scalar x3 = 0.77459666924148337704 ; // sqrt(3/5)
    const Scalar w1 = 5.0 / 9.0 ;
    const Scalar w2 = 8.0 / 9.0 ;
    pts[0] = -x3 ;  wts[0] = w1 ;
    pts[1] =  0 ;   wts[1] = w2 ;
    pts[2] =  x3 ;  wts[2] = w1 ;
  }
};

// Tensor-product rule on the cube built from the 1-D rule of the same
// order.  Points are enumerated with the first coordinate varying fastest
// and the third slowest; each weight is the product of the 1-D weights.
template< typename Scalar , unsigned Order >
struct TensorIntegration<Scalar,3,Order>
{
  static const unsigned N = Order * Order * Order ;

  Scalar pts[N][3] ;
  Scalar wts[N];

  TensorIntegration()
  {
    TensorIntegration<Scalar,1,Order> oneD ;

    unsigned n = 0 ;
    for ( unsigned k = 0 ; k < Order ; ++k ) {
    for ( unsigned j = 0 ; j < Order ; ++j ) {
    for ( unsigned i = 0 ; i < Order ; ++i , ++n ) {
      pts[n][0] = oneD.pts[i] ;
      pts[n][1] = oneD.pts[j] ;
      pts[n][2] = oneD.pts[k] ;
      wts[n] = oneD.wts[i] * oneD.wts[j] * oneD.wts[k] ;
    }}}
  }
};
static const unsigned IntegrationOrder = 2 ; + + typedef TensorIntegration< Scalar , SpatialDimension , IntegrationOrder > + TensorIntegrationType ; + + static const unsigned PointCount = TensorIntegrationType::N ; + + Scalar value [ PointCount ][ FunctionCount ] ; + Scalar gradient[ PointCount ][ FunctionCount * SpatialDimension ]; + Scalar weight [ PointCount ]; + + ShapeFunctionEvaluation() + { + const TensorIntegration< Scalar , SpatialDimension , IntegrationOrder > + integration ; + + const Scalar ONE8TH = 0.125 ; + + for ( unsigned i = 0 ; i < PointCount ; ++i ) { + + const Scalar u = 1.0 - integration.pts[i][0]; + const Scalar v = 1.0 - integration.pts[i][1]; + const Scalar w = 1.0 - integration.pts[i][2]; + + const Scalar up1 = 1.0 + integration.pts[i][0]; + const Scalar vp1 = 1.0 + integration.pts[i][1]; + const Scalar wp1 = 1.0 + integration.pts[i][2]; + + weight[i] = integration.wts[i] ; + + // Vaues: + value[i][0] = ONE8TH * u * v * w ; + value[i][1] = ONE8TH * up1 * v * w ; + value[i][2] = ONE8TH * up1 * vp1 * w ; + value[i][3] = ONE8TH * u * vp1 * w ; + + value[i][4] = ONE8TH * u * v * wp1 ; + value[i][5] = ONE8TH * up1 * v * wp1 ; + value[i][6] = ONE8TH * up1 * vp1 * wp1 ; + value[i][7] = ONE8TH * u * vp1 * wp1 ; + + //fn 0 = u * v * w + gradient[i][ 0] = ONE8TH * -1 * v * w ; + gradient[i][ 1] = ONE8TH * u * -1 * w ; + gradient[i][ 2] = ONE8TH * u * v * -1 ; + + //fn 1 = up1 * v * w + gradient[i][ 3] = ONE8TH * 1 * v * w ; + gradient[i][ 4] = ONE8TH * up1 * -1 * w ; + gradient[i][ 5] = ONE8TH * up1 * v * -1 ; + + //fn 2 = up1 * vp1 * w + gradient[i][ 6] = ONE8TH * 1 * vp1 * w ; + gradient[i][ 7] = ONE8TH * up1 * 1 * w ; + gradient[i][ 8] = ONE8TH * up1 * vp1 * -1 ; + + //fn 3 = u * vp1 * w + gradient[i][ 9] = ONE8TH * -1 * vp1 * w ; + gradient[i][10] = ONE8TH * u * 1 * w ; + gradient[i][11] = ONE8TH * u * vp1 * -1 ; + + //fn 4 = u * v * wp1 + gradient[i][12] = ONE8TH * -1 * v * wp1 ; + gradient[i][13] = ONE8TH * u * -1 * wp1 ; + gradient[i][14] = 
ONE8TH * u * v * 1 ; + + //fn 5 = up1 * v * wp1 + gradient[i][15] = ONE8TH * 1 * v * wp1 ; + gradient[i][16] = ONE8TH * up1 * -1 * wp1 ; + gradient[i][17] = ONE8TH * up1 * v * 1 ; + + //fn 6 = up1 * vp1 * wp1 + gradient[i][18] = ONE8TH * 1 * vp1 * wp1 ; + gradient[i][19] = ONE8TH * up1 * 1 * wp1 ; + gradient[i][20] = ONE8TH * up1 * vp1 * 1 ; + + //fn 7 = u * vp1 * wp1 + gradient[i][21] = ONE8TH * -1 * vp1 * wp1 ; + gradient[i][22] = ONE8TH * u * 1 * wp1 ; + gradient[i][23] = ONE8TH * u * vp1 * 1 ; + } + } +}; + +//---------------------------------------------------------------------------- + +template< typename ScalarType , typename ScalarCoordType , class DeviceType > +struct ElementComputation +{ + typedef DeviceType execution_space; + typedef ScalarType scalar_type ; + typedef typename execution_space::size_type size_type ; + + static const size_type ElementNodeCount = 8 ; + + typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ; + typedef Kokkos::View< scalar_type[][ElementNodeCount][ElementNodeCount] , execution_space > elem_matrices_type ; + typedef Kokkos::View< scalar_type[][ElementNodeCount] , execution_space > elem_vectors_type ; + + typedef ShapeFunctionEvaluation< scalar_type > shape_function_data ; + + static const unsigned SpatialDim = shape_function_data::SpatialDimension ; + static const unsigned FunctionCount = shape_function_data::FunctionCount ; + +private: + + const shape_function_data shape_eval ; + typename mesh_type::elem_node_ids_type elem_node_ids ; + typename mesh_type::node_coords_type node_coords ; + elem_matrices_type element_matrices ; + elem_vectors_type element_vectors ; + scalar_type coeff_K ; + scalar_type coeff_Q ; + + ElementComputation( const mesh_type & arg_mesh , + const elem_matrices_type & arg_element_matrices , + const elem_vectors_type & arg_element_vectors , + const scalar_type arg_coeff_K , + const scalar_type arg_coeff_Q ) + : shape_eval() + , elem_node_ids( arg_mesh.elem_node_ids ) + , 
node_coords( arg_mesh.node_coords ) + , element_matrices( arg_element_matrices ) + , element_vectors( arg_element_vectors ) + , coeff_K( arg_coeff_K ) + , coeff_Q( arg_coeff_Q ) + {} + +public: + + static void apply( const mesh_type & mesh , + const elem_matrices_type & elem_matrices , + const elem_vectors_type & elem_vectors , + const scalar_type elem_coeff_K , + const scalar_type elem_coeff_Q ) + { + ElementComputation comp( mesh , elem_matrices , elem_vectors , elem_coeff_K , elem_coeff_Q ); + const size_t elem_count = mesh.elem_node_ids.dimension_0(); + + parallel_for( elem_count , comp ); + } + + //------------------------------------ + + static const unsigned FLOPS_jacobian = + FunctionCount * SpatialDim * SpatialDim * 2 ; + + KOKKOS_INLINE_FUNCTION + void jacobian( const ScalarCoordType * x, + const ScalarCoordType * y, + const ScalarCoordType * z, + const scalar_type * grad_vals, + scalar_type * J) const + { + int i_grad = 0 ; + + for( unsigned i = 0; i < ElementNodeCount ; ++i , i_grad += SpatialDim ) { + const scalar_type g0 = grad_vals[ i_grad ]; + const scalar_type g1 = grad_vals[ i_grad + 1 ]; + const scalar_type g2 = grad_vals[ i_grad + 2 ]; + const scalar_type x0 = x[i] ; + const scalar_type x1 = y[i] ; + const scalar_type x2 = z[i] ; + + J[0] += g0 * x0 ; + J[1] += g0 * x1 ; + J[2] += g0 * x2 ; + + J[3] += g1 * x0 ; + J[4] += g1 * x1 ; + J[5] += g1 * x2 ; + + J[6] += g2 * x0 ; + J[7] += g2 * x1 ; + J[8] += g2 * x2 ; + } + } + + //------------------------------------ + + static const unsigned FLOPS_inverse_and_det = 46 ; + + KOKKOS_INLINE_FUNCTION + scalar_type inverse_and_determinant3x3( scalar_type * const J ) const + { + const scalar_type J00 = J[0]; + const scalar_type J01 = J[1]; + const scalar_type J02 = J[2]; + + const scalar_type J10 = J[3]; + const scalar_type J11 = J[4]; + const scalar_type J12 = J[5]; + + const scalar_type J20 = J[6]; + const scalar_type J21 = J[7]; + const scalar_type J22 = J[8]; + + const scalar_type term0 = J22*J11 - 
J21*J12; + const scalar_type term1 = J22*J01 - J21*J02; + const scalar_type term2 = J12*J01 - J11*J02; + + const scalar_type detJ = J00*term0 - J10*term1 + J20*term2; + const scalar_type inv_detJ = 1.0/detJ; + + J[0] = term0*inv_detJ; + J[1] = -term1*inv_detJ; + J[2] = term2*inv_detJ; + + J[3] = -(J22*J10 - J20*J12)*inv_detJ; + J[4] = (J22*J00 - J20*J02)*inv_detJ; + J[5] = -(J12*J00 - J10*J02)*inv_detJ; + + J[6] = (J21*J10 - J20*J11)*inv_detJ; + J[7] = -(J21*J00 - J20*J01)*inv_detJ; + J[8] = (J11*J00 - J10*J01)*inv_detJ; + + return detJ ; + } + + //------------------------------------ + + KOKKOS_INLINE_FUNCTION + void matTransMat3x3_X_3xn( const scalar_type * A, int n, + const scalar_type * B, + scalar_type * C ) const + { + //A is 3x3, B is 3xn. So C is also 3xn. + //A,B,C are all assumed to be ordered such that columns are contiguous. + + scalar_type * Cj = C; + const scalar_type * Bj = B; + + for(int j=0; j<n; ++j) { + Cj[0] = A[0]*Bj[0] + A[1]*Bj[1] + A[2]*Bj[2]; + Cj[1] = A[3]*Bj[0] + A[4]*Bj[1] + A[5]*Bj[2]; + Cj[2] = A[6]*Bj[0] + A[7]*Bj[1] + A[8]*Bj[2]; + Bj += 3; + Cj += 3; + } + + } + //------------------------------------ + + static const unsigned FLOPS_contributeDiffusionMatrix = FunctionCount * ( 3 * 5 + FunctionCount * 7 ) ; + + KOKKOS_INLINE_FUNCTION + void contributeDiffusionMatrix( + const scalar_type weight , + const scalar_type grad_vals[] , + const scalar_type invJ[] , + scalar_type elem_mat[][8] ) const + { + scalar_type dpsidx[8], dpsidy[8], dpsidz[8]; + + int i_grad = 0 ; + for( unsigned i = 0; i < FunctionCount ; ++i , i_grad += 3 ) { + const scalar_type g0 = grad_vals[i_grad+0]; + const scalar_type g1 = grad_vals[i_grad+1]; + const scalar_type g2 = grad_vals[i_grad+2]; + + dpsidx[i] = g0 * invJ[0] + g1 * invJ[1] + g2 * invJ[2]; + dpsidy[i] = g0 * invJ[3] + g1 * invJ[4] + g2 * invJ[5]; + dpsidz[i] = g0 * invJ[6] + g1 * invJ[7] + g2 * invJ[8]; + } + + for( unsigned m = 0; m < FunctionCount; m++) { + for( unsigned n = 0; n < FunctionCount; 
n++) { + + elem_mat[m][n] += weight * + ((dpsidx[m] * dpsidx[n]) + + (dpsidy[m] * dpsidy[n]) + + (dpsidz[m] * dpsidz[n])); + } + } + } + + //------------------------------------ + + static const unsigned FLOPS_contributeSourceVector = FunctionCount * 2 ; + + KOKKOS_INLINE_FUNCTION + void contributeSourceVector( const scalar_type term , + const scalar_type psi[] , + scalar_type elem_vec[] ) const + { + for( unsigned i=0; i< FunctionCount ; ++i) { + elem_vec[i] += psi[i] * term ; + } + } + + + static const unsigned FLOPS_operator = + shape_function_data::PointCount * ( 3 + + FLOPS_jacobian + + FLOPS_inverse_and_det + + FLOPS_contributeDiffusionMatrix + + FLOPS_contributeSourceVector ) ; + + KOKKOS_INLINE_FUNCTION + void operator()( int ielem )const { + + scalar_type elem_vec[8] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 }; + scalar_type elem_mat[8][8] = + { { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } , + { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } , + { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } , + { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } , + { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } , + { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } , + { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } , + { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } }; + + ScalarCoordType x[8], y[8], z[8]; + + for ( int i = 0 ; i < 8 ; ++i ) { + const int node_index = elem_node_ids( ielem , i ); + x[i] = node_coords( node_index , 0 ); + y[i] = node_coords( node_index , 1 ); + z[i] = node_coords( node_index , 2 ); + } + + // This loop could be parallelized; however, + // it would require additional per-thread temporaries + // of 'elem_vec' and 'elem_mat' which would + // consume more local memory and have to be reduced. + + for ( unsigned i = 0 ; i < shape_function_data::PointCount ; ++i ) { + + scalar_type J[SpatialDim*SpatialDim] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + jacobian( x, y, z, shape_eval.gradient[i] , J ); + + // Overwrite J with its inverse to save scratch memory space. 
+ const scalar_type detJ_w = shape_eval.weight[i] * inverse_and_determinant3x3(J); + const scalar_type k_detJ_w = coeff_K * detJ_w ; + const scalar_type Q_detJ_w = coeff_Q * detJ_w ; + + contributeDiffusionMatrix( k_detJ_w , shape_eval.gradient[i] , J , elem_mat ); + + contributeSourceVector( Q_detJ_w , shape_eval.value[i] , elem_vec ); + } + + for( size_type i=0; i< ElementNodeCount ; ++i) { + element_vectors(ielem, i) = elem_vec[i] ; + } + + for( size_type i = 0; i < ElementNodeCount ; i++){ + for( size_type j = 0; j < ElementNodeCount ; j++){ + element_matrices(ielem, i, j) = elem_mat[i][j] ; + } + } + } +}; /* ElementComputation */ + +//---------------------------------------------------------------------------- + +template< typename ScalarType , typename ScalarCoordType , class DeviceType > +struct DirichletBoundary +{ + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type ; + + static const size_type ElementNodeCount = 8 ; + + typedef Kokkos::CrsMatrix< ScalarType , execution_space > matrix_type ; + typedef Kokkos::View< ScalarType[] , execution_space > vector_type ; + + typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ; + + typename mesh_type::node_coords_type node_coords ; + matrix_type matrix ; + vector_type rhs ; + ScalarCoordType bc_lower_z ; + ScalarCoordType bc_upper_z ; + ScalarType bc_lower_value ; + ScalarType bc_upper_value ; + + KOKKOS_INLINE_FUNCTION + void operator()( size_type inode ) const + { + // Apply a dirichlet boundary condition to 'irow' + // to maintain the symmetry of the original + // global stiffness matrix, zero out the columns + // that correspond to boundary conditions, and + // adjust the load vector accordingly + + const size_type iBeg = matrix.graph.row_map[inode]; + const size_type iEnd = matrix.graph.row_map[inode+1]; + + const ScalarCoordType z = node_coords(inode,2); + const bool bc_lower = z <= bc_lower_z ; + const bool bc_upper = bc_upper_z <= z ; + + 
if ( bc_lower || bc_upper ) { + const ScalarType bc_value = bc_lower ? bc_lower_value + : bc_upper_value ; + + rhs(inode) = bc_value ; // set the rhs vector + + // zero each value on the row, and leave a one + // on the diagonal + + for( size_type i = iBeg ; i < iEnd ; i++) { + matrix.coefficients(i) = + (int) inode == matrix.graph.entries(i) ? 1 : 0 ; + } + } + else { + // Find any columns that are boundary conditions. + // Clear them and adjust the load vector + + for( size_type i = iBeg ; i < iEnd ; i++ ) { + const size_type cnode = matrix.graph.entries(i) ; + + const ScalarCoordType zc = node_coords(cnode,2); + const bool c_bc_lower = zc <= bc_lower_z ; + const bool c_bc_upper = bc_upper_z <= zc ; + + if ( c_bc_lower || c_bc_upper ) { + + const ScalarType c_bc_value = c_bc_lower ? bc_lower_value + : bc_upper_value ; + + rhs( inode ) -= c_bc_value * matrix.coefficients(i); + + matrix.coefficients(i) = 0 ; + } + } + } + } + + + static void apply( const matrix_type & linsys_matrix , + const vector_type & linsys_rhs , + const mesh_type & mesh , + const ScalarCoordType bc_lower_z , + const ScalarCoordType bc_upper_z , + const ScalarType bc_lower_value , + const ScalarType bc_upper_value ) + { + const size_t row_count = linsys_matrix.graph.row_map.dimension_0() - 1 ; + DirichletBoundary op ; + op.node_coords = mesh.node_coords ; + op.matrix = linsys_matrix ; + op.rhs = linsys_rhs ; + op.bc_lower_z = bc_lower_z ; + op.bc_upper_z = bc_upper_z ; + op.bc_lower_value = bc_lower_value ; + op.bc_upper_value = bc_upper_value ; + parallel_for( row_count , op ); + } +}; + +//---------------------------------------------------------------------------- + +} /* namespace Implicit */ +} /* namespace HybridFEM */ + diff --git a/lib/kokkos/example/multi_fem/LinAlgBLAS.hpp b/lib/kokkos/example/multi_fem/LinAlgBLAS.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2478fa9aede034ec286e34911847e1eaf4eb11e3 --- /dev/null +++ 
b/lib/kokkos/example/multi_fem/LinAlgBLAS.hpp @@ -0,0 +1,567 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef USESCASES_LINALG_BLAS_HPP +#define USESCASES_LINALG_BLAS_HPP + +#include <cmath> +#include <utility> +#include <ParallelComm.hpp> +#include <Kokkos_Core.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class Scalar , class Layout , class DeviceType > struct Dot ; + +template< class Scalar , class Layout , class DeviceType > struct Dot1 ; + +template< typename ScalarA , + typename ScalarY , + class Layout , class Device > +struct Scale ; + +template< typename ScalarA , + typename ScalarY , + class Layout , class Device > +struct Fill ; + +template< typename ScalarA , + typename ScalarX , + typename ScalarY , + class Layout , class Device > +struct AXPY ; + +template< typename ScalarX , + typename ScalarB , + typename ScalarY , + class Layout , class Device > +struct XPBY ; + +template< typename ScalarA , + typename ScalarX , + typename ScalarB , + typename ScalarY , + typename ScalarW , + class Layout , class Device > +struct WAXPBY ; + +} +} + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_HAVE_MPI ) + +template< typename ScalarX /* Allow mix of const and non-const */ , + typename ScalarY /* Allow mix of const and non-const */ , + class L , class D , + class MX /* Allow any management type */ , + class MY /* Allow any management type */ > +inline +double dot( const size_t n , + const View< ScalarX * , L , D , MX > & x , + const View< ScalarY * , L , D , MY > & y , + comm::Machine machine ) +{ + double global_result = 0 ; 
+ double local_result = 0 ; + + Impl::Dot< ScalarX , L , D >( n , x , y , local_result ); + + MPI_Allreduce( & local_result , & global_result , 1 , + MPI_DOUBLE , MPI_SUM , machine.mpi_comm ); + + return global_result ; +} + +#else + +template< typename ScalarX /* Allow mix of const and non-const */ , + typename ScalarY /* Allow mix of const and non-const */ , + class L , class D , + class MX /* Allow any management type */ , + class MY /* Allow any management type */ > +inline +double dot( const size_t n , + const View< ScalarX * , L , D , MX > & x , + const View< ScalarY * , L , D , MY > & y , + comm::Machine ) +{ + double global_result = 0 ; + + Impl::Dot< ScalarX , L , D >( n , x , y , global_result ); + + return global_result ; +} + +#endif + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_HAVE_MPI ) + +template< typename ScalarX /* Allow mix of const and non-const */ , + class L , class D , + class MX /* Allow any management type */ > +inline +double dot( const size_t n , + const View< ScalarX * , L , D , MX > & x , + comm::Machine machine ) +{ + double global_result = 0 ; + double local_result = 0 ; + + Impl::Dot1< ScalarX , L , D >( n , x , local_result ); + + MPI_Allreduce( & local_result , & global_result , 1 , + MPI_DOUBLE , MPI_SUM , machine.mpi_comm ); + + return global_result ; +} + +#else + +template< typename ScalarX /* Allow mix of const and non-const */ , + class L , class D , + class MX /* Allow any management type */ > +inline +double dot( const size_t n , + const View< ScalarX * , L , D , MX > & x , + comm::Machine ) +{ + double global_result = 0 ; + + Impl::Dot1< ScalarX , L , D >( n , x , global_result ); + + return global_result ; +} + +#endif + +//---------------------------------------------------------------------------- + +template< typename ScalarX /* Allow mix of const and non-const */ , + class L , class D , + class MX /* Allow any management type */ > +inline +double norm2( const 
size_t n , + const View< ScalarX * , L , D , MX > & x , + comm::Machine machine ) +{ + return std::sqrt( dot( n , x , machine ) ); +} + +//---------------------------------------------------------------------------- + +template< typename ScalarA , + typename ScalarX , + class L , + class D , + class MX > +void scale( const size_t n , + const ScalarA & alpha , + const View< ScalarX * , L , D , MX > & x ) +{ + Impl::Scale< ScalarA , ScalarX , L , D >( n , alpha , x ); +} + +template< typename ScalarA , + typename ScalarX , + class L , + class D , + class MX > +void fill( const size_t n , + const ScalarA & alpha , + const View< ScalarX * , L , D , MX > & x ) +{ + Impl::Fill< ScalarA , ScalarX , L , D >( n , alpha , x ); +} + +//---------------------------------------------------------------------------- + +template< typename ScalarA , + typename ScalarX , + typename ScalarY , + class L , + class D , + class MX , + class MY > +void axpy( const size_t n , + const ScalarA & alpha , + const View< ScalarX *, L , D , MX > & x , + const View< ScalarY *, L , D , MY > & y ) +{ + Impl::AXPY< ScalarA, ScalarX, ScalarY , L , D >( n, alpha, x, y ); +} + +//---------------------------------------------------------------------------- + +template< typename ScalarX , + typename ScalarB , + typename ScalarY , + class L , + class D , + class MX , + class MY > +void xpby( const size_t n , + const View< ScalarX *, L , D , MX > & x , + const ScalarB & beta , + const View< ScalarY *, L , D , MY > & y ) +{ + Impl::XPBY< ScalarX, ScalarB, ScalarY , L , D >( n, x, beta, y ); +} + +//---------------------------------------------------------------------------- +// w = alpha * x + beta * y + +template< typename ScalarA , + typename ScalarX , + typename ScalarB , + typename ScalarY , + typename ScalarW , + class L , class D , + class MX , class MY , class MW > +void waxpby( const size_t n , + const ScalarA & alpha , + const View< ScalarX * , L , D , MX > & x , + const ScalarB & beta , + const 
View< ScalarY * , L , D , MY > & y , + const View< ScalarW * , L , D , MW > & w ) +{ + Impl::WAXPBY<ScalarA,ScalarX,ScalarB,ScalarY,ScalarW,L,D> + ( n , alpha , x , beta , y , w ); +} + +} + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< typename Scalar , class L , class D > +struct Dot +{ +private: + + typedef View< const Scalar*, L, D, MemoryUnmanaged > vector_const_type ; + + const vector_const_type x ; + const vector_const_type y ; + +public: + + typedef typename vector_const_type::execution_space execution_space ; // Manycore device + typedef double value_type ; // Reduction value + + template< class ArgX , class ArgY > + inline + Dot( const size_t n , const ArgX & arg_x , const ArgY & arg_y , double & result ) + : x( arg_x ), y( arg_y ) + { + parallel_reduce( n , *this , result ); + } + + template< typename iType > + KOKKOS_INLINE_FUNCTION + void operator()( const iType & i , value_type & update ) const + { update += x(i) * y(i); } + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + const volatile value_type & source ) + { update += source; } + + KOKKOS_INLINE_FUNCTION + static void init( value_type & update ) + { update = 0 ; } +}; // Dot + +//---------------------------------------------------------------------------- + +template< typename Scalar , class L , class D > +struct Dot1 +{ +private: + + typedef View< const Scalar*, L, D , MemoryUnmanaged > vector_const_type ; + + const vector_const_type x ; + +public: + + typedef typename vector_const_type::execution_space execution_space ; // Manycore device + typedef double value_type ; // Reduction value + + template< class ArgX > + inline + Dot1( const size_t n , const ArgX & arg_x , double & result ) + : x( arg_x ) + { + parallel_reduce( n , *this , result ); + } + + template< typename iType > + KOKKOS_INLINE_FUNCTION 
+ void operator()( const iType & i , value_type & update ) const + { update += x(i) * x(i) ; } + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + const volatile value_type & source ) + { update += source ; } + + KOKKOS_INLINE_FUNCTION + static void init( value_type & update ) + { update = 0 ; } +}; // Dot + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template < typename ScalarA , + typename ScalarX , + typename ScalarB , + typename ScalarY , + typename ScalarW , + class L , class D > +struct WAXPBY +{ +private: + + typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ; + typedef View< const ScalarX *, L , D , MemoryUnmanaged > ViewX ; + typedef View< const ScalarY *, L , D , MemoryUnmanaged > ViewY ; + + const ViewW w ; + const ViewX x ; + const ViewY y ; + const ScalarA alpha ; + const ScalarB beta ; + +public: + + typedef typename ViewW::execution_space execution_space ; + + template< typename iType > + KOKKOS_INLINE_FUNCTION + void operator()( const iType inode ) const + { + w(inode) = alpha * x(inode) + beta * y(inode); + } + + template< class ArgX , class ArgY , class ArgW > + inline + WAXPBY( const size_t n , + const ScalarA & arg_alpha , + const ArgX & arg_x , + const ScalarB & arg_beta , + const ArgY & arg_y , + const ArgW & arg_w ) + : w( arg_w ), x( arg_x ), y( arg_y ) + , alpha( arg_alpha ), beta( arg_beta ) + { + parallel_for( n , *this ); + } +}; // WAXPBY + +//---------------------------------------------------------------------------- + +template < typename ScalarB , + typename ScalarW , + class L , class D > +struct Scale +{ +private: + + typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ; + const ViewW w ; + const ScalarB beta ; + +public: + + typedef typename ViewW::execution_space execution_space ; + + template< typename iType > + KOKKOS_INLINE_FUNCTION + void operator()( const iType & i ) 
const + { w(i) *= beta ; } + + template< class ArgW > + inline + Scale( const size_t n , const ScalarB & arg_beta , const ArgW & arg_w ) + : w( arg_w ) + , beta( arg_beta ) + { + parallel_for( n , *this ); + } +}; + +template < typename ScalarB , + typename ScalarW , + class L , class D > +struct Fill +{ +private: + + typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ; + const ViewW w ; + const ScalarB beta ; + +public: + + typedef typename ViewW::execution_space execution_space ; + + template< typename iType > + KOKKOS_INLINE_FUNCTION + void operator()( const iType & i ) const + { w(i) = beta ; } + + template< class ArgW > + inline + Fill( const size_t n , const ScalarB & arg_beta , const ArgW & arg_w ) + : w( arg_w ) + , beta( arg_beta ) + { + parallel_for( n , *this ); + } +}; + +//---------------------------------------------------------------------------- + +template < typename ScalarA , + typename ScalarX , + typename ScalarW , + class L , class D > +struct AXPY +{ +private: + + typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ; + typedef View< const ScalarX *, L , D , MemoryUnmanaged > ViewX ; + + const ViewW w ; + const ViewX x ; + const ScalarA alpha ; + +public: + + typedef typename ViewW::execution_space execution_space ; + + template< typename iType > + KOKKOS_INLINE_FUNCTION + void operator()( const iType & i ) const + { w(i) += alpha * x(i); } + + template< class ArgX , class ArgW > + inline + AXPY( const size_t n , + const ScalarA & arg_alpha , + const ArgX & arg_x , + const ArgW & arg_w ) + : w( arg_w ), x( arg_x ) + , alpha( arg_alpha ) + { + parallel_for( n , *this ); + } +}; // AXPY + +template< typename ScalarX , + typename ScalarB , + typename ScalarW , + class L , class D > +struct XPBY +{ +private: + + typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ; + typedef View< const ScalarX *, L , D , MemoryUnmanaged > ViewX ; + + const ViewW w ; + const ViewX x ; + const ScalarB beta ; + +public: + + typedef typename 
ViewW::execution_space execution_space ; + + template< typename iType > + KOKKOS_INLINE_FUNCTION + void operator()( const iType & i ) const + { w(i) = x(i) + beta * w(i); } + + template< class ArgX , class ArgW > + inline + XPBY( const size_t n , + const ArgX & arg_x , + const ScalarB & arg_beta , + const ArgW & arg_w ) + : w( arg_w ), x( arg_x ) + , beta( arg_beta ) + { + parallel_for( n , *this ); + } +}; // XPBY + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #ifndef USESCASES_LINALG_BLAS_HPP */ + + diff --git a/lib/kokkos/example/multi_fem/Makefile b/lib/kokkos/example/multi_fem/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..72e1768fcb9b446f94400a3e783767923779f6bf --- /dev/null +++ b/lib/kokkos/example/multi_fem/Makefile @@ -0,0 +1,53 @@ +KOKKOS_PATH ?= ../.. 
# Build the multi_fem example against a Kokkos tree located at KOKKOS_PATH.
KOKKOS_PATH ?= ../..

# Directory holding this Makefile; the sources are looked up there.
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
SRC_DIR := $(dir $(MAKEFILE_PATH))

SRC = $(wildcard $(SRC_DIR)/*.cpp)
OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)

#SRC = $(wildcard *.cpp)
#OBJ = $(SRC:%.cpp=%.o)

# None of these targets names a file that the rules create.  Declaring them
# phony keeps them working when a file called 'default', 'build' or 'clean'
# happens to exist in the build directory.
.PHONY: default build clean

# Declared before the include below, so this is the first target in the file.
# The echo runs after the 'build' prerequisite has been brought up to date.
default: build
	echo "Start Build"

# use installed Makefile.kokkos
include $(KOKKOS_PATH)/Makefile.kokkos

# Compiler, flags and executable suffix depend on whether KOKKOS_DEVICES
# mentions Cuda.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = $(NVCC_WRAPPER)
CXXFLAGS = -I$(SRC_DIR) -I$(CUDA_PATH) -O3
LINK = $(CXX)
LINKFLAGS = -L$(CUDA_PATH)/lib64 -lcusparse
EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "Cuda,OpenMP"
#KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "OpenMP"
#KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

LIB =


build: $(EXE)

# Link step.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)

clean:
	rm -f *.a *.o *.cuda *.host

# Compilation rules

%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
// Timings and iteration counts collected by one run of the nonlinear
// example.  All fields start at zero.
struct PerformanceData {
  double mesh_time ;
  double graph_time ;
  double elem_time ;
  double matrix_gather_fill_time ;
  double matrix_boundary_condition_time ;
  double cg_iteration_time ;
  size_t cg_iteration_count ;
  size_t newton_iteration_count ;
  double error_max ;

  PerformanceData()
    : mesh_time( 0 ) ,
      graph_time( 0 ) ,
      elem_time( 0 ) ,
      matrix_gather_fill_time( 0 ) ,
      matrix_boundary_condition_time( 0 ) ,
      cg_iteration_time( 0 ) ,
      cg_iteration_count( 0 ) ,
      newton_iteration_count( 0 ) ,
      error_max( 0 )
    {}

  // Keep, field by field, the smaller of the value stored here and the
  // value in 'rhs'.  A field is overwritten only when the value in 'rhs'
  // compares strictly less than the stored one.
  void best( const PerformanceData & rhs )
  {
    if ( rhs.mesh_time  < mesh_time  ) { mesh_time  = rhs.mesh_time ; }
    if ( rhs.graph_time < graph_time ) { graph_time = rhs.graph_time ; }
    if ( rhs.elem_time  < elem_time  ) { elem_time  = rhs.elem_time ; }

    if ( rhs.matrix_gather_fill_time < matrix_gather_fill_time ) {
      matrix_gather_fill_time = rhs.matrix_gather_fill_time ;
    }

    if ( rhs.matrix_boundary_condition_time < matrix_boundary_condition_time ) {
      matrix_boundary_condition_time = rhs.matrix_boundary_condition_time ;
    }

    if ( rhs.cg_iteration_time < cg_iteration_time ) {
      cg_iteration_time = rhs.cg_iteration_time ;
    }

    if ( rhs.cg_iteration_count < cg_iteration_count ) {
      cg_iteration_count = rhs.cg_iteration_count ;
    }

    if ( rhs.newton_iteration_count < newton_iteration_count ) {
      newton_iteration_count = rhs.newton_iteration_count ;
    }

    if ( rhs.error_max < error_max ) { error_max = rhs.error_max ; }
  }
};
//----------------------------------------------------------------------------
// Closed-form solution used to check the nonlinear solver.

class ManufacturedSolution {
public:

  // One dimensional nonlinear PDE:
  //
  //   -K T_zz + T^2 = 0 ; T(zmin) = T_zmin ; T(zmax) = T_zmax
  //
  // Analytic solution:
  //
  //   T(z) = ( a ( z - zmin ) + b )^(-2)   with   K = 1 / ( 6 a^2 )
  //
  // K is derived from the two boundary values.  Two choices of (a,b)
  // satisfy the boundary conditions:
  //
  //   with a singularity:
  //     a = ( 1.0 / sqrt(T_zmax) + 1.0 / sqrt(T_zmin) ) / ( zmax - zmin )
  //     b = -1.0 / sqrt(T_zmin)
  //
  //   without a singularity (the one implemented here):
  //     a = ( 1.0 / sqrt(T_zmax) - 1.0 / sqrt(T_zmin) ) / ( zmax - zmin )
  //     b =  1.0 / sqrt(T_zmin)

  const double zmin ;
  const double zmax ;
  const double T_zmin ;
  const double T_zmax ;
  const double a ;
  const double b ;
  const double K ;

  // The derived coefficients are computed from the constructor arguments,
  // which hold the same values as the members initialized from them.
  ManufacturedSolution( const double arg_zmin ,
                        const double arg_zmax ,
                        const double arg_T_zmin ,
                        const double arg_T_zmax )
    : zmin(   arg_zmin )
    , zmax(   arg_zmax )
    , T_zmin( arg_T_zmin )
    , T_zmax( arg_T_zmax )
    , a( ( 1.0 / sqrt(arg_T_zmax) - 1.0 / sqrt(arg_T_zmin) ) / ( arg_zmax - arg_zmin ) )
    , b( 1.0 / sqrt(arg_T_zmin) )
    , K( 1.0 / ( 6.0 * a * a ) )
    {}

  // Evaluate T(z).
  double operator()( const double z ) const
  {
    const double base = a * ( z - zmin ) + b ;

    return 1.0 / ( base * base );
  }
};
execution_space; + //typedef typename execution_space::size_type size_type ; // unused + + typedef typename fixture_type::FEMeshType mesh_type ; + typedef typename fixture_type::coordinate_scalar_type coordinate_scalar_type ; + + enum { ElementNodeCount = fixture_type::element_node_count }; + + const comm::Machine machine = mesh.parallel_data_map.machine ; + + const size_t element_count = mesh.elem_node_ids.dimension_0(); + + //------------------------------------ + // The amount of nonlinearity is proportional to the ratio + // between T(zmax) and T(zmin). For the manufactured solution + // 0 < T(zmin) and 0 < T(zmax) + + const ManufacturedSolution + exact_solution( /* zmin */ 0 , + /* zmax */ global_max_z , + /* T(zmin) */ 1 , + /* T(zmax) */ 20 ); + + //----------------------------------- + // Convergence Criteria and perf data: + + const size_t cg_iteration_limit = 200 ; + const double cg_tolerance = 1e-14 ; + + const size_t newton_iteration_limit = 150 ; + const double newton_tolerance = 1e-14 ; + + size_t cg_iteration_count_total = 0 ; + double cg_iteration_time = 0 ; + + size_t newton_iteration_count = 0 ; + double residual_norm_init = 0 ; + double residual_norm = 0 ; + + PerformanceData perf_data ; + + //------------------------------------ + // Sparse linear system types: + + typedef Kokkos::View< scalar_type* , execution_space > vector_type ; + typedef Kokkos::CrsMatrix< scalar_type , execution_space > matrix_type ; + typedef typename matrix_type::graph_type matrix_graph_type ; + typedef typename matrix_type::coefficients_type matrix_coefficients_type ; + + typedef GraphFactory< matrix_graph_type , mesh_type > graph_factory ; + + //------------------------------------ + // Problem setup types: + + typedef ElementComputation < mesh_type , scalar_type > ElementFunctor ; + typedef DirichletSolution < mesh_type , scalar_type > DirichletSolutionFunctor ; + typedef DirichletResidual < mesh_type , scalar_type > DirichletResidualFunctor ; + + typedef typename 
ElementFunctor::elem_matrices_type elem_matrices_type ; + typedef typename ElementFunctor::elem_vectors_type elem_vectors_type ; + + typedef GatherFill< matrix_type , + mesh_type , + elem_matrices_type , + elem_vectors_type > GatherFillFunctor ; + + //------------------------------------ + + matrix_type jacobian ; + vector_type residual ; + vector_type delta ; + vector_type nodal_solution ; + + typename graph_factory::element_map_type element_map ; + + //------------------------------------ + // Generate mesh and corresponding sparse matrix graph + + Kokkos::Timer wall_clock ; + + //------------------------------------ + // Generate sparse matrix graph and element->graph map. + + wall_clock.reset(); + + graph_factory::create( mesh , jacobian.graph , element_map ); + + execution_space::fence(); + + perf_data.graph_time = comm::max( machine , wall_clock.seconds() ); + + //------------------------------------ + // Allocate linear system coefficients and rhs: + + const size_t local_owned_length = jacobian.graph.row_map.dimension_0() - 1 ; + const size_t local_total_length = mesh.node_coords.dimension_0(); + + jacobian.coefficients = + matrix_coefficients_type( "jacobian_coeff" , jacobian.graph.entries.dimension_0() ); + + // Nonlinear residual for owned nodes: + residual = vector_type( "residual" , local_owned_length ); + + // Nonlinear solution for owned and ghosted nodes: + nodal_solution = vector_type( "solution" , local_total_length ); + + // Nonlinear solution update for owned nodes: + delta = vector_type( "delta" , local_owned_length ); + + //------------------------------------ + // Allocation of arrays to fill the linear system + + elem_matrices_type elem_matrices ; // Jacobian matrices + elem_vectors_type elem_vectors ; // Residual vectors + + if ( element_count ) { + elem_matrices = elem_matrices_type( std::string("elem_matrices"), element_count ); + elem_vectors = elem_vectors_type( std::string("elem_vectors"), element_count ); + } + + 
//------------------------------------ + // For boundary condition set the correct values in the solution vector + // The 'zmin' face is assigned to 'T_zmin'. + // The 'zmax' face is assigned to 'T_zmax'. + // The resulting solution is one dimensional along the 'Z' axis. + + DirichletSolutionFunctor::apply( nodal_solution , mesh , + exact_solution.zmin , + exact_solution.zmax , + exact_solution.T_zmin , + exact_solution.T_zmax ); + + for(;;) { // Nonlinear loop + +#if defined( KOKKOS_HAVE_MPI ) + + { //------------------------------------ + // Import off-processor nodal solution values + // for residual and jacobian computations + + Kokkos::AsyncExchange< typename vector_type::value_type , execution_space , + Kokkos::ParallelDataMap > + exchange( mesh.parallel_data_map , 1 ); + + Kokkos::PackArray< vector_type > + ::pack( exchange.buffer() , + mesh.parallel_data_map.count_interior , + mesh.parallel_data_map.count_send , + nodal_solution ); + + exchange.setup(); + + exchange.send_receive(); + + Kokkos::UnpackArray< vector_type > + ::unpack( nodal_solution , exchange.buffer() , + mesh.parallel_data_map.count_owned , + mesh.parallel_data_map.count_receive ); + } + +#endif + + //------------------------------------ + // Compute element matrices and vectors: + + wall_clock.reset(); + + ElementFunctor( mesh , + elem_matrices , + elem_vectors , + nodal_solution , + exact_solution.K ); + + execution_space::fence(); + perf_data.elem_time += comm::max( machine , wall_clock.seconds() ); + + //------------------------------------ + // Fill linear system coefficients: + + wall_clock.reset(); + + fill( jacobian.coefficients.dimension_0(), 0 , jacobian.coefficients ); + fill( residual.dimension_0() , 0 , residual ); + + GatherFillFunctor::apply( jacobian , + residual , + mesh , + element_map , + elem_matrices , + elem_vectors ); + + execution_space::fence(); + perf_data.matrix_gather_fill_time += comm::max( machine , wall_clock.seconds() ); + + // Apply boundary conditions: + + 
wall_clock.reset(); + + // Updates jacobian matrix to 1 on the diagonal, zero elsewhere, + // and 0 in the residual due to the solution vector having the correct value + DirichletResidualFunctor::apply( jacobian, residual, mesh , + exact_solution.zmin , + exact_solution.zmax ); + + execution_space::fence(); + perf_data.matrix_boundary_condition_time += + comm::max( machine , wall_clock.seconds() ); + + //------------------------------------ + // Has the residual converged? + + residual_norm = norm2( mesh.parallel_data_map.count_owned, + residual, + mesh.parallel_data_map.machine ); + + if ( 0 == newton_iteration_count ) { + residual_norm_init = residual_norm ; + } + + if ( residual_norm / residual_norm_init < newton_tolerance ) { + break ; + } + + //------------------------------------ + // Solve linear sytem + + size_t cg_iteration_count = 0 ; + double cg_residual_norm = 0 ; + + cgsolve( mesh.parallel_data_map , + jacobian , residual , delta , + cg_iteration_count , + cg_residual_norm , + cg_iteration_time , + cg_iteration_limit , cg_tolerance ) ; + + perf_data.cg_iteration_time += cg_iteration_time ; + cg_iteration_count_total += cg_iteration_count ; + + // Update non-linear solution with delta... 
+ // delta is : - Dx = [Jacobian]^1 * Residual which is the negative update + // LaTeX: + // \vec {x}_{n+1} = \vec {x}_{n} - ( - \Delta \vec{x}_{n} ) + // text: + // x[n+1] = x[n] + Dx + + axpy( mesh.parallel_data_map.count_owned , + -1.0, delta, nodal_solution); + + ++newton_iteration_count ; + + if ( newton_iteration_limit < newton_iteration_count ) { + break ; + } + }; + + if ( newton_iteration_count ) { + perf_data.elem_time /= newton_iteration_count ; + perf_data.matrix_gather_fill_time /= newton_iteration_count ; + perf_data.matrix_boundary_condition_time /= newton_iteration_count ; + } + + if ( cg_iteration_count_total ) { + perf_data.cg_iteration_time /= cg_iteration_count_total ; + } + + perf_data.newton_iteration_count = newton_iteration_count ; + perf_data.cg_iteration_count = cg_iteration_count_total ; + + //------------------------------------ + + { + // For extracting the nodal solution and its coordinates: + + typename mesh_type::node_coords_type::HostMirror node_coords_host = + Kokkos::create_mirror( mesh.node_coords ); + + typename vector_type::HostMirror nodal_solution_host = + Kokkos::create_mirror( nodal_solution ); + + Kokkos::deep_copy( node_coords_host , mesh.node_coords ); + Kokkos::deep_copy( nodal_solution_host , nodal_solution ); + + double tmp = 0 ; + + for ( size_t i = 0 ; i < mesh.parallel_data_map.count_owned ; ++i ) { + const coordinate_scalar_type x = node_coords_host(i,0); + const coordinate_scalar_type y = node_coords_host(i,1); + const coordinate_scalar_type z = node_coords_host(i,2); + + const double Tx = exact_solution(z); + const double Ts = nodal_solution_host(i); + const double Te = std::abs( Tx - Ts ) / std::abs( Tx ); + + tmp = std::max( tmp , Te ); + + if ( print_error && 0.02 < Te ) { + std::cout << " node( " << x << " " << y << " " << z << " ) = " + << Ts << " != exact_solution " << Tx + << std::endl ; + } + } + perf_data.error_max = comm::max( machine , tmp ); + } + + return perf_data ; +} + 
+//---------------------------------------------------------------------------- + +template< typename Scalar , class Device , class FixtureElement > +void driver( const char * const label , + comm::Machine machine , + const int gang_count , + const int elem_count_beg , + const int elem_count_end , + const int runs ) +{ + typedef Scalar scalar_type ; + typedef Device execution_space ; + typedef double coordinate_scalar_type ; + typedef FixtureElement fixture_element_type ; + + typedef BoxMeshFixture< coordinate_scalar_type , + execution_space , + fixture_element_type > fixture_type ; + + typedef typename fixture_type::FEMeshType mesh_type ; + + const size_t proc_count = comm::size( machine ); + const size_t proc_rank = comm::rank( machine ); + + if ( elem_count_beg == 0 || elem_count_end == 0 || runs == 0 ) return ; + + if ( comm::rank( machine ) == 0 ) { + std::cout << std::endl ; + std::cout << "\"Kokkos::HybridFE::Nonlinear " << label << "\"" << std::endl; + std::cout + << "\"Size\" , \"Size\" , \"Graphing\" , \"Element\" , \"Fill\" , \"Boundary\" , \"CG-Iter\" , \"CG-Iter\" , \"Newton-Iter\" , \"Max-node-error\"" + << std::endl + << "\"elems\" , \"nodes\" , \"millisec\" , \"millisec\" , \"millisec\" , \"millisec\" , \"millisec\" , \"total-count\" , \"total-count\" , \"ratio\"" + << std::endl ; + } + + const bool print_sample = 0 ; + const double x_curve = 1.0 ; + const double y_curve = 1.0 ; + const double z_curve = 0.8 ; + + for(int i = elem_count_beg ; i < elem_count_end ; i *= 2 ) + { + const int ix = std::max( 1 , (int) cbrt( ((double) i) / 2.0 ) ); + const int iy = 1 + ix ; + const int iz = 2 * iy ; + const int global_elem_count = ix * iy * iz ; + const int global_node_count = ( 2 * ix + 1 ) * + ( 2 * iy + 1 ) * + ( 2 * iz + 1 ); + + mesh_type mesh = + fixture_type::create( proc_count , proc_rank , gang_count , + ix , iy , iz , + x_curve , y_curve , z_curve ); + + mesh.parallel_data_map.machine = machine ; + + + PerformanceData perf_data , perf_best ; + + 
for(int j = 0; j < runs; j++){ + + perf_data = run<scalar_type,fixture_type>(mesh,ix,iy,iz, print_sample ); + + if( j == 0 ) { + perf_best = perf_data ; + } + else { + perf_best.best( perf_data ); + } + } + + if ( comm::rank( machine ) == 0 ) { + + std::cout << std::setw(8) << global_elem_count << " , " + << std::setw(8) << global_node_count << " , " + << std::setw(10) << perf_best.graph_time * 1000 << " , " + << std::setw(10) << perf_best.elem_time * 1000 << " , " + << std::setw(10) << perf_best.matrix_gather_fill_time * 1000 << " , " + << std::setw(10) << perf_best.matrix_boundary_condition_time * 1000 << " , " + << std::setw(10) << perf_best.cg_iteration_time * 1000 << " , " + << std::setw(7) << perf_best.cg_iteration_count << " , " + << std::setw(3) << perf_best.newton_iteration_count << " , " + << std::setw(10) << perf_best.error_max + << std::endl ; + } + } +} + +//---------------------------------------------------------------------------- + +} /* namespace Nonlinear */ +} /* namespace HybridFEM */ + + +#endif /* #ifndef HYBRIDFEM_IMPLICIT_HPP */ + diff --git a/lib/kokkos/example/multi_fem/NonlinearElement_Cuda.hpp b/lib/kokkos/example/multi_fem/NonlinearElement_Cuda.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b2adc2adab302ec05f4ca2218e0321583f52a044 --- /dev/null +++ b/lib/kokkos/example/multi_fem/NonlinearElement_Cuda.hpp @@ -0,0 +1,390 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER

#include <stdio.h>

#include <iostream>
#include <fstream>
#include <iomanip>
#include <cstdlib>
#include <cmath>

#include <Kokkos_Core.hpp>
#include <HexElement.hpp>
#include <FEMesh.hpp>

namespace HybridFEM {
namespace Nonlinear {

template< class MeshType , typename ScalarType > struct ElementComputation ;

//----------------------------------------------------------------------------

// CUDA specialization of the element computation for a 27-node
// FEMesh< double , 27 , Kokkos::Cuda > with double-precision results.
//
// Constructing the object performs the whole computation: the constructor
// copies the views it needs and launches this functor through
// Kokkos::Impl::CudaParallelLaunch with a 32 x 7 thread block and
// sizeof(WorkSpace) bytes of dynamic shared memory.  On the device,
// operator() walks the elements and, for each one, calls
// evaluateFunctions() followed by contributeResidualJacobian(), which
// write element_vectors(ielem,row) and element_matrices(ielem,row,col).
template<>
struct ElementComputation< FEMesh< double , 27 , Kokkos::Cuda > , double >
{
  typedef Kokkos::Cuda execution_space ;

  static const unsigned ElementNodeCount = 27 ;

  typedef HexElement_Data< ElementNodeCount > element_data_type ;
  typedef FEMesh< double , ElementNodeCount , execution_space > mesh_type ;

  static const unsigned SpatialDim = element_data_type::spatial_dimension ;
  static const unsigned FunctionCount = element_data_type::function_count ;
  static const unsigned IntegrationCount = element_data_type::integration_count ;
  static const unsigned TensorDim = SpatialDim * SpatialDim ;

  typedef Kokkos::View< double[][FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
  typedef Kokkos::View< double[][FunctionCount] , execution_space > elem_vectors_type ;
  typedef Kokkos::View< double[] , execution_space > value_vector_type ;

private:

  const element_data_type elem_data ;
  const typename mesh_type::elem_node_ids_type elem_node_ids ;
  const typename mesh_type::node_coords_type node_coords ;
  const value_vector_type nodal_values ;
  const elem_matrices_type element_matrices ;
  const elem_vectors_type element_vectors ;
  const float coeff_K ;       // note: stored in single precision
  const unsigned elem_count ; // arg_mesh.elem_node_ids.dimension_0()

  // For each of the 9 inverse-Jacobian entries, the four Jacobian entries
  // (a,b,c,d) combined as J[a]*J[b] - J[c]*J[d]; filled by the constructor.
  unsigned invJacIndex[9][4] ;

  // Flat indices of the 3x3 tensor entries.
  static const unsigned j11 = 0 , j12 = 1 , j13 = 2 ,
                        j21 = 3 , j22 = 4 , j23 = 5 ,
                        j31 = 6 , j32 = 7 , j33 = 8 ;

  // Can only handle up to 16 warps:
  static const unsigned BlockDimX = 32 ;
  static const unsigned BlockDimY = 7 ;

  // Layout of the dynamic shared memory.  Every device function below
  // re-declares it as 'extern __shared__ WorkSpace work_data[]'.
  struct WorkSpace {
    // Reduction scratch, one slot per thread.  Rows 0..3 are also reused
    // by evaluateFunctions() as gather scratch for x, y, z and nodal value.
    double sum[ BlockDimY ][ BlockDimX ];

    // Solution value and gradient at each integration point.
    double value_at_integ[ IntegrationCount ];
    double gradx_at_integ[ IntegrationCount ];
    double grady_at_integ[ IntegrationCount ];
    double gradz_at_integ[ IntegrationCount ];

    // Per-threadIdx.y Jacobian and inverse Jacobian (9 entries each).
    float spaceJac[ BlockDimY ][ 9 ];
    float spaceInvJac[ BlockDimY ][ 9 ];

    // detJ * integration weight at each integration point.
    float detJweight[ IntegrationCount ];

    // Transformed basis gradients, indexed [function][integration point].
    float dpsidx[ FunctionCount ][ IntegrationCount ];
    float dpsidy[ FunctionCount ][ IntegrationCount ];
    float dpsidz[ FunctionCount ][ IntegrationCount ];
  };

public:

  // Captures the mesh connectivity/coordinates and the input/output views,
  // builds the inverse-Jacobian index table, and launches the kernel.
  //
  //   arg_mesh              source of elem_node_ids and node_coords
  //   arg_element_matrices  output: per-element matrices
  //   arg_element_vectors   output: per-element vectors
  //   arg_nodal_values      input: current nodal solution
  //   arg_coeff_K           coefficient multiplying the gradient terms
  ElementComputation ( const mesh_type & arg_mesh ,
                       const elem_matrices_type & arg_element_matrices ,
                       const elem_vectors_type & arg_element_vectors ,
                       const value_vector_type & arg_nodal_values ,
                       const float arg_coeff_K )
  : elem_data()
  , elem_node_ids( arg_mesh.elem_node_ids )
  , node_coords( arg_mesh.node_coords )
  , nodal_values( arg_nodal_values )
  , element_matrices( arg_element_matrices )
  , element_vectors( arg_element_vectors )
  , coeff_K( arg_coeff_K )
  , elem_count( arg_mesh.elem_node_ids.dimension_0() )
  {
    const unsigned jInvJ[9][4] =
     { { j22 , j33 , j23 , j32 } ,
       { j13 , j32 , j12 , j33 } ,
       { j12 , j23 , j13 , j22 } ,

       { j23 , j31 , j21 , j33 } ,
       { j11 , j33 , j13 , j31 } ,
       { j13 , j21 , j11 , j23 } ,

       { j21 , j32 , j22 , j31 } ,
       { j12 , j31 , j11 , j32 } ,
       { j11 , j22 , j12 , j21 } };

    for ( unsigned i = 0 ; i < 9 ; ++i ) {
    for ( unsigned j = 0 ; j < 4 ; ++j ) {
      invJacIndex[i][j] = jInvJ[i][j] ;
    }
    }

    // One block per element, capped at 65535 blocks; operator() strides
    // over the remaining elements.
    // NOTE(review): std::min lives in <algorithm>, which this file does not
    // include directly -- presumably it arrives transitively; confirm.
    const unsigned shmem = sizeof(WorkSpace);
    const unsigned grid_max = 65535 ;
    const unsigned grid_count = std::min( grid_max , elem_count );

    // For compute capability 2.x up to 1024 threads per block
    const dim3 block( BlockDimX , BlockDimY , 1 );
    const dim3 grid( grid_count , 1 , 1 );

    Kokkos::Impl::CudaParallelLaunch< ElementComputation >( *this , grid , block , shmem );
  }

public:

  //------------------------------------
  // Sum among the threadIdx.x
  //
  // Each calling thread stores 'value' in its own sum[threadIdx.y][threadIdx.x]
  // slot; threads with threadIdx.x < 16 then fold in the slots at offsets
  // 16, 8, 4, 2, 1.  Only the thread with threadIdx.x == 0 writes 'result'.
  // Slots of threads that do not call sum_x() are read as-is, which is why
  // callers run sum_x_clear() beforehand.
  // NOTE(review): there is no barrier between the fold steps; the code
  // appears to rely on the 32 threads of a row executing in lockstep (only
  // 'volatile' is used) -- confirm this holds on the targeted GPUs.

  template< typename Type >
  __device__ inline static
  void sum_x( Type & result , const double value )
  {
    extern __shared__ WorkSpace work_data[] ;

    volatile double * const base_sum =
      & work_data->sum[ threadIdx.y ][ threadIdx.x ] ;

    base_sum[ 0] = value ;

    if ( threadIdx.x < 16 ) {
      base_sum[0] += base_sum[16];
      base_sum[0] += base_sum[ 8];
      base_sum[0] += base_sum[ 4];
      base_sum[0] += base_sum[ 2];
      base_sum[0] += base_sum[ 1];
    }

    if ( 0 == threadIdx.x ) {
      result = base_sum[0] ;
    }
  }

  // Zero the calling thread's reduction slot.
  __device__ inline static
  void sum_x_clear()
  {
    extern __shared__ WorkSpace work_data[] ;

    work_data->sum[ threadIdx.y ][ threadIdx.x ] = 0 ;
  }

  //------------------------------------
  //------------------------------------

  // Fill the shared WorkSpace for element 'ielem': detJweight, the
  // transformed basis gradients dpsidx/dpsidy/dpsidz, and the solution
  // value and gradient at every integration point.
  __device__ inline
  void evaluateFunctions( const unsigned ielem ) const
  {
    extern __shared__ WorkSpace work_data[] ;

    // Each warp (threadIdx.y) computes an integration point
    // Each thread is responsible for a node / function.

    const unsigned iFunc = threadIdx.x ;
    const bool hasFunc = iFunc < FunctionCount ;

    //------------------------------------
    // Each warp gathers a different variable into 'elem_mat' shared memory.
    // (The destination in this code is rows 0..3 of work_data->sum:
    //  x, y, z coordinate and nodal value of node 'iFunc'.)

    if ( hasFunc ) {

      const unsigned node = elem_node_ids( ielem , iFunc );

      for ( unsigned iy = threadIdx.y ; iy < 4 ; iy += blockDim.y ) {
        switch( iy ) {
        case 0 : work_data->sum[0][iFunc] = node_coords(node,0); break ;
        case 1 : work_data->sum[1][iFunc] = node_coords(node,1); break ;
        case 2 : work_data->sum[2][iFunc] = node_coords(node,2); break ;
        case 3 : work_data->sum[3][iFunc] = nodal_values(node); break ;
        default: break ;
        }
      }
    }

    __syncthreads(); // Wait for all warps to finish gathering

    // now get local 'const' copies in register space:

    const double x = work_data->sum[0][ iFunc ];
    const double y = work_data->sum[1][ iFunc ];
    const double z = work_data->sum[2][ iFunc ];
    const double dof_val = work_data->sum[3][ iFunc ];

    __syncthreads(); // Wait for all warps to finish extracting

    sum_x_clear(); // Make sure summation scratch is zero

    //------------------------------------
    // Each warp is now on its own computing an integration point
    // so no further explicit synchronizations are required.

    if ( hasFunc ) {

      float * const J = work_data->spaceJac[ threadIdx.y ];
      float * const invJ = work_data->spaceInvJac[ threadIdx.y ];

      // threadIdx.y strides over the integration points.
      for ( unsigned iInt = threadIdx.y ;
                     iInt < IntegrationCount ; iInt += blockDim.y ) {

        const float val = elem_data.values[iInt][iFunc] ;
        const float gx = elem_data.gradients[iInt][0][iFunc] ;
        const float gy = elem_data.gradients[iInt][1][iFunc] ;
        const float gz = elem_data.gradients[iInt][2][iFunc] ;

        // Jacobian entries: sums over the functions (threadIdx.x) of
        // master-element gradient times nodal coordinate.
        sum_x( J[j11], gx * x );
        sum_x( J[j12], gx * y );
        sum_x( J[j13], gx * z );

        sum_x( J[j21], gy * x );
        sum_x( J[j22], gy * y );
        sum_x( J[j23], gy * z );

        sum_x( J[j31], gz * x );
        sum_x( J[j32], gz * y );
        sum_x( J[j33], gz * z );

        // Inverse jacobian, only enough parallel work for 9 threads in the warp

        if ( iFunc < TensorDim ) {

          invJ[ iFunc ] =
            J[ invJacIndex[iFunc][0] ] * J[ invJacIndex[iFunc][1] ] -
            J[ invJacIndex[iFunc][2] ] * J[ invJacIndex[iFunc][3] ] ;

          // Let all threads in the warp compute determinant into a register
          // NOTE(review): this reads invJ[j11..j13] written by other
          // threads just above, and they are divided by detJ just below,
          // with no barrier in between -- presumably relies on lockstep
          // execution within the row; confirm.

          const float detJ = J[j11] * invJ[j11] +
                             J[j21] * invJ[j12] +
                             J[j31] * invJ[j13] ;

          invJ[ iFunc ] /= detJ ;

          if ( 0 == iFunc ) {
            work_data->detJweight[ iInt ] = detJ * elem_data.weights[ iInt ] ;
          }
        }

        // Transform bases gradients and compute value and gradient

        const float dx = gx * invJ[j11] + gy * invJ[j12] + gz * invJ[j13];
        const float dy = gx * invJ[j21] + gy * invJ[j22] + gz * invJ[j23];
        const float dz = gx * invJ[j31] + gy * invJ[j32] + gz * invJ[j33];

        work_data->dpsidx[iFunc][iInt] = dx ;
        work_data->dpsidy[iFunc][iInt] = dy ;
        work_data->dpsidz[iFunc][iInt] = dz ;

        sum_x( work_data->gradx_at_integ[iInt] , dof_val * dx );
        sum_x( work_data->grady_at_integ[iInt] , dof_val * dy );
        sum_x( work_data->gradz_at_integ[iInt] , dof_val * dz );
        sum_x( work_data->value_at_integ[iInt] , dof_val * val );
      }
    }

    __syncthreads(); // All shared data must be populated at return.
  }

  // Integrate the residual and Jacobian of element 'ielem' from the data
  // left in shared memory by evaluateFunctions().  Here threadIdx.x selects
  // the integration point and threadIdx.y strides over the matrix rows;
  // sum_x() adds up the integration-point contributions and stores the
  // total directly into element_vectors / element_matrices.
  __device__ inline
  void contributeResidualJacobian( const unsigned ielem ) const
  {
    extern __shared__ WorkSpace work_data[] ;

    sum_x_clear(); // Make sure summation scratch is zero

    // $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$
    // $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$

    const unsigned iInt = threadIdx.x ;

    if ( iInt < IntegrationCount ) {

      const double value_at_integ = work_data->value_at_integ[ iInt ] ;
      const double gradx_at_integ = work_data->gradx_at_integ[ iInt ] ;
      const double grady_at_integ = work_data->grady_at_integ[ iInt ] ;
      const double gradz_at_integ = work_data->gradz_at_integ[ iInt ] ;

      const float detJweight = work_data->detJweight[ iInt ] ;
      const float coeff_K_detJweight = coeff_K * detJweight ;

      for ( unsigned iRow = threadIdx.y ;
                     iRow < FunctionCount ; iRow += blockDim.y ) {

        // Row basis value and gradients, pre-scaled by the quadrature factor.
        const float value_row = elem_data.values[ iInt ][ iRow ] * detJweight ;
        const float dpsidx_row = work_data->dpsidx[ iRow ][ iInt ] * coeff_K_detJweight ;
        const float dpsidy_row = work_data->dpsidy[ iRow ][ iInt ] * coeff_K_detJweight ;
        const float dpsidz_row = work_data->dpsidz[ iRow ][ iInt ] * coeff_K_detJweight ;

        const double res_del = dpsidx_row * gradx_at_integ +
                               dpsidy_row * grady_at_integ +
                               dpsidz_row * gradz_at_integ ;

        const double res_val = value_at_integ * value_at_integ * value_row ;
        const double jac_val_row = 2 * value_at_integ * value_row ;

        sum_x( element_vectors( ielem , iRow ) , res_del + res_val );

        for ( unsigned iCol = 0 ; iCol < FunctionCount ; ++iCol ) {

          const float jac_del =
            dpsidx_row * work_data->dpsidx[iCol][iInt] +
            dpsidy_row * work_data->dpsidy[iCol][iInt] +
            dpsidz_row * work_data->dpsidz[iCol][iInt] ;

          const double jac_val =
            jac_val_row * elem_data.values[ iInt ][ iCol ] ;

          sum_x( element_matrices( ielem , iRow , iCol ) , jac_del + jac_val );
        }
      }
    }

    __syncthreads(); // All warps finish before refilling shared data
  }

  // Kernel body: block b handles elements b, b + gridDim.x, b + 2*gridDim.x, ...
  __device__ inline
  void operator()(void) const
  {
    extern __shared__ WorkSpace work_data[] ;

    for ( unsigned ielem = blockIdx.x ; ielem < elem_count ; ielem += gridDim.x ) {

      evaluateFunctions( ielem );

      contributeResidualJacobian( ielem );
    }
  }

}; /* ElementComputation */

} /* namespace Nonlinear */
} /* namespace HybridFEM */

diff --git a/lib/kokkos/example/multi_fem/NonlinearFunctors.hpp b/lib/kokkos/example/multi_fem/NonlinearFunctors.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9628236181f034f242ce11c2f56783ba9b934797 --- /dev/null +++ b/lib/kokkos/example/multi_fem/NonlinearFunctors.hpp @@ -0,0 +1,482 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_NONLINEARFUNCTORS_HPP +#define KOKKOS_NONLINEARFUNCTORS_HPP + +#include <iostream> +#include <fstream> +#include <iomanip> +#include <cstdlib> +#include <cmath> + +namespace HybridFEM { +namespace Nonlinear { + +template< class MeshType , typename ScalarType > struct ElementComputation ; +template< class MeshType , typename ScalarType > struct DirichletSolution ; +template< class MeshType , typename ScalarType > struct DirichletResidual ; + +} +} + +/* A Cuda-specific specialization for the element computation functor. 
*/
#if defined( __CUDACC__ )
#include <NonlinearElement_Cuda.hpp>
#endif

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace HybridFEM {
namespace Nonlinear {

// Generic element computation for FEMesh< ScalarCoordType , ElemNode , DeviceType >.
//
// Constructing the functor runs it: the constructor stores the views and
// calls parallel_for over all elements.  For every element, operator()
// gathers the nodal coordinates and solution values, loops over the
// integration points of HexElement_Data, and writes the accumulated
// residual vector and Jacobian matrix to element_vectors(ielem,i) and
// element_matrices(ielem,i,j).
template< typename ScalarCoordType , unsigned ElemNode , class DeviceType ,
          typename ScalarType >
struct ElementComputation<
  FEMesh< ScalarCoordType , ElemNode , DeviceType > , ScalarType >
{
  typedef DeviceType execution_space;
  typedef ScalarType scalar_type ;

  static const unsigned ElementNodeCount = ElemNode ;

  typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ;

  typedef HexElement_Data< ElementNodeCount > element_data_type ;

  static const unsigned SpatialDim = element_data_type::spatial_dimension ;
  static const unsigned FunctionCount = element_data_type::function_count ;
  static const unsigned IntegrationCount = element_data_type::integration_count ;
  static const unsigned TensorDim = SpatialDim * SpatialDim ;

  typedef Kokkos::View< scalar_type[][FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
  typedef Kokkos::View< scalar_type[][FunctionCount] , execution_space > elem_vectors_type ;
  typedef Kokkos::View< scalar_type[] , execution_space > value_vector_type ;


private:

  const element_data_type elem_data ;
  typename mesh_type::elem_node_ids_type elem_node_ids ;
  typename mesh_type::node_coords_type node_coords ;
  value_vector_type nodal_values ;
  elem_matrices_type element_matrices ;
  elem_vectors_type element_vectors ;
  scalar_type coeff_K ;

public:

  // Store the inputs/outputs and immediately dispatch one work item per
  // element (the count is the first dimension of arg_mesh.elem_node_ids).
  ElementComputation( const mesh_type & arg_mesh ,
                      const elem_matrices_type & arg_element_matrices ,
                      const elem_vectors_type & arg_element_vectors ,
                      const value_vector_type & arg_nodal_values ,
                      const scalar_type arg_coeff_K )
  : elem_data()
  , elem_node_ids( arg_mesh.elem_node_ids )
  , node_coords( arg_mesh.node_coords )
  , nodal_values( arg_nodal_values )
  , element_matrices( arg_element_matrices )
  , element_vectors( arg_element_vectors )
  , coeff_K( arg_coeff_K )
  {
    const size_t work_count = arg_mesh.elem_node_ids.dimension_0();

    parallel_for( work_count , *this );
  }

  //------------------------------------

  // Operation-count estimate for one transform_gradients() call.
  static const unsigned FLOPS_transform_gradients =
    /* Jacobian */ FunctionCount * TensorDim * 2 +
    /* Inverse jacobian */ TensorDim * 6 + 6 +
    /* Gradient transform */ FunctionCount * 15 ;

  // Map master-element basis gradients to physical space.
  //
  //   grad            master-element gradients, grad[direction][function]
  //   x , y , z       nodal coordinates of the element
  //   dpsidx/y/z      output: transformed gradients per function
  //
  // Returns the Jacobian determinant (single precision).
  KOKKOS_INLINE_FUNCTION
  float transform_gradients(
    const float grad[][ FunctionCount ] , // Gradient of bases master element
    const double x[] ,
    const double y[] ,
    const double z[] ,
    float dpsidx[] ,
    float dpsidy[] ,
    float dpsidz[] ) const
  {
    enum { j11 = 0 , j12 = 1 , j13 = 2 ,
           j21 = 3 , j22 = 4 , j23 = 5 ,
           j31 = 6 , j32 = 7 , j33 = 8 };

    // Accumulate the Jacobian in double precision.
    double jac[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };

    for ( unsigned fn = 0 ; fn < FunctionCount ; ++fn ) {
      const double cx = x[fn] ;
      const double cy = y[fn] ;
      const double cz = z[fn] ;

      const float gr = grad[0][fn] ;
      const float gs = grad[1][fn] ;
      const float gt = grad[2][fn] ;

      jac[j11] += gr * cx ;
      jac[j12] += gr * cy ;
      jac[j13] += gr * cz ;

      jac[j21] += gs * cx ;
      jac[j22] += gs * cy ;
      jac[j23] += gs * cz ;

      jac[j31] += gt * cx ;
      jac[j32] += gt * cy ;
      jac[j33] += gt * cz ;
    }

    // Cofactor terms, narrowed to float; scaled by 1/det further below.
    float inv[ TensorDim ] ;

    inv[j11] = static_cast<float>( jac[j22] * jac[j33] - jac[j23] * jac[j32] );
    inv[j12] = static_cast<float>( jac[j13] * jac[j32] - jac[j12] * jac[j33] );
    inv[j13] = static_cast<float>( jac[j12] * jac[j23] - jac[j13] * jac[j22] );

    inv[j21] = static_cast<float>( jac[j23] * jac[j31] - jac[j21] * jac[j33] );
    inv[j22] = static_cast<float>( jac[j11] * jac[j33] - jac[j13] * jac[j31] );
    inv[j23] = static_cast<float>( jac[j13] * jac[j21] - jac[j11] * jac[j23] );

    inv[j31] = static_cast<float>( jac[j21] * jac[j32] - jac[j22] * jac[j31] );
    inv[j32] = static_cast<float>( jac[j12] * jac[j31] - jac[j11] * jac[j32] );
    inv[j33] = static_cast<float>( jac[j11] * jac[j22] - jac[j12] * jac[j21] );

    // Determinant by expansion along the first column.
    const float detJ = jac[j11] * inv[j11] +
                       jac[j21] * inv[j12] +
                       jac[j31] * inv[j13] ;

    const float detJinv = 1.0 / detJ ;

    for ( unsigned t = 0 ; t < TensorDim ; ++t ) {
      inv[t] *= detJinv ;
    }

    // Apply the inverse Jacobian to every basis gradient.
    for ( unsigned fn = 0 ; fn < FunctionCount ; ++fn ) {
      const float g0 = grad[0][fn];
      const float g1 = grad[1][fn];
      const float g2 = grad[2][fn];

      dpsidx[fn] = g0 * inv[j11] + g1 * inv[j12] + g2 * inv[j13];
      dpsidy[fn] = g0 * inv[j21] + g1 * inv[j22] + g2 * inv[j23];
      dpsidz[fn] = g0 * inv[j31] + g1 * inv[j32] + g2 * inv[j33];
    }

    return detJ ;
  }

  // Add one integration point's contribution to the element residual
  // (elem_res) and element Jacobian (elem_mat).
  //
  //   coeff_k        coefficient on the gradient terms
  //   dof_values     nodal solution values of the element
  //   dpsidx/y/z     transformed basis gradients at this point
  //   detJ           Jacobian determinant at this point
  //   integ_weight   quadrature weight of this point
  //   bases_vals     basis function values at this point
  KOKKOS_INLINE_FUNCTION
  void contributeResidualJacobian(
    const float coeff_k ,
    const double dof_values[] ,
    const float dpsidx[] ,
    const float dpsidy[] ,
    const float dpsidz[] ,
    const float detJ ,
    const float integ_weight ,
    const float bases_vals[] ,
    double elem_res[] ,
    double elem_mat[][ FunctionCount ] ) const
  {
    // Interpolate the solution and its gradient at the integration point.
    double value_at_pt = 0 ;
    double gradx_at_pt = 0 ;
    double grady_at_pt = 0 ;
    double gradz_at_pt = 0 ;

    for ( unsigned k = 0 ; k < FunctionCount ; ++k ) {
      value_at_pt += dof_values[k] * bases_vals[k] ;
      gradx_at_pt += dof_values[k] * dpsidx[k] ;
      grady_at_pt += dof_values[k] * dpsidy[k] ;
      gradz_at_pt += dof_values[k] * dpsidz[k] ;
    }

    const scalar_type k_detJ_weight = coeff_k * detJ * integ_weight ;
    const double res_val = value_at_pt * value_at_pt * detJ * integ_weight ;
    const double mat_val = 2.0 * value_at_pt * detJ * integ_weight ;

    // $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$
    // $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$

    for ( unsigned row = 0 ; row < FunctionCount ; ++row ) {
      const float phi_row = bases_vals[row];
      const float gx_row = dpsidx[row] ;
      const float gy_row = dpsidy[row] ;
      const float gz_row = dpsidz[row] ;

      elem_res[row] += k_detJ_weight * ( gx_row * gradx_at_pt +
                                         gy_row * grady_at_pt +
                                         gz_row * gradz_at_pt ) +
                       res_val * phi_row ;

      for ( unsigned col = 0 ; col < FunctionCount ; ++col ) {

        elem_mat[row][col] += k_detJ_weight * ( gx_row * dpsidx[col] +
                                                gy_row * dpsidy[col] +
                                                gz_row * dpsidz[col] ) +
                              mat_val * phi_row * bases_vals[col];
      }
    }
  }

  // Per-element work item: gather, integrate, store.
  KOKKOS_INLINE_FUNCTION
  void operator()( const unsigned ielem ) const
  {
    // Local copies of the element's nodal coordinates and solution values.
    double x[ FunctionCount ] ;
    double y[ FunctionCount ] ;
    double z[ FunctionCount ] ;
    double val[ FunctionCount ] ;

    for ( unsigned n = 0 ; n < ElementNodeCount ; ++n ) {
      const unsigned node_index = elem_node_ids( ielem , n );

      x[n] = node_coords( node_index , 0 );
      y[n] = node_coords( node_index , 1 );
      z[n] = node_coords( node_index , 2 );

      val[n] = nodal_values( node_index );
    }

    // Zero-initialized local accumulators.
    double elem_vec[ FunctionCount ] ;
    double elem_mat[ FunctionCount ][ FunctionCount ] ;

    for ( unsigned r = 0 ; r < FunctionCount ; ++r ) {
      elem_vec[r] = 0 ;
      for ( unsigned c = 0 ; c < FunctionCount ; ++c ) {
        elem_mat[r][c] = 0 ;
      }
    }

    // Sum the contributions of all integration points.
    for ( unsigned q = 0 ; q < IntegrationCount ; ++q ) {
      float dpsidx[ FunctionCount ] ;
      float dpsidy[ FunctionCount ] ;
      float dpsidz[ FunctionCount ] ;

      const float detJ =
        transform_gradients( elem_data.gradients[q] , x , y , z ,
                             dpsidx , dpsidy , dpsidz );

      contributeResidualJacobian( coeff_K ,
                                  val , dpsidx , dpsidy , dpsidz ,
                                  detJ ,
                                  elem_data.weights[q] ,
                                  elem_data.values[q] ,
                                  elem_vec , elem_mat );
    }

    // Publish the results to the output views.
    for ( unsigned r = 0 ; r < FunctionCount ; ++r ) {
      element_vectors(ielem, r) = elem_vec[r] ;
      for ( unsigned c = 0 ; c < FunctionCount ; ++c ) {
        element_matrices(ielem, r, c) = elem_mat[r][c] ;
      }
    }
  }

}; /* ElementComputation */

//----------------------------------------------------------------------------

template< typename ScalarCoordType , unsigned ElemNode , class DeviceType ,
          typename ScalarType >
struct
DirichletSolution< + FEMesh< ScalarCoordType , ElemNode , DeviceType > , + ScalarType > +{ + typedef DeviceType execution_space; + + static const unsigned ElementNodeCount = ElemNode ; + + typedef Kokkos::View< ScalarType[] , execution_space > vector_type ; + + typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ; + + typename mesh_type::node_coords_type node_coords ; + + vector_type solution ; + ScalarCoordType bc_lower_z ; + ScalarCoordType bc_upper_z ; + ScalarType bc_lower_value ; + ScalarType bc_upper_value ; + + KOKKOS_INLINE_FUNCTION + void operator()( const unsigned inode ) const + { + + // Apply dirichlet boundary condition on the Solution vector. + // Define boundary node values to be either bc_lower_value or + // bc_upper_value, depending on which boundary face they lie on. + // Non-boundary terms will be left at their previous value. + + const ScalarCoordType z = node_coords(inode,2); + const bool bc_lower = z <= bc_lower_z ; + const bool bc_upper = bc_upper_z <= z ; + + if ( bc_lower || bc_upper ) { + const ScalarType bc_value = bc_lower ? 
bc_lower_value + : bc_upper_value ; + + solution(inode) = bc_value ; // set the solution vector + } + } + + static void apply( const vector_type & solution , + const mesh_type & mesh , + const ScalarCoordType bc_lower_z , + const ScalarCoordType bc_upper_z , + const ScalarType bc_lower_value , + const ScalarType bc_upper_value ) + { + DirichletSolution op ; + op.node_coords = mesh.node_coords ; + op.solution = solution ; + op.bc_lower_z = bc_lower_z ; + op.bc_upper_z = bc_upper_z ; + op.bc_lower_value = bc_lower_value ; + op.bc_upper_value = bc_upper_value ; + parallel_for( solution.dimension_0() , op ); + } +}; + +//---------------------------------------------------------------------------- + +template< typename ScalarCoordType , unsigned ElemNode , class DeviceType , + typename ScalarType > +struct DirichletResidual< + FEMesh< ScalarCoordType , ElemNode , DeviceType > , ScalarType > +{ + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type ; + + static const unsigned ElementNodeCount = ElemNode ; + + typedef Kokkos::CrsMatrix< ScalarType , execution_space > matrix_type ; + typedef Kokkos::View< ScalarType[] , execution_space > vector_type ; + + typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ; + + typename mesh_type::node_coords_type node_coords ; + matrix_type matrix ; + vector_type rhs ; + ScalarCoordType bc_lower_z ; + ScalarCoordType bc_upper_z ; + + KOKKOS_INLINE_FUNCTION + void operator()( const unsigned inode ) const + { + // Apply a dirichlet boundary condition to 'irow' + // to maintain the symmetry of the original + // global stiffness matrix, zero out the columns + // that correspond to boundary conditions, and + // adjust the load vector accordingly + + const size_type iBeg = matrix.graph.row_map[inode]; + const size_type iEnd = matrix.graph.row_map[inode+1]; + + const ScalarCoordType z = node_coords(inode,2); + const bool bc_lower = z <= bc_lower_z ; + const bool bc_upper = 
bc_upper_z <= z ; + + if ( bc_lower || bc_upper ) { + rhs(inode) = 0 ; // set the residual vector + + // zero each value on the row, and leave a one + // on the diagonal + + for( size_type i = iBeg ; i < iEnd ; i++) { + matrix.coefficients(i) = + (int) inode == matrix.graph.entries(i) ? 1 : 0 ; + } + } + else { + + // Find any columns that are boundary conditions. + // Clear them and adjust the load vector + + for( size_type i = iBeg ; i < iEnd ; i++ ) { + const size_type cnode = matrix.graph.entries(i) ; + + const ScalarCoordType zc = node_coords(cnode,2); + const bool c_bc_lower = zc <= bc_lower_z ; + const bool c_bc_upper = bc_upper_z <= zc ; + + if ( c_bc_lower || c_bc_upper ) { + + matrix.coefficients(i) = 0 ; + } + } + } + } + + + static void apply( const matrix_type & linsys_matrix , + const vector_type & linsys_rhs , + const mesh_type & mesh , + const ScalarCoordType bc_lower_z , + const ScalarCoordType bc_upper_z) + { + const size_t row_count = linsys_matrix.graph.row_map.dimension_0() - 1 ; + + DirichletResidual op ; + op.node_coords = mesh.node_coords ; + op.matrix = linsys_matrix ; + op.rhs = linsys_rhs ; + op.bc_lower_z = bc_lower_z ; + op.bc_upper_z = bc_upper_z ; + parallel_for( row_count , op ); + } +}; + +//---------------------------------------------------------------------------- + +} /* namespace Nonlinear */ +} /* namespace HybridFEM */ + +#endif /* #ifndef KOKKOS_NONLINEARFUNCTORS_HPP */ + diff --git a/lib/kokkos/example/multi_fem/ParallelComm.hpp b/lib/kokkos/example/multi_fem/ParallelComm.hpp new file mode 100644 index 0000000000000000000000000000000000000000..163e84a958625cbef857e94a42aa7bb51c0b2e1f --- /dev/null +++ b/lib/kokkos/example/multi_fem/ParallelComm.hpp @@ -0,0 +1,167 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef PARALLELCOMM_HPP +#define PARALLELCOMM_HPP + +//------------------------------------------------------------------------ + +#include <Kokkos_Macros.hpp> + +//------------------------------------------------------------------------ + +#if defined( KOKKOS_HAVE_MPI ) + +#include <mpi.h> +#include <string> + +namespace comm { + +struct Machine { + MPI_Comm mpi_comm ; + + Machine() : mpi_comm( MPI_COMM_NULL ) {} + + Machine( const Machine & rhs ) + : mpi_comm( rhs.mpi_comm ) {} + + Machine( MPI_Comm c ) : mpi_comm( c ) {} + + static Machine init( int * argc , char *** argv ) + { + MPI_Init( argc , argv ); + return Machine( MPI_COMM_WORLD ); + } + + static void finalize() { MPI_Finalize(); } +}; + +inline +unsigned size( Machine machine ) +{ + int np ; MPI_Comm_size( machine.mpi_comm , & np ); return np ; +} + +inline +unsigned rank( Machine machine ) +{ + int ip ; MPI_Comm_rank( machine.mpi_comm , & ip ); return ip ; +} + +inline +double max( Machine machine , double local ) +{ + double global = 0; + MPI_Allreduce( & local , & global , 1 , MPI_DOUBLE , MPI_MAX , machine.mpi_comm ); + return global ; +} + +inline +std::string command_line( Machine machine , const int argc , const char * const * const argv ) +{ + std::string argline ; + + if ( 0 == rank( machine ) ) { + for ( int i = 1 ; i < argc ; ++i ) { + argline.append(" ").append( argv[i] ); + } + } + + int length = argline.length(); + MPI_Bcast( & length , 1 , MPI_INT , 0 , machine.mpi_comm ); + argline.resize( length , ' ' ); + MPI_Bcast( (void*) argline.data() , length , MPI_CHAR , 0 , machine.mpi_comm ); + + return argline ; +} + +} + +#else /* ! 
namespace comm {

// Serial stand-in used when MPI is not available:
// a single process whose rank is zero.

struct Machine {
  static Machine init( int * , char *** ) { return Machine(); }

  static void finalize() {}
};

// Always one process.
inline unsigned size( Machine ) { return 1u ; }

// The only process has rank zero.
inline unsigned rank( Machine ) { return 0u ; }

// With one process the global maximum is the local value.
inline double max( Machine , double local ) { return local ; }

// Concatenate argv[1..argc-1], each argument preceded by a single space.
inline
std::string command_line( Machine machine , const int argc , const char * const * const argv )
{
  std::string joined ;

  if ( rank( machine ) != 0 ) { return joined ; }

  for ( int iarg = 1 ; iarg < argc ; ++iarg ) {
    joined += " " ;
    joined += argv[iarg] ;
  }

  return joined ;
}

}
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_PARALLELDATAMAP_HPP +#define KOKKOS_PARALLELDATAMAP_HPP + +#include <utility> +#include <limits> +#include <iostream> +#include <sstream> +#include <stdexcept> + +#include <Kokkos_Core.hpp> +#include <ParallelComm.hpp> + +namespace Kokkos { + +//---------------------------------------------------------------------------- +/** \brief Parallel distributed data mapping + * + * ordering { interior : { owned items not sent elsewhere } + * send : { owned items sent } + * receive : { not-owned items received } } + * + * recv { { N ghosted items from process P : ( P , N ) } } + * + * send { { N send items to process P : ( P , N ) } } + * + * send_item { send item offsets within 'send' range } + */ +struct ParallelDataMap { + typedef View< unsigned*[2], HostSpace > host_recv_type ; + typedef View< unsigned*[2], HostSpace > host_send_type ; + typedef View< unsigned* , HostSpace > host_send_item_type ; + + comm::Machine machine ; + host_recv_type host_recv ; + host_send_type host_send ; + host_send_item_type host_send_item ; + unsigned count_interior ; + unsigned count_send ; + unsigned count_owned ; // = count_interior + count_send + unsigned count_receive ; + + void assign( const unsigned arg_count_interior , + const unsigned arg_count_owned , + const unsigned arg_count_total , + const unsigned arg_recv_msg , + const unsigned arg_send_msg , + const unsigned arg_send_count ) + { + const std::string label("Kokkos::ParallelDataMap buffer"); + + count_interior = arg_count_interior ; + count_owned = arg_count_owned ; + count_send = arg_count_owned - arg_count_interior ; + count_receive = arg_count_total - arg_count_owned ; + + host_recv = host_recv_type( label , arg_recv_msg ); + host_send = host_send_type( label , arg_send_msg ); + host_send_item = host_send_item_type( label , arg_send_count ); + } +}; + 
+//---------------------------------------------------------------------------- +//PackArray +//---------------------------------------------------------------------------- +template< class ArrayType , class Rank = void > +struct PackArray ; + +template< typename DeviceType, typename ValueType > +struct PackArray< View< ValueType* , DeviceType > , void > +{ + typedef DeviceType execution_space ; + typedef typename DeviceType::size_type size_type ; + typedef View< ValueType* , execution_space > array_type ; + typedef View< ValueType* , execution_space > buffer_type ; + +private: + + buffer_type output ; + array_type input ; + size_type base ; + +public: + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type i ) const + { output[i] = input(base+i); } + + inline + static + void pack( const buffer_type & arg_output , + const size_type arg_begin , + const size_type arg_count , + const array_type & arg_input ) + { + PackArray op ; + op.output = arg_output ; + op.input = arg_input ; + op.base = arg_begin ; + parallel_for( arg_count , op ); + } +}; + +template< typename DeviceType, typename ValueType , unsigned N1 > +struct PackArray< View< ValueType*[N1] , DeviceType > , void > +{ + typedef DeviceType execution_space ; + typedef typename DeviceType::size_type size_type ; + typedef View< ValueType*[N1] , execution_space > array_type ; + typedef View< ValueType* , execution_space > buffer_type ; + +private: + + buffer_type output ; + array_type input ; + size_type base ; + +public: + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type i ) const + { + for ( size_type j = 0 , k = i * N1 ; j < N1 ; ++j , ++k ) { + output[k] = input(base+i,j); + } + } + + inline static + void pack( const buffer_type & arg_output , + const size_type arg_begin , + const size_type arg_count , + const array_type & arg_input ) + { + if ( arg_count ) { + PackArray op ; + op.output = arg_output ; + op.input = arg_input ; + op.base = arg_begin ; + parallel_for( arg_count , op ); + } + } 
+}; + +//---------------------------------------------------------------------------- +//UnpackArray +//---------------------------------------------------------------------------- +template< class ArrayType , class Rank = void > struct UnpackArray ; + +template< typename DeviceType, typename ValueType > +struct UnpackArray< View< ValueType* , DeviceType > , void > +{ + typedef DeviceType execution_space ; + typedef typename DeviceType::size_type size_type ; + typedef View< ValueType* , execution_space > array_type ; + typedef View< ValueType* , execution_space > buffer_type ; + +private: + + array_type output ; + buffer_type input ; + size_type base ; + +public: + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type i ) const + { output(base+i) = input[i]; } + + inline + static + void unpack( const array_type & arg_output , + const buffer_type & arg_input , + const size_type arg_begin , + const size_type arg_count ) + { + UnpackArray op ; + op.output = arg_output ; + op.input = arg_input ; + op.base = arg_begin ; + parallel_for( arg_count , op ); + } +}; + +template< typename DeviceType, typename ValueType , unsigned N1 > +struct UnpackArray< View< ValueType*[N1] , DeviceType > , void > +{ + typedef DeviceType execution_space ; + typedef typename DeviceType::size_type size_type ; + typedef View< ValueType* , execution_space > buffer_type ; + typedef View< ValueType*[N1] , execution_space > array_type ; + +private: + + array_type output ; + buffer_type input ; + size_type base ; + +public: + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type i ) const + { + for ( size_type j = 0 , k = i * N1 ; j < N1 ; ++j , ++k ) { + output(base+i,j) = input(k); + } + } + + inline + static + void unpack( const array_type & arg_output , + const buffer_type & arg_input , + const size_type arg_begin , + const size_type arg_count ) + { + if ( arg_count ) { + UnpackArray op ; + op.output = arg_output ; + op.input = arg_input ; + op.base = arg_begin ; + parallel_for( 
/** \brief  MPI implementation of the send/receive exchange described by
 *          a ParallelDataMap.
 *
 *  Usage, following the call procedure documented in this header:
 *    1) construct; the application packs its send items into buffer()
 *    2) setup()        : post the receives, copy the send buffer to the host
 *    3) send_receive() : send each message, wait for and verify all
 *                        receives, copy the received data to the device
 *    4) the application unpacks its received items from buffer()
 *
 *  Every item occupies 'chunk_size' consecutive ValueType entries.
 *
 *  NOTE(review): this class uses std::vector and std::max, but the
 *  enclosing header does not include <vector> or <algorithm> itself;
 *  presumably they arrive transitively through Kokkos_Core.hpp - confirm.
 */
template< class ValueType , class Device >
class AsyncExchange< ValueType, Device , Kokkos::ParallelDataMap > {
public:

  typedef Device                                    execution_space ;
  typedef Kokkos::ParallelDataMap                   data_map_type ;
  typedef Kokkos::View< ValueType* , execution_space >  buffer_dev_type ;
  typedef typename buffer_dev_type::HostMirror      buffer_host_type ;

private:

  // Tag used for every message posted or sent by this class.
  static const int mpi_tag = 11 ;

  const data_map_type  data_map ;         // copy of the caller's map
  unsigned             chunk_size ;       // ValueType entries per item
  unsigned             send_count_max ;   // largest item count of any one send message
  buffer_host_type     host_recv_buffer ; // all received messages, back to back
  buffer_host_type     host_send_buffer ; // host copy of the device send range
  buffer_host_type     send_msg_buffer ;  // staging area for one outgoing message
  buffer_dev_type      dev_buffer ;       // storage shared by send and receive
  buffer_dev_type      dev_send_buffer ; // Subview for send
  buffer_dev_type      dev_recv_buffer ; // Subview for receive
  std::vector< MPI_Request > recv_request ; // one request handle per receive message

public:

  /** \brief  Device buffer that the application packs before setup()
   *          and unpacks after send_receive().
   */
  const buffer_dev_type & buffer() const { return dev_buffer ; }

  /** \brief  Allocate all buffers for the given map.
   *
   *  \param arg_data_map  message tables and item counts (copied)
   *  \param arg_chunk     number of ValueType entries per item
   */
  AsyncExchange( const data_map_type & arg_data_map ,
                 const size_t          arg_chunk )
  : data_map( arg_data_map )
  , chunk_size( arg_chunk )
  , send_count_max( 0 )
  , host_recv_buffer()
  , host_send_buffer()
  , send_msg_buffer()
  , dev_buffer()
  , dev_send_buffer()
  , dev_recv_buffer()
  , recv_request()
  {
    const size_t send_msg_count = arg_data_map.host_send.dimension_0();
    const size_t recv_msg_count = arg_data_map.host_recv.dimension_0();

    // Total lengths, in ValueType entries, of the send and receive ranges.
    const size_t send_msg_length = arg_chunk * arg_data_map.count_send ;
    const size_t recv_msg_length = arg_chunk * arg_data_map.count_receive ;

    // Column 1 of host_send is the item count of each message;
    // the maximum sizes the single-message staging buffer below.
    for ( size_t i = 0 ; i < send_msg_count ; ++i ) {
      send_count_max = std::max( send_count_max ,
                                 (unsigned) arg_data_map.host_send(i,1) );
    }

    // A single shared buffer on the device can be used for
    // send and receive message buffers.
    dev_buffer = buffer_dev_type(
                     std::string("AsyncExchange dev_buffer") ,
                     std::max( send_msg_length , recv_msg_length ) );

    // Both subviews below start at offset 0 of dev_buffer,
    // so they refer to overlapping storage.

    // Total send subview of the device buffer
    dev_send_buffer =
      Kokkos::subview( dev_buffer , std::pair<size_t,size_t>( 0 , send_msg_length ) );

    // Total receive subview of the device buffer
    dev_recv_buffer =
      Kokkos::subview( dev_buffer , std::pair<size_t,size_t>( 0 , recv_msg_length ) );

    // Total receive message buffer on the host:
    host_recv_buffer = buffer_host_type(
                           std::string("AsyncExchange host_recv_buffer") ,
                           recv_msg_length );

    // Total send message buffer on the host:
    host_send_buffer = buffer_host_type(
                           std::string("AsyncExchange host_send_buffer") ,
                           send_msg_length );

    // Individual send message buffer on the host:
    send_msg_buffer = buffer_host_type(
                          std::string("AsyncExchange send_msg_buffer") ,
                          arg_chunk * send_count_max );

    // MPI asynchronous receive request handles:
    recv_request.assign( recv_msg_count , MPI_REQUEST_NULL );
  }

  //------------------------------------------------------------------------

  /** \brief  Post one MPI_Irecv per receive message, then copy the
   *          device send range into host memory.
   */
  void setup()
  {
    { // Post receives:
      const size_t recv_msg_count = data_map.host_recv.dimension_0();

      // Messages land back to back in host_recv_buffer,
      // in the order of the rows of host_recv.
      ValueType * ptr = host_recv_buffer.ptr_on_device();

      for ( size_t i = 0 ; i < recv_msg_count ; ++i ) {
        const int proc  = data_map.host_recv(i,0);              // source process
        const int count = data_map.host_recv(i,1) * chunk_size ; // ValueType entries

        // Transferred as raw bytes, so the MPI count is a byte count.
        // NOTE(review): the byte count is passed through MPI's 'int'
        // count argument; very large messages could exceed it - confirm sizes.
        MPI_Irecv( ptr , count * sizeof(ValueType) , MPI_BYTE ,
                   proc , mpi_tag , data_map.machine.mpi_comm ,
                   & recv_request[i] );

        ptr += count ;
      }
    }

    // Copy send buffer from the device to host memory for sending

    Kokkos::deep_copy( host_send_buffer , dev_send_buffer );

    // Done with the device until communication is complete.
    // Application can dispatch asynchronous work on the device.
  }

  // Application can dispatch local work to device ...
  // No communication progress until main thread calls 'send_receive'

  /** \brief  Send every message, wait for every posted receive, verify
   *          source and size, then copy the received data to the device.
   *
   *  Throws std::runtime_error when a completed receive does not match
   *  the source process or byte size recorded in host_recv.
   */
  void send_receive()
  {
    const size_t recv_msg_count = data_map.host_recv.dimension_0();
    const size_t send_msg_count = data_map.host_send.dimension_0();

    // Pack and send:

    // 'i' indexes messages; 'j' runs through host_send_item across all
    // messages and is advanced by the inner loop.
    for ( size_t i = 0 , j = 0 ; i < send_msg_count ; ++i ) {
      const int proc  = data_map.host_send(i,0); // destination process
      const int count = data_map.host_send(i,1); // items in this message

      // Gather this message's items into the contiguous staging buffer:
      // 'km' is the write position in send_msg_buffer,
      // 'ki' the read position in host_send_buffer.
      for ( int k = 0 , km = 0 ; k < count ; ++k , ++j ) {
        const int km_end = km + chunk_size ;
        int ki = chunk_size * data_map.host_send_item(j);

        for ( ; km < km_end ; ++km , ++ki ) {
          send_msg_buffer[km] = host_send_buffer[ki];
        }
      }

      // MPI_Ssend blocks until
      // (1) a receive is matched for the message and
      // (2) the send buffer can be re-used.
      //
      // It is suggested that MPI_Ssend will have the best performance:
      // http://www.mcs.anl.gov/research/projects/mpi/sendmode.html .

      // Because the send is blocking, the one staging buffer can be
      // reused for the next message.
      MPI_Ssend( send_msg_buffer.ptr_on_device(),
                 count * chunk_size * sizeof(ValueType) , MPI_BYTE ,
                 proc , mpi_tag , data_map.machine.mpi_comm );
    }

    // Wait for receives and verify:

    // Each pass completes exactly one outstanding request, in whatever
    // order the messages arrive; 'recv_which' reports which one.
    for ( size_t i = 0 ; i < recv_msg_count ; ++i ) {
      MPI_Status recv_status ;
      int recv_which = 0 ;
      int recv_size  = 0 ;

      MPI_Waitany( recv_msg_count , & recv_request[0] ,
                   & recv_which , & recv_status );

      const int recv_proc = recv_status.MPI_SOURCE ;

      MPI_Get_count( & recv_status , MPI_BYTE , & recv_size );

      // Verify message properly received:

      const int  expected_proc = data_map.host_recv(recv_which,0);
      const int  expected_size = data_map.host_recv(recv_which,1) *
                                 chunk_size * sizeof(ValueType);

      if ( ( expected_proc != recv_proc ) ||
           ( expected_size != recv_size ) ) {
        std::ostringstream msg ;
        msg << "AsyncExchange error:"
            << " P" << comm::rank( data_map.machine )
            << " received from P" << recv_proc
            << " size "     << recv_size
            << " expected " << expected_size
            << " from P"    << expected_proc ;
        throw std::runtime_error( msg.str() );
      }
    }

    // Copy received data to device memory.

    Kokkos::deep_copy( dev_recv_buffer , host_recv_buffer );
  }
};
#ifdef KOKKOS_HAVE_MPI */ + +//---------------------------------------------------------------------------- + +#endif /* #ifndef KOKKOS_PARALLELDATAMAP_HPP */ + + diff --git a/lib/kokkos/example/multi_fem/ParallelMachine.cpp b/lib/kokkos/example/multi_fem/ParallelMachine.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0953cab760b42adfa75f8aac3186bd8401f997ec --- /dev/null +++ b/lib/kokkos/example/multi_fem/ParallelMachine.cpp @@ -0,0 +1,178 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if 0 + +#include <stdlib.h> +#include <string.h> + +#include <ParallelMachine.hpp> + +#include <Kokkos_Core.hpp> + +#if ! defined( KOKKOS_HAVE_MPI ) +#define MPI_COMM_NULL 0 +#endif + +//------------------------------------------------------------------------ + +namespace Parallel { + +Machine::Machine( int * argc , char *** argv ) + : m_mpi_comm( MPI_COMM_NULL ) + , m_mpi_size(0) + , m_mpi_rank(0) + , m_mpi_gpu(0) +{ + +#if defined( KOKKOS_HAVE_CUDA ) + //------------------------------------ + // Might be using a Cuda aware version of MPI. + // Must select Cuda device before initializing MPI. + { + int i = 1 ; + for ( ; i < *argc && strcmp((*argv)[i],"mpi_cuda") ; ++i ); + + if ( i < *argc ) { + // Determine, if possible, what will be the node-local + // rank of the MPI process once MPI has been initialized. + // This rank is needed to set the Cuda device before 'mvapich' + // is initialized. + + const char * const mvapich_local_rank = getenv("MV2_COMM_WORLD_LOCAL_RANK"); + const char * const slurm_local_rank = getenv("SLURM_LOCALID"); + + const int pre_mpi_local_rank = + 0 != mvapich_local_rank ? atoi( mvapich_local_rank ) : ( + 0 != slurm_local_rank ? 
atoi( slurm_local_rank ) : ( + -1 ) ); + + if ( 0 <= pre_mpi_local_rank ) { + + const int ngpu = Kokkos::Cuda::detect_device_count(); + + const int cuda_device_rank = pre_mpi_local_rank % ngpu ; + + Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( cuda_device_rank ) ); + + m_mpi_gpu = 1 ; + } + } + } +#endif + + //------------------------------------ + +#if defined( KOKKOS_HAVE_MPI ) + MPI_Init( argc , argv ); + m_mpi_comm = MPI_COMM_WORLD ; + MPI_Comm_size( m_mpi_comm , & m_mpi_size ); + MPI_Comm_rank( m_mpi_comm , & m_mpi_rank ); +#endif + + // Query hwloc after MPI initialization to allow MPI binding: + //------------------------------------ + // Request to use host device: + { + int i = 1 ; + for ( ; i < *argc && strcmp((*argv)[i],"host") ; ++i ); + + if ( i < *argc ) { + + unsigned team_count = Kokkos::hwloc::get_available_numa_count(); + unsigned threads_per_team = Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); + + if ( i + 2 < *argc ) { + team_count = atoi( (*argv)[i+1] ); + threads_per_team = atoi( (*argv)[i+2] ); + } + + Kokkos::Threads::initialize( team_count * threads_per_team ); + } + } + +#if defined( KOKKOS_HAVE_CUDA ) + //------------------------------------ + // Request to use Cuda device and not already initialized. + if ( ! 
m_mpi_gpu ) { + int i = 1 ; + for ( ; i < *argc && strcmp((*argv)[i],"mpi_cuda") && strcmp((*argv)[i],"cuda") ; ++i ); + + if ( i < *argc ) { + + const int ngpu = Kokkos::Cuda::detect_device_count(); + + const int cuda_device_rank = m_mpi_rank % ngpu ; + + Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( cuda_device_rank ) ); + } + } +#endif + +} + +Machine::~Machine() +{ + Kokkos::Threads::finalize(); +#if defined( KOKKOS_HAVE_CUDA ) + Kokkos::Cuda::finalize(); +#endif +#if defined( KOKKOS_HAVE_MPI ) + MPI_Finalize(); +#endif +} + +void Machine::print_configuration( std::ostream & msg ) const +{ + msg << "MPI [ " << m_mpi_rank << " / " << m_mpi_size << " ]" << std::endl ; + Kokkos::Threads::print_configuration( msg ); +#if defined( KOKKOS_HAVE_CUDA ) + Kokkos::Cuda::print_configuration( msg ); +#endif +} + +} + +#endif /* #if 0 */ + diff --git a/lib/kokkos/example/multi_fem/ParallelMachine.hpp b/lib/kokkos/example/multi_fem/ParallelMachine.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1ddf50ab3b5e9fe28ce6c6b84e7d7d4877f588bd --- /dev/null +++ b/lib/kokkos/example/multi_fem/ParallelMachine.hpp @@ -0,0 +1,118 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#error "ParallelMachine" + +#ifndef PARALLELMACHINE_HPP +#define PARALLELMACHINE_HPP + +//------------------------------------------------------------------------ + +#include <iosfwd> + +#include <Kokkos_Core.hpp> + +//------------------------------------------------------------------------ + +#if defined( KOKKOS_HAVE_MPI ) +#include <mpi.h> +#else + typedef int MPI_Comm ; +#endif + +//------------------------------------------------------------------------ +//------------------------------------------------------------------------ + +namespace Parallel { + +/** \brief Hybrid parallel machine with MPI+Kokkos::Threads or MPI+Kokkos::Cuda. + * + * Initialization of MPI and Kokkos device has interdependencies which this + * class manages. 
The command line and environment variables are queried to initialize + * the Threads or Cuda device: + * + * 1) cuda : initializes Cuda device + * 2) host : initializes Threads device with all hwloc detected cores. + * 3) host #gang #worker : initializes Threads with specified + */ +class Machine { +private: + + MPI_Comm m_mpi_comm ; + int m_mpi_size ; + int m_mpi_rank ; + unsigned m_mpi_gpu ; + unsigned m_gpu_arch ; + + Machine(); + Machine( const Machine & ); + Machine & operator = ( const Machine & ); + +public: + + /** \brief Coordinated initialize MPI, Cuda, or Threads devices from 'main'. */ + Machine( int * argc , char *** argv ); + + ~Machine(); + + MPI_Comm mpi_comm() const { return m_mpi_comm ; } + + int mpi_size() const { return m_mpi_size ; } + int mpi_rank() const { return m_mpi_rank ; } + + /** \brief If using MPI that can directly operate on GPU memory */ + bool mpi_gpu() const { return m_mpi_gpu ; } + + /** \brief If using GPU then what architecture */ + unsigned gpu_arch() const { return m_gpu_arch ; } + + void print_configuration( std::ostream & ) const ; +}; + +} + +//------------------------------------------------------------------------ + +#endif /* #ifndef PARALLELMACHINE_HPP */ + + diff --git a/lib/kokkos/example/multi_fem/SparseLinearSystem.hpp b/lib/kokkos/example/multi_fem/SparseLinearSystem.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8d140b6d257af15bc7d0980624e8a20178f29a19 --- /dev/null +++ b/lib/kokkos/example/multi_fem/SparseLinearSystem.hpp @@ -0,0 +1,400 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef SPARSELINEARSYSTEM_HPP +#define SPARSELINEARSYSTEM_HPP + +#include <cmath> +#include <impl/Kokkos_Timer.hpp> + +#include <Kokkos_Core.hpp> +#include <Kokkos_StaticCrsGraph.hpp> + +#include <LinAlgBLAS.hpp> + +namespace Kokkos { + +template< typename ScalarType , class Device > +struct CrsMatrix { + typedef Device execution_space ; + typedef ScalarType value_type ; + + typedef StaticCrsGraph< int , execution_space , void , int > graph_type ; + typedef View< value_type* , execution_space > coefficients_type ; + + graph_type graph ; + coefficients_type coefficients ; +}; + +//---------------------------------------------------------------------------- + +namespace Impl { + +template< class Matrix , class OutputVector , class InputVector > +struct Multiply ; + +} +} + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< typename AScalarType , + typename VScalarType , + class DeviceType > +struct Multiply< CrsMatrix<AScalarType,DeviceType> , + View<VScalarType*,DeviceType > , + View<VScalarType*,DeviceType > > +{ + typedef DeviceType execution_space ; + typedef typename execution_space::size_type size_type ; + + typedef View< VScalarType*, execution_space, MemoryUnmanaged > vector_type ; + typedef View< const VScalarType*, execution_space, MemoryUnmanaged > vector_const_type ; + + typedef CrsMatrix< AScalarType , execution_space > matrix_type ; + +private: + + matrix_type m_A ; + vector_const_type m_x ; + vector_type m_y ; + +public: + + //-------------------------------------------------------------------------- + + KOKKOS_INLINE_FUNCTION + void operator()( const size_type iRow ) const + { + const size_type iEntryBegin = m_A.graph.row_map[iRow]; + const size_type 
iEntryEnd = m_A.graph.row_map[iRow+1]; + + double sum = 0 ; + +#if defined( __INTEL_COMPILER ) +#pragma simd reduction(+:sum) +#pragma ivdep + for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) { + sum += m_A.coefficients(iEntry) * m_x( m_A.graph.entries(iEntry) ); + } +#else + for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) { + sum += m_A.coefficients(iEntry) * m_x( m_A.graph.entries(iEntry) ); + } +#endif + + m_y(iRow) = sum ; + } + + Multiply( const matrix_type & A , + const size_type nrow , + const size_type , // ncol , + const vector_type & x , + const vector_type & y ) + : m_A( A ), m_x( x ), m_y( y ) + { + parallel_for( nrow , *this ); + } +}; + +//---------------------------------------------------------------------------- + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +//---------------------------------------------------------------------------- + +template< typename AScalarType , + typename VScalarType , + class Device > +class Operator { + typedef CrsMatrix<AScalarType,Device> matrix_type ; + typedef View<VScalarType*,Device> vector_type ; + +private: + const CrsMatrix<AScalarType,Device> A ; + + ParallelDataMap data_map ; + AsyncExchange< VScalarType , Device , ParallelDataMap > exchange ; + +public: + + Operator( const ParallelDataMap & arg_data_map , + const CrsMatrix<AScalarType,Device> & arg_A ) + : A( arg_A ) + , data_map( arg_data_map ) + , exchange( arg_data_map , 1 ) + {} + + void apply( const View<VScalarType*,Device> & x , + const View<VScalarType*,Device> & y ) + { + // Gather off-processor data for 'x' + + PackArray< vector_type >::pack( exchange.buffer() , + data_map.count_interior , + data_map.count_send , x ); + + exchange.setup(); + + // If interior & boundary matrices then could launch interior multiply + + 
exchange.send_receive(); + + UnpackArray< vector_type >::unpack( x , exchange.buffer() , + data_map.count_owned , + data_map.count_receive ); + + const typename Device::size_type nrow = data_map.count_owned ; + const typename Device::size_type ncol = data_map.count_owned + + data_map.count_receive ; + + Impl::Multiply<matrix_type,vector_type,vector_type>( A, nrow, ncol, x, y); + } +}; + +//---------------------------------------------------------------------------- + +template< typename AScalarType , typename VScalarType , class Device > +void cgsolve( + const ParallelDataMap data_map , + const CrsMatrix<AScalarType,Device> A , + const View<VScalarType*,Device> b , + const View<VScalarType*,Device> x , + size_t & iteration , + double & normr , + double & iter_time , + const size_t maximum_iteration = 200 , + const double tolerance = std::numeric_limits<VScalarType>::epsilon() ) +{ + typedef View<VScalarType*,Device> vector_type ; + //typedef View<VScalarType, Device> value_type ; // unused + + const size_t count_owned = data_map.count_owned ; + const size_t count_total = data_map.count_owned + data_map.count_receive ; + + Operator<AScalarType,VScalarType,Device> matrix_operator( data_map , A ); + + // Need input vector to matvec to be owned + received + vector_type pAll ( "cg::p" , count_total ); + + vector_type p = Kokkos::subview( pAll , std::pair<size_t,size_t>(0,count_owned) ); + vector_type r ( "cg::r" , count_owned ); + vector_type Ap( "cg::Ap", count_owned ); + + /* r = b - A * x ; */ + + /* p = x */ deep_copy( p , x ); + /* Ap = A * p */ matrix_operator.apply( pAll , Ap ); + /* r = b - Ap */ waxpby( count_owned , 1.0 , b , -1.0 , Ap , r ); + /* p = r */ deep_copy( p , r ); + + double old_rdot = dot( count_owned , r , data_map.machine ); + + normr = sqrt( old_rdot ); + iteration = 0 ; + + Kokkos::Timer wall_clock ; + + while ( tolerance < normr && iteration < maximum_iteration ) { + + /* pAp_dot = dot( p , Ap = A * p ) */ + + /* Ap = A * p */ 
matrix_operator.apply( pAll , Ap ); + + const double pAp_dot = dot( count_owned , p , Ap , data_map.machine ); + const double alpha = old_rdot / pAp_dot ; + + /* x += alpha * p ; */ axpy( count_owned, alpha, p , x ); + /* r -= alpha * Ap ; */ axpy( count_owned, -alpha, Ap, r ); + + const double r_dot = dot( count_owned , r , data_map.machine ); + const double beta = r_dot / old_rdot ; + + /* p = r + beta * p ; */ xpby( count_owned , r , beta , p ); + + normr = sqrt( old_rdot = r_dot ); + ++iteration ; + } + + iter_time = wall_clock.seconds(); +} + +//---------------------------------------------------------------------------- + +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_HAVE_CUDA ) + +#if ( CUDA_VERSION < 6000 ) +#pragma message "cusparse_v2.h" +#include <cusparse_v2.h> +#else +#pragma message "cusparse.h" +#include <cusparse.h> +#endif + +namespace Kokkos { +namespace Impl { + +struct CudaSparseSingleton { + cusparseHandle_t handle; + cusparseMatDescr_t descra; + + CudaSparseSingleton() + { + cusparseCreate( & handle ); + cusparseCreateMatDescr( & descra ); + cusparseSetMatType( descra , CUSPARSE_MATRIX_TYPE_GENERAL ); + cusparseSetMatIndexBase( descra , CUSPARSE_INDEX_BASE_ZERO ); + } + + static CudaSparseSingleton & singleton(); + +}; + +template<> +struct Multiply< CrsMatrix<double,Cuda> , + View<double*,Cuda > , + View<double*,Cuda > > +{ + typedef Cuda execution_space ; + typedef execution_space::size_type size_type ; + typedef double scalar_type ; + typedef View< scalar_type* , execution_space > vector_type ; + typedef CrsMatrix< scalar_type , execution_space > matrix_type ; + +public: + + Multiply( const matrix_type & A , + const size_type nrow , + const size_type ncol , + const vector_type & x , + const vector_type & y ) + { + CudaSparseSingleton & s = CudaSparseSingleton::singleton(); + const 
scalar_type alpha = 1 , beta = 0 ; + + cusparseStatus_t status = + cusparseDcsrmv( s.handle , + CUSPARSE_OPERATION_NON_TRANSPOSE , + nrow , ncol , A.coefficients.dimension_0() , + &alpha , + s.descra , + A.coefficients.ptr_on_device() , + A.graph.row_map.ptr_on_device() , + A.graph.entries.ptr_on_device() , + x.ptr_on_device() , + &beta , + y.ptr_on_device() ); + + if ( CUSPARSE_STATUS_SUCCESS != status ) { + throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) ); + } + } +}; + + +template<> +struct Multiply< CrsMatrix<float,Cuda> , + View<float*,Cuda > , + View<float*,Cuda > > +{ + typedef Cuda execution_space ; + typedef execution_space::size_type size_type ; + typedef float scalar_type ; + typedef View< scalar_type* , execution_space > vector_type ; + typedef CrsMatrix< scalar_type , execution_space > matrix_type ; + +public: + + Multiply( const matrix_type & A , + const size_type nrow , + const size_type ncol , + const vector_type & x , + const vector_type & y ) + { + CudaSparseSingleton & s = CudaSparseSingleton::singleton(); + const scalar_type alpha = 1 , beta = 0 ; + + cusparseStatus_t status = + cusparseScsrmv( s.handle , + CUSPARSE_OPERATION_NON_TRANSPOSE , + nrow , ncol , A.coefficients.dimension_0() , + &alpha , + s.descra , + A.coefficients.ptr_on_device() , + A.graph.row_map.ptr_on_device() , + A.graph.entries.ptr_on_device() , + x.ptr_on_device() , + &beta , + y.ptr_on_device() ); + + if ( CUSPARSE_STATUS_SUCCESS != status ) { + throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) ); + } + } +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +#endif /* #if defined( KOKKOS_HAVE_CUDA ) */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #ifndef SPARSELINEARSYSTEM_HPP */ + diff --git a/lib/kokkos/example/multi_fem/SparseLinearSystemFill.hpp b/lib/kokkos/example/multi_fem/SparseLinearSystemFill.hpp 
new file mode 100644 index 0000000000000000000000000000000000000000..78680cfb6a1ea00e514c2a17cc8ed162542ee732 --- /dev/null +++ b/lib/kokkos/example/multi_fem/SparseLinearSystemFill.hpp @@ -0,0 +1,276 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef SPARSELINEARSYSTEMFILL_HPP +#define SPARSELINEARSYSTEMFILL_HPP + +#include <vector> +#include <algorithm> +#include <limits> + +#include <FEMesh.hpp> +#include <SparseLinearSystem.hpp> + +//---------------------------------------------------------------------------- + +namespace HybridFEM { + +template< class MatrixType , class MeshType , + class elem_matrices_type , + class elem_vectors_type > struct GatherFill ; + + +template< typename ScalarType , + class DeviceType , + unsigned ElemNode , + typename CoordScalarType , + class elem_matrices_type , + class elem_vectors_type > +struct GatherFill< + Kokkos::CrsMatrix< ScalarType , DeviceType > , + FEMesh< CoordScalarType , ElemNode , DeviceType > , + elem_matrices_type , elem_vectors_type > +{ + typedef DeviceType execution_space ; + typedef typename execution_space::size_type size_type ; + + static const size_type ElemNodeCount = ElemNode ; + + typedef Kokkos::CrsMatrix< ScalarType , execution_space > matrix_type ; + typedef typename matrix_type::coefficients_type coefficients_type ; + typedef Kokkos::View< ScalarType[] , execution_space > vector_type ; + typedef Kokkos::View< size_type[][ElemNodeCount][ElemNodeCount] , execution_space > elem_graph_type ; + + typedef FEMesh< CoordScalarType , ElemNodeCount , 
execution_space > mesh_type ; + typedef typename mesh_type::node_elem_ids_type node_elem_ids_type ; + +private: + + node_elem_ids_type node_elem_ids ; + elem_graph_type elem_graph ; + elem_matrices_type elem_matrices ; + elem_vectors_type elem_vectors ; + coefficients_type system_coeff ; + vector_type system_rhs ; + +public: + + KOKKOS_INLINE_FUNCTION + void operator()( size_type irow ) const + { + const size_type node_elem_begin = node_elem_ids.row_map[irow]; + const size_type node_elem_end = node_elem_ids.row_map[irow+1]; + + // for each element that a node belongs to + + for ( size_type i = node_elem_begin ; i < node_elem_end ; i++ ) { + + const size_type elem_id = node_elem_ids.entries( i, 0); + const size_type row_index = node_elem_ids.entries( i, 1); + + system_rhs(irow) += elem_vectors(elem_id, row_index); + + // for each node in a particular related element + // gather the contents of the element stiffness + // matrix that belong in irow + + for ( size_type j = 0 ; j < ElemNodeCount ; ++j ){ + const size_type A_index = elem_graph( elem_id , row_index , j ); + + system_coeff( A_index ) += elem_matrices( elem_id, row_index, j ); + } + } + } + + + static void apply( const matrix_type & matrix , + const vector_type & rhs , + const mesh_type & mesh , + const elem_graph_type & elem_graph , + const elem_matrices_type & elem_matrices , + const elem_vectors_type & elem_vectors ) + { + const size_t row_count = matrix.graph.row_map.dimension_0() - 1 ; + GatherFill op ; + op.node_elem_ids = mesh.node_elem_ids ; + op.elem_graph = elem_graph ; + op.elem_matrices = elem_matrices ; + op.elem_vectors = elem_vectors ; + op.system_coeff = matrix.coefficients ; + op.system_rhs = rhs ; + + parallel_for( row_count , op ); + } +}; + +} /* namespace HybridFEM */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace HybridFEM { + +template< class GraphType , 
class MeshType > +struct GraphFactory { + typedef GraphType graph_type ; + typedef MeshType mesh_type ; + typedef typename graph_type::execution_space execution_space ; + typedef typename execution_space::size_type size_type ; + + static const unsigned ElemNodeCount = mesh_type::element_node_count ; + + typedef Kokkos::View< size_type[][ElemNodeCount][ElemNodeCount] , execution_space > element_map_type ; + + static + void + create( const mesh_type & mesh , + graph_type & graph , + element_map_type & elem_map ) + { + typename mesh_type::node_elem_ids_type::HostMirror + node_elem_ids = create_mirror( mesh.node_elem_ids ); + + typename mesh_type::elem_node_ids_type::HostMirror + elem_node_ids = create_mirror( mesh.elem_node_ids ); + + typedef typename element_map_type::HostMirror element_map_host_type ; + + deep_copy( elem_node_ids , mesh.elem_node_ids ); + deep_copy( node_elem_ids.entries , mesh.node_elem_ids.entries ); + + const size_t owned_node = mesh.parallel_data_map.count_owned ; + const size_t total_elem = mesh.elem_node_ids.dimension_0(); + + if ( total_elem ) { + elem_map = element_map_type( std::string("element_map"), total_elem ); + } + + element_map_host_type elem_map_host = create_mirror( elem_map ); + + //------------------------------------ + // Node->node mapping for the CrsMatrix graph + + std::vector< std::vector< unsigned > > node_node_ids( owned_node ); + std::vector< unsigned > node_node_begin( owned_node ); + + size_t offset = 0 ; + for ( size_t i = 0 ; i < owned_node ; ++i ) { + const size_t j_end = node_elem_ids.row_map[i+1]; + size_t j = node_elem_ids.row_map[i]; + + node_node_begin[i] = offset ; + + std::vector< unsigned > & work = node_node_ids[i] ; + + for ( ; j < j_end ; ++j ) { + const size_t elem_id = node_elem_ids.entries(j,0); + for ( size_t k = 0 ; k < ElemNodeCount ; ++k ) { + work.push_back( elem_node_ids( elem_id , k ) ); + } + } + + std::sort( work.begin() , work.end() ); + + work.erase( std::unique( work.begin() , work.end() ) , 
work.end() ); + + offset += work.size(); + } + + graph = Kokkos::create_staticcrsgraph< graph_type >( "node_node_ids" , node_node_ids ); + + //------------------------------------ + // ( element , node_row , node_column ) -> matrix_crs_column + + for ( size_t elem_id = 0 ; elem_id < total_elem ; ++elem_id ) { + for ( size_t i = 0 ; i < ElemNodeCount ; ++i ) { + + const size_t node_row = elem_node_ids( elem_id , i ); + + if ( owned_node <= node_row ) { // row index lies beyond the owned range: no matrix row here, flag every column + for ( unsigned j = 0 ; j < ElemNodeCount ; ++j ) { + elem_map_host( elem_id , i , j ) = std::numeric_limits<size_type>::max(); + } + } + else { // node_node_begin / node_node_ids have owned_node entries, so they are indexed for owned rows only + const size_t node_row_begin = node_node_begin[ node_row ]; + const std::vector< unsigned > & column = node_node_ids[ node_row ] ; + + for ( unsigned j = 0 ; j < ElemNodeCount ; ++j ) { + const size_type node_col = elem_node_ids( elem_id , j ); + + int col_search = 0 ; // becomes the lower bound of node_col within the sorted column list + + for ( int len = column.size() ; 0 < len ; ) { + + const int half = len >> 1; + const int middle = col_search + half ; + + if ( column[middle] < node_col ){ + col_search = middle + 1 ; + len -= half + 1 ; + } + else { + len = half ; + } + } +if ( column.size() <= static_cast<size_t>( col_search ) || node_col != column[col_search] ) { + throw std::runtime_error(std::string("Failed")); +} + elem_map_host( elem_id , i , j ) = col_search + node_row_begin ; + } + } + } + } + + deep_copy( elem_map , elem_map_host ); + } +}; + +} // namespace HybridFEM + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #ifndef SPARSELINEARSYSTEMFILL_HPP */ + diff --git a/lib/kokkos/example/multi_fem/SparseLinearSystem_Cuda.hpp b/lib/kokkos/example/multi_fem/SparseLinearSystem_Cuda.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3b22d4c5d0e73d9c5c39e92ad043debf796993a5 --- /dev/null +++ b/lib/kokkos/example/multi_fem/SparseLinearSystem_Cuda.hpp @@ -0,0 +1,164 @@ +/* +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef SPARSELINEARSYSTEM_CUDA_HPP +#define SPARSELINEARSYSTEM_CUDA_HPP + +#if defined( BUILD_FROM_CU_FILE ) + +#include <cusparse_v2.h> +#include <Kokkos_Core.hpp> + +namespace Kokkos { +namespace Impl { + + +struct CudaSparseSingleton { + cusparseHandle_t handle; + cusparseMatDescr_t descra; + + CudaSparseSingleton() + { + cusparseCreate( & handle ); + cusparseCreateMatDescr( & descra ); + cusparseSetMatType( descra , CUSPARSE_MATRIX_TYPE_GENERAL ); + cusparseSetMatIndexBase( descra , CUSPARSE_INDEX_BASE_ZERO ); + } + + static CudaSparseSingleton & singleton(); + +}; + +CudaSparseSingleton & CudaSparseSingleton::singleton() +{ static CudaSparseSingleton s ; return s ; } + + +template<> +struct Multiply< CrsMatrix<double,Cuda> , + View<double*,Cuda > , + View<double*,Cuda > > +{ + typedef Cuda execution_space ; + typedef execution_space::size_type size_type ; + typedef double scalar_type ; + typedef View< scalar_type* , execution_space > vector_type ; + typedef CrsMatrix< scalar_type , execution_space > matrix_type ; + +public: + + Multiply( const matrix_type & A , + const size_type nrow , + const size_type ncol , + const vector_type & x , + const vector_type & y ) + { + CudaSparseSingleton & s = CudaSparseSingleton::singleton(); + const scalar_type alpha = 1 , beta = 0 ; + + cusparseStatus_t status = + cusparseDcsrmv( s.handle , + CUSPARSE_OPERATION_NON_TRANSPOSE , + nrow , ncol , A.coefficients.dimension_0() , + &alpha , + s.descra , + A.coefficients.ptr_on_device() , + A.graph.row_map.ptr_on_device() , + A.graph.entries.ptr_on_device() , + x.ptr_on_device() , + &beta , + y.ptr_on_device() ); + + if ( CUSPARSE_STATUS_SUCCESS != status ) { + throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) ); + } + } +}; + + +template<> +struct Multiply< CrsMatrix<float,Cuda> , + View<float*,Cuda > , + View<float*,Cuda > > +{ + 
typedef Cuda execution_space ; + typedef execution_space::size_type size_type ; + typedef float scalar_type ; + typedef View< scalar_type* , execution_space > vector_type ; + typedef CrsMatrix< scalar_type , execution_space > matrix_type ; + +public: + + Multiply( const matrix_type & A , + const size_type nrow , + const size_type ncol , + const vector_type & x , + const vector_type & y ) + { + CudaSparseSingleton & s = CudaSparseSingleton::singleton(); + const scalar_type alpha = 1 , beta = 0 ; + + cusparseStatus_t status = + cusparseScsrmv( s.handle , + CUSPARSE_OPERATION_NON_TRANSPOSE , + nrow , ncol , A.coefficients.dimension_0() , + &alpha , + s.descra , + A.coefficients.ptr_on_device() , + A.graph.row_map.ptr_on_device() , + A.graph.entries.ptr_on_device() , + x.ptr_on_device() , + &beta , + y.ptr_on_device() ); + + if ( CUSPARSE_STATUS_SUCCESS != status ) { + throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) ); + } + } +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +#endif /* #if defined( __CUDACC__ ) */ +#endif /* #ifndef SPARSELINEARSYSTEM_CUDA_HPP */ + diff --git a/lib/kokkos/example/multi_fem/TestBoxMeshFixture.hpp b/lib/kokkos/example/multi_fem/TestBoxMeshFixture.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9cc32b6b104f5d161d05cb3ced0f8412dbfbefbc --- /dev/null +++ b/lib/kokkos/example/multi_fem/TestBoxMeshFixture.hpp @@ -0,0 +1,242 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef TESTFEMESHBOXFIXTURE_HPP
+#define TESTFEMESHBOXFIXTURE_HPP
+
+#include <stdio.h>
+#include <iostream>
+#include <stdexcept>
+#include <limits>
+#include <utility>
+#include <BoxMeshFixture.hpp>
+
+#include <ParallelComm.hpp>
+
+//----------------------------------------------------------------------------
+
+namespace TestFEMesh {
+
+// Primary template; only the specialization for rank-2 views with three
+// columns (one column per coordinate) is defined below.
+template< class ViewType >
+struct VerifyUnpack ;
+
+// Reduction functor that compares a flat buffer of coordinate triples
+// against the rows of a node-coordinate view and counts the rows that
+// differ. Entry 'i' of the reduction reads buffer[3*i .. 3*i+2] and
+// node_coords( node_begin + i , 0..2 ).
+template< typename DeviceType, typename T >
+struct VerifyUnpack< Kokkos::View< T*[3] , DeviceType > >
+{
+  typedef DeviceType execution_space ;
+  typedef typename execution_space::size_type size_type ;
+  typedef size_type value_type ;
+
+  typedef Kokkos::View< T* , execution_space > buffer_type ;
+  typedef Kokkos::View< T*[3] , execution_space > array_type ;
+
+private:
+
+  array_type node_coords ;
+  buffer_type buffer ;
+  size_type node_begin ;
+
+public:
+
+  // Reduction identity: no mismatches seen yet.
+  KOKKOS_INLINE_FUNCTION
+  static void init( value_type & update )
+  { update = 0 ; }
+
+  // Combine two partial mismatch counts.
+  KOKKOS_INLINE_FUNCTION
+  static void join( volatile value_type & update ,
+                    const volatile value_type & source )
+  { update += source ; }
+
+  // Compare one buffered coordinate triple with the corresponding node.
+  // All six values are converted to 'long' before comparing and printing.
+  KOKKOS_INLINE_FUNCTION
+  void operator()( const size_type i , value_type & update ) const
+  {
+    const size_type node_id = i + node_begin ;
+    const size_type k = i * 3 ;
+
+    const long xb = buffer[k];
+    const long yb = buffer[k+1];
+    const long zb = buffer[k+2];
+    const long xn = node_coords(node_id,0);
+    const long yn = node_coords(node_id,1);
+    const long zn = node_coords(node_id,2);
+
+    if ( xb != xn || yb != yn || zb != zn ) {
+      printf("TestFEMesh::VerifyUnpack failed at %d : node %d : { %ld %ld %ld } != { %ld %ld %ld }\n",
+             (int)i,(int)node_id, xb,yb,zb, xn, yn, zn );
+      ++update ;
+    }
+  }
+
+  // Run the comparison over 'arg_node_count' triples, starting at node
+  // 'arg_node_begin', and return the number of mismatching nodes.
+  static inline
+  size_type unpack( const array_type & arg_node_coords ,
+                    const size_type arg_node_begin ,
+                    const size_type arg_node_count ,
+                    const buffer_type & arg_buffer )
+  {
+    VerifyUnpack op ;
+    op.node_coords = arg_node_coords ;
+    op.buffer = arg_buffer ;
+    op.node_begin = arg_node_begin ;
+    size_type count = 0 ;
+    Kokkos::parallel_reduce( arg_node_count , op , count );
+    return count ;
+  }
+};
+
+}
+
+//----------------------------------------------------------------------------
+
+#ifdef KOKKOS_HAVE_MPI
+
+namespace TestFEMesh {
+
+// Exchange node coordinates between processes through the mesh's
+// parallel data map and check that every received triple matches the
+// coordinates already stored for that node. The per-process counts are
+// summed with MPI_Allreduce and rank 0 prints a PASSED / FAILED line.
+template< typename coordinate_scalar_type ,
+          unsigned ElemNodeCount ,
+          class Device >
+void verify_parallel(
+  const HybridFEM::FEMesh< coordinate_scalar_type ,
+                           ElemNodeCount ,
+                           Device > & mesh )
+{
+  typedef HybridFEM::FEMesh< coordinate_scalar_type, ElemNodeCount, Device > femesh_type ;
+  typedef typename femesh_type::node_coords_type node_coords_type ;
+
+  comm::Machine machine = mesh.parallel_data_map.machine ;
+
+  // Communicate node coordinates to verify communication and setup.
+
+  // Three values (x,y,z) are exchanged per node.
+  const size_t chunk_size = 3 ;
+
+  Kokkos::AsyncExchange< coordinate_scalar_type, Device, Kokkos::ParallelDataMap >
+    exchange( mesh.parallel_data_map , chunk_size );
+
+  const size_t send_begin = mesh.parallel_data_map.count_interior ;
+  const size_t send_count = mesh.parallel_data_map.count_send ;
+
+  const size_t recv_begin = mesh.parallel_data_map.count_owned ;
+  const size_t recv_count = mesh.parallel_data_map.count_receive ;
+
+  typedef Kokkos::PackArray< node_coords_type > pack_type ;
+
+  pack_type::pack( exchange.buffer(), send_begin, send_count, mesh.node_coords );
+
+  exchange.setup();
+
+  // Launch local-action device kernels
+
+  exchange.send_receive();
+
+  // local[0] : nodes owned here, local[1] : nodes received here,
+  // local[2] : received nodes whose coordinates did not match.
+  unsigned long local[3] ;
+  local[0] = mesh.parallel_data_map.count_owned ;
+  local[1] = mesh.parallel_data_map.count_receive ;
+  local[2] = TestFEMesh::VerifyUnpack< node_coords_type >::unpack( mesh.node_coords, recv_begin, recv_count, exchange.buffer() );
+
+  unsigned long global[3] = { 0 , 0 , 0 };
+
+  MPI_Allreduce( local , global ,
+                 3 , MPI_UNSIGNED_LONG , MPI_SUM , machine.mpi_comm );
+
+  if ( 0 == comm::rank( machine ) ) {
+    std::cout << ( global[2] ? "FAILED" : "PASSED" )
+              << ": TestFEMesh::verify_parallel "
+              << "NP(" << comm::size( machine )
+              << ") total_node(" << global[0]
+              << ") verified_nodes(" << global[1]
+              << ") failed_nodes(" << global[2]
+              << ")" << std::endl ;
+  }
+}
+
+} // namespace TestFEMesh
+
+#else /* ! #ifdef KOKKOS_HAVE_MPI */
+
+namespace TestFEMesh {
+
+// Without MPI there is nothing to exchange; verification is a no-op.
+template< typename coordinate_scalar_type ,
+          unsigned ElemNodeCount ,
+          class Device >
+void verify_parallel(
+  const HybridFEM::FEMesh< coordinate_scalar_type ,
+                           ElemNodeCount ,
+                           Device > & )
+{}
+
+} // namespace TestFEMesh
+
+#endif /* ! #ifdef KOKKOS_HAVE_MPI */
+
+//----------------------------------------------------------------------------
+
+// Build a hex8 box mesh with 'nodes_n? - 1' elements in each direction
+// for this process and run TestFEMesh::verify_parallel on it.
+//
+// Throws std::invalid_argument when any node count is zero.
+template< class Device >
+void test_box_fixture( comm::Machine machine ,
+                       const size_t gang_count ,
+                       const size_t nodes_nx ,
+                       const size_t nodes_ny ,
+                       const size_t nodes_nz )
+{
+  // The element counts passed to the fixture are 'nodes - 1' in unsigned
+  // arithmetic: a zero node count would wrap around to the largest size_t
+  // value instead of failing, so reject it up front.
+  if ( 0 == nodes_nx || 0 == nodes_ny || 0 == nodes_nz ) {
+    throw std::invalid_argument( "test_box_fixture: node counts must be at least 1 in each direction" );
+  }
+
+  typedef long coordinate_scalar_type ;
+  typedef FixtureElementHex8 fixture_element_type ;
+
+  typedef BoxMeshFixture< coordinate_scalar_type ,
+                          Device ,
+                          fixture_element_type > fixture_type ;
+
+  typedef typename fixture_type::FEMeshType mesh_type ;
+
+  const size_t proc_count = comm::size( machine );
+  const size_t proc_local = comm::rank( machine ) ;
+
+  mesh_type mesh =
+    fixture_type::create( proc_count , proc_local , gang_count ,
+                          nodes_nx - 1 , nodes_ny - 1 , nodes_nz - 1 );
+
+  mesh.parallel_data_map.machine = machine ;
+
+  TestFEMesh::verify_parallel( mesh );
+}
+
+#endif /* #ifndef TESTFEMESHBOXFIXTURE_HPP */
+
+
diff --git a/lib/kokkos/example/multi_fem/TestBoxMeshPartition.cpp b/lib/kokkos/example/multi_fem/TestBoxMeshPartition.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ffaeeb6af5e1403b0b57b72af909905f7811ccf8
--- /dev/null
+++ b/lib/kokkos/example/multi_fem/TestBoxMeshPartition.cpp
@@ -0,0 +1,172 @@
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v.
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+
+#include <algorithm>
+#include <cstddef>
+#include <iostream>
+#include <stdexcept>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
+#include <BoxMeshPartition.hpp>
+
+//----------------------------------------------------------------------------
+
+// Self-check of the box partitioning helpers.
+//
+// A 100 x 200 x 300 root box is partitioned with box_partition_rcb() for a
+// sequence of part counts np = 2, 6, 14, 30, ... below 10000. For up to
+// four sampled parts per partition the communication maps returned by
+// box_partition_maps() are cross-checked:
+//   - what 'my_part' expects to receive from part 'ip' must equal both the
+//     size of the intersection of ip's box with my_part's use-box and the
+//     length of ip's send list for my_part;
+//   - owned plus received counts must add up to my_part's use count.
+//
+// 'print' : when true, write a per-partition summary to std::cout.
+// Throws std::runtime_error on the first inconsistency.
+void test_box_partition( bool print )
+{
+  const size_t np_max = 10000 ;
+
+  const BoxBoundsLinear use_box ;
+
+  BoxType root_box ;
+
+  root_box[0][0] = 0 ; root_box[0][1] = 100 ;
+  root_box[1][0] = 0 ; root_box[1][1] = 200 ;
+  root_box[2][0] = 0 ; root_box[2][1] = 300 ;
+
+  const size_t cell_total =
+    ( root_box[0][1] - root_box[0][0] ) *
+    ( root_box[1][1] - root_box[1][0] ) *
+    ( root_box[2][1] - root_box[2][0] );
+
+  for ( size_t np = 2 ; np < np_max ; np = 2 * ( np + 1 ) ) {
+
+    std::vector<BoxType> part_boxes( np );
+
+    box_partition_rcb( root_box , part_boxes );
+
+    // Ideal (rounded-up) cells per part versus the largest part produced.
+    size_t cell_goal = ( cell_total + np - 1 ) / np ;
+    size_t cell_max = 0 ;
+
+    for ( size_t i = 0 ; i < np ; ++i ) {
+      cell_max = std::max( cell_max , count( part_boxes[i] ) );
+    }
+
+    if ( print ) {
+      std::cout << std::endl
+                << "box_part( " << np
+                << " ) max( " << cell_max
+                << " ) goal( " << cell_goal
+                << " ) ratio( " << double(cell_max) / double(cell_goal)
+                << " )" << std::endl ;
+    }
+
+    // Only a handful of evenly spaced parts are verified per partition.
+    const size_t nsample = std::min(np,(size_t)4);
+    const size_t stride = ( np + nsample - 1 ) / nsample ;
+
+    for ( size_t my_part = 0 ; my_part < np ; my_part += stride ) {
+      BoxType my_use_box ;
+      std::vector<size_t> my_use_id_map ;
+      size_t my_count_interior ;
+      size_t my_count_owned ;
+      size_t my_count_uses ;
+      std::vector<size_t> my_recv_counts ;
+      std::vector<std::vector<size_t> > my_send_map ;
+
+      box_partition_maps( root_box , part_boxes ,
+                          use_box , my_part ,
+                          my_use_box , my_use_id_map ,
+                          my_count_interior ,
+                          my_count_owned ,
+                          my_count_uses ,
+                          my_recv_counts ,
+                          my_send_map );
+
+      // Running total of owned + received items, compared with
+      // my_count_uses once every neighbor has been visited.
+      size_t count_verify = my_count_owned ;
+
+      if ( print ) {
+        std::cout << " my_part(" << my_part << ") layout { "
+                  << "P" << my_part
+                  << "(" << my_count_interior
+                  << "," << ( my_count_owned - my_count_interior )
+                  << ")" ;
+      }
+
+      // Receive counts are indexed by offset from my_part: entry 'i'
+      // refers to part ( my_part + i ) % np.
+      for ( size_t i = 1 ; i < np ; ++i ) {
+        if ( my_recv_counts[i] ) {
+          count_verify += my_recv_counts[i] ;
+          const size_t ip = ( my_part + i ) % np ;
+
+          if ( print ) {
+            std::cout << " P" << ip << "(" << my_recv_counts[i] << ")" ;
+          }
+
+          // Compare recv & send lists
+
+          BoxType ip_use_box ;
+          std::vector<size_t> ip_use_id_map ;
+          size_t ip_count_interior ;
+          size_t ip_count_owned ;
+          size_t ip_count_uses ;
+          std::vector<size_t> ip_recv_counts ;
+          std::vector<std::vector<size_t> > ip_send_map ;
+
+          box_partition_maps( root_box , part_boxes ,
+                              use_box , ip ,
+                              ip_use_box , ip_use_id_map ,
+                              ip_count_interior ,
+                              ip_count_owned ,
+                              ip_count_uses ,
+                              ip_recv_counts ,
+                              ip_send_map );
+
+          // Sent by ip, received by my_part:
+
+          const BoxType recv_send = intersect( part_boxes[ip] , my_use_box );
+          const size_t recv_send_count = count( recv_send );
+
+          // Offset of my_part as seen from ip.
+          const size_t j = ( my_part + np - ip ) % np ;
+
+          if ( recv_send_count != my_recv_counts[i] ||
+               recv_send_count != ip_send_map[j].size() ) {
+            throw std::runtime_error( std::string("bad recv/send map") );
+          }
+        }
+      }
+      if ( print ) { std::cout << " }" << std::endl ; }
+
+      if ( count_verify != my_count_uses ) {
+        throw std::runtime_error( std::string("bad partition map") );
+      }
+    }
+  }
+}
+
+
diff --git a/lib/kokkos/example/multi_fem/TestCuda.cpp b/lib/kokkos/example/multi_fem/TestCuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..11370a53504589c1f06e19caa5cf0110a7e9a909
--- /dev/null
+++ b/lib/kokkos/example/multi_fem/TestCuda.cpp
@@ -0,0 +1,192 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S.
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <Kokkos_Core.hpp>
+
+#include <TestBoxMeshFixture.hpp>
+#include <Implicit.hpp>
+#include <Nonlinear.hpp>
+#include <Explicit.hpp>
+
+#include <SparseLinearSystem.hpp>
+
+#if defined( KOKKOS_HAVE_CUDA )
+
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+// Process-wide CudaSparseSingleton, created on first use.
+CudaSparseSingleton & CudaSparseSingleton::singleton()
+{
+  static CudaSparseSingleton instance ;
+  return instance ;
+}
+
+}
+}
+
+//----------------------------------------------------------------------------
+
+namespace {
+
+// Every Cuda test in this file runs with a gang count of zero.
+const size_t cuda_gang_count = 0 ;
+
+// Device index for this process: 'rank modulo device count' when at least
+// one device was detected and there are no more devices than processes,
+// otherwise device 0.
+size_t cuda_device_for_rank( comm::Machine machine )
+{
+  const size_t rank = comm::rank( machine );
+  const size_t nproc = comm::size( machine );
+  const size_t ndev = Kokkos::Cuda::detect_device_count();
+
+  if ( 0 == ndev || nproc < ndev ) { return 0 ; }
+
+  return rank % ndev ;
+}
+
+// Initialize the host execution space, then Cuda on the device selected
+// for this process.
+void cuda_startup( comm::Machine machine )
+{
+  const size_t device_id = cuda_device_for_rank( machine );
+
+  Kokkos::HostSpace::execution_space::initialize();
+  Kokkos::Cuda::SelectDevice chosen_device( device_id );
+  Kokkos::Cuda::initialize( chosen_device );
+}
+
+// Finalize in the reverse order of cuda_startup().
+void cuda_shutdown()
+{
+  Kokkos::Cuda::finalize();
+  Kokkos::HostSpace::execution_space::finalize();
+}
+
+} // namespace
+
+//----------------------------------------------------------------------------
+
+// Print the number of Cuda devices detected by this process.
+void test_cuda_query( comm::Machine machine )
+{
+  const size_t my_rank = comm::rank( machine );
+
+  std::cout << "P" << my_rank
+            << ": Cuda device_count = "
+            << Kokkos::Cuda::detect_device_count()
+            << std::endl ;
+}
+
+//----------------------------------------------------------------------------
+
+// Run the box-mesh fixture verification on the Cuda device.
+void test_cuda_fixture( comm::Machine machine ,
+                        size_t nx , size_t ny , size_t nz )
+{
+  cuda_startup( machine );
+  test_box_fixture<Kokkos::Cuda>( machine , cuda_gang_count , nx , ny , nz );
+  cuda_shutdown();
+}
+
+//----------------------------------------------------------------------------
+
+// Run the implicit driver on the Cuda device.
+void test_cuda_implicit( comm::Machine machine ,
+                         size_t elem_count_begin ,
+                         size_t elem_count_end ,
+                         size_t count_run )
+{
+  cuda_startup( machine );
+  HybridFEM::Implicit::driver<double,Kokkos::Cuda>( "Cuda" , machine , cuda_gang_count , elem_count_begin , elem_count_end , count_run );
+  cuda_shutdown();
+}
+
+//----------------------------------------------------------------------------
+
+// Run the explicit driver on the Cuda device.
+void test_cuda_explicit( comm::Machine machine ,
+                         size_t elem_count_begin ,
+                         size_t elem_count_end ,
+                         size_t count_run )
+{
+  cuda_startup( machine );
+  Explicit::driver<double,Kokkos::Cuda>( "Cuda" , machine , cuda_gang_count , elem_count_begin , elem_count_end , count_run );
+  cuda_shutdown();
+}
+
+//----------------------------------------------------------------------------
+
+// Run the nonlinear driver with hex8 elements on the Cuda device.
+void test_cuda_nonlinear( comm::Machine machine ,
+                          size_t elem_count_begin ,
+                          size_t elem_count_end ,
+                          size_t count_run )
+{
+  cuda_startup( machine );
+
+  HybridFEM::Nonlinear::driver< double , Kokkos::Cuda , FixtureElementHex8 >( "Cuda" , machine , cuda_gang_count , elem_count_begin , elem_count_end , count_run );
+
+  cuda_shutdown();
+}
+
+// Run the nonlinear driver with hex27 elements on the Cuda device.
+void test_cuda_nonlinear_quadratic( comm::Machine machine ,
+                                    size_t elem_count_begin ,
+                                    size_t elem_count_end ,
+                                    size_t count_run )
+{
+  cuda_startup( machine );
+
+  HybridFEM::Nonlinear::driver< double , Kokkos::Cuda , FixtureElementHex27 >( "Cuda" , machine , cuda_gang_count , elem_count_begin , elem_count_end , count_run );
+
+  cuda_shutdown();
+}
+
+//----------------------------------------------------------------------------
+
+#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
+
diff --git a/lib/kokkos/example/multi_fem/TestHost.cpp b/lib/kokkos/example/multi_fem/TestHost.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..facdd0f28a2e0f5d19f42cfefaccb643c01becdd
--- /dev/null
+++ b/lib/kokkos/example/multi_fem/TestHost.cpp
@@ -0,0 +1,137 @@
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +// Must be included first on Intel-Phi systems due to +// redefinition of SEEK_SET in <mpi.h>. 
+
+#include <ParallelComm.hpp>
+
+#include <iostream>
+#include <stdexcept>
+#include <limits>
+#include <utility>
+
+//----------------------------------------------------------------------------
+
+#include <Kokkos_Core.hpp>
+
+#include <BoxMeshFixture.hpp>
+#include <TestBoxMeshFixture.hpp>
+#include <Implicit.hpp>
+#include <Nonlinear.hpp>
+#include <Explicit.hpp>
+#include <SparseLinearSystem.hpp>
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace {
+
+// Execution space used by every host test in this file.
+typedef Kokkos::HostSpace::execution_space host_exec ;
+
+} // namespace
+
+// Run the box-mesh fixture verification on the host execution space,
+// initialized with gang_count * gang_worker_count.
+void test_host_fixture( comm::Machine machine ,
+                        size_t gang_count ,
+                        size_t gang_worker_count ,
+                        size_t nx , size_t ny , size_t nz )
+{
+  const size_t thread_total = gang_count * gang_worker_count ;
+
+  host_exec::initialize( thread_total );
+  test_box_fixture< host_exec >( machine , gang_count , nx , ny , nz );
+  host_exec::finalize();
+}
+
+//----------------------------------------------------------------------------
+
+// Run the implicit driver on the host execution space.
+void test_host_implicit( comm::Machine machine ,
+                         size_t gang_count ,
+                         size_t gang_worker_count ,
+                         size_t elem_count_begin ,
+                         size_t elem_count_end ,
+                         size_t count_run )
+{
+  const size_t thread_total = gang_count * gang_worker_count ;
+
+  host_exec::initialize( thread_total );
+  HybridFEM::Implicit::driver< double , host_exec >( "Threads" , machine , gang_count , elem_count_begin , elem_count_end , count_run );
+  host_exec::finalize();
+}
+
+//----------------------------------------------------------------------------
+
+// Run the explicit driver on the host execution space.
+void test_host_explicit( comm::Machine machine ,
+                         size_t gang_count ,
+                         size_t gang_worker_count ,
+                         size_t elem_count_begin ,
+                         size_t elem_count_end ,
+                         size_t count_run )
+{
+  const size_t thread_total = gang_count * gang_worker_count ;
+
+  host_exec::initialize( thread_total );
+  Explicit::driver< double , host_exec >( "Threads" , machine , gang_count , elem_count_begin , elem_count_end , count_run );
+  host_exec::finalize();
+}
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+// Run the nonlinear driver with hex8 elements on the host execution space.
+void test_host_nonlinear( comm::Machine machine ,
+                          size_t gang_count ,
+                          size_t gang_worker_count ,
+                          size_t elem_count_begin ,
+                          size_t elem_count_end ,
+                          size_t count_run )
+{
+  const size_t thread_total = gang_count * gang_worker_count ;
+
+  host_exec::initialize( thread_total );
+  HybridFEM::Nonlinear::driver< double , host_exec , FixtureElementHex8 >( "Threads" , machine , gang_count , elem_count_begin , elem_count_end , count_run );
+  host_exec::finalize();
+}
+
+// Run the nonlinear driver with hex27 elements on the host execution space.
+void test_host_nonlinear_quadratic( comm::Machine machine ,
+                                    size_t gang_count ,
+                                    size_t gang_worker_count ,
+                                    size_t elem_count_begin ,
+                                    size_t elem_count_end ,
+                                    size_t count_run )
+{
+  const size_t thread_total = gang_count * gang_worker_count ;
+
+  host_exec::initialize( thread_total );
+  HybridFEM::Nonlinear::driver< double , host_exec , FixtureElementHex27 >( "Threads" , machine , gang_count , elem_count_begin , elem_count_end , count_run );
+  host_exec::finalize();
+}
+
+//----------------------------------------------------------------------------
+
+
diff --git a/lib/kokkos/example/multi_fem/TestHybridFEM.cpp b/lib/kokkos/example/multi_fem/TestHybridFEM.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1bbd74e4d7f5818712850ef2294849e01d078b92
--- /dev/null
+++ b/lib/kokkos/example/multi_fem/TestHybridFEM.cpp
@@ -0,0 +1,348 @@
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +// Must be included first on Intel-Phi systems due to +// redefinition of SEEK_SET in <mpi.h>. 
+
+#include <ParallelComm.hpp>
+
+#include <string>
+#include <sstream>
+#include <iostream>
+#include <Kokkos_hwloc.hpp>
+
+//----------------------------------------------------------------------------
+
+// Partitioning self-check, defined in TestBoxMeshPartition.cpp.
+// Throws std::runtime_error when an inconsistency is found.
+void test_box_partition( bool print );
+
+//----------------------------------------------------------------------------
+
+// Host tests, defined in TestHost.cpp. Each one initializes the host
+// execution space with gang_count * gang_worker_count, runs one test or
+// driver, and finalizes the execution space again.
+
+void test_host_fixture( comm::Machine machine ,
+                        size_t gang_count ,
+                        size_t gang_worker_count ,
+                        size_t nx , size_t ny , size_t nz );
+
+void test_host_implicit( comm::Machine machine ,
+                         size_t gang_count ,
+                         size_t gang_worker_count ,
+                         size_t elem_count_begin ,
+                         size_t elem_count_end ,
+                         size_t count_run );
+
+void test_host_explicit( comm::Machine machine ,
+                         size_t gang_count ,
+                         size_t gang_worker_count ,
+                         size_t elem_count_begin ,
+                         size_t elem_count_end ,
+                         size_t count_run );
+
+void test_host_nonlinear( comm::Machine machine ,
+                          size_t gang_count ,
+                          size_t gang_worker_count ,
+                          size_t elem_count_begin ,
+                          size_t elem_count_end ,
+                          size_t count_run );
+
+void test_host_nonlinear_quadratic( comm::Machine machine ,
+                                    size_t gang_count ,
+                                    size_t gang_worker_count ,
+                                    size_t elem_count_begin ,
+                                    size_t elem_count_end ,
+                                    size_t count_run );
+
+
+//----------------------------------------------------------------------------
+
+// Cuda tests, defined in TestCuda.cpp. The definitions there are compiled
+// only when KOKKOS_HAVE_CUDA is defined, so every call in this file must be
+// guarded by the same macro.
+
+void test_cuda_query( comm::Machine );
+
+void test_cuda_fixture( comm::Machine machine ,
+                        size_t nx , size_t ny , size_t nz );
+
+void test_cuda_implicit( comm::Machine machine ,
+                         size_t elem_count_begin ,
+                         size_t elem_count_end ,
+                         size_t count_run );
+
+void test_cuda_explicit( comm::Machine machine ,
+                         size_t elem_count_begin ,
+                         size_t elem_count_end ,
+                         size_t count_run );
+
+void test_cuda_nonlinear( comm:: Machine machine ,
+                          size_t elem_count_begin ,
+                          size_t elem_count_end ,
+                          size_t count_run );
+
+void test_cuda_nonlinear_quadratic( comm::Machine machine ,
+                                    size_t elem_count_begin ,
+                                    size_t elem_count_end ,
+                                    size_t count_run );
+
+
+//----------------------------------------------------------------------------
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace {
+
+// Read a test name and its arguments from 'input' and run that test on
+// the host with the given gang configuration.
+//
+// Recognized names: fixture, explicit, implicit, nonlinear,
+// nonlinear_quadratic. Returns true when the name is not recognized (a
+// command-line error), false after a test has been run.
+bool run_host( std::istream & input ,
+               comm::Machine machine ,
+               const size_t host_gang_count ,
+               const size_t host_gang_worker_count )
+{
+  // Common signature of the four mesh-size-range tests.
+  typedef void (*range_test_type)( comm::Machine ,
+                                   size_t , size_t ,
+                                   size_t , size_t , size_t );
+
+  std::string test_name ;
+  input >> test_name ;
+
+  // The fixture test is the only one taking three box dimensions.
+  if ( test_name == "fixture" ) {
+    size_t nx = 0 , ny = 0 , nz = 0 ;
+    input >> nx >> ny >> nz ;
+    test_host_fixture( machine , host_gang_count , host_gang_worker_count , nx , ny , nz );
+    return false ;
+  }
+
+  range_test_type selected = 0 ;
+
+  if      ( test_name == "explicit" )            { selected = & test_host_explicit ; }
+  else if ( test_name == "implicit" )            { selected = & test_host_implicit ; }
+  else if ( test_name == "nonlinear" )           { selected = & test_host_nonlinear ; }
+  else if ( test_name == "nonlinear_quadratic" ) { selected = & test_host_nonlinear_quadratic ; }
+
+  // Unknown test name: report a command error without reading further.
+  if ( 0 == selected ) { return true ; }
+
+  // Defaults are set before the values are read from the command line.
+  size_t mesh_node_begin = 100 ;
+  size_t mesh_node_end = 300 ;
+  size_t run = 1 ;
+  input >> mesh_node_begin >> mesh_node_end >> run ;
+
+  (*selected)( machine , host_gang_count , host_gang_worker_count , mesh_node_begin , mesh_node_end , run );
+
+  return false ;
+}
+
+#if defined( KOKKOS_HAVE_CUDA )
+// Read a test name and its arguments from 'input' and run that test on
+// the Cuda device. Recognized names are the same as for the host runner.
+// Returns true when the name is not recognized, false otherwise.
+bool run_cuda( std::istream & input , comm::Machine machine )
+{
+  bool cmd_error = false ;
+
+  std::string which ; input >> which ;
+
+  if ( which == std::string("fixture") ) {
+
+    size_t nx = 0 , ny = 0 , nz = 0 ;
+    input >> nx >> ny >> nz ;
+    test_cuda_fixture( machine , nx , ny , nz );
+
+  }
+  else if ( which == std::string("explicit") ) {
+
+    size_t mesh_node_begin = 100 ;
+    size_t mesh_node_end = 300 ;
+    size_t run = 1 ;
+    input >> mesh_node_begin >> mesh_node_end >> run ;
+    test_cuda_explicit( machine , mesh_node_begin , mesh_node_end , run );
+
+  }
+  else if ( which == std::string("implicit") ) {
+
+    size_t mesh_node_begin = 100 ;
+    size_t mesh_node_end = 300 ;
+    size_t run = 1 ;
+    input >> mesh_node_begin >> mesh_node_end >> run ;
+    test_cuda_implicit( machine , mesh_node_begin , mesh_node_end , run );
+
+  }
+  else if ( which == std::string("nonlinear") ) {
+
+    size_t mesh_node_begin = 100 ;
+    size_t mesh_node_end = 300 ;
+    size_t run = 1 ;
+    input >> mesh_node_begin >> mesh_node_end >> run ;
+    test_cuda_nonlinear( machine , mesh_node_begin , mesh_node_end , run );
+
+  }
+  else if ( which == std::string("nonlinear_quadratic") ) {
+
+    size_t mesh_node_begin = 100 ;
+    size_t mesh_node_end = 300 ;
+    size_t run = 1 ;
+    input >> mesh_node_begin >> mesh_node_end >> run ;
+    test_cuda_nonlinear_quadratic( machine , mesh_node_begin , mesh_node_end , run );
+
+  }
+  else {
+    cmd_error = true ;
+  }
+
+  return cmd_error ;
+}
+#endif
+
+// Interpret the command line 'argline' and run the requested action:
+// 'query', 'partition', or a host / cuda test. When the command is not
+// understood, rank 0 prints the usage text.
+void run( const std::string & argline , comm::Machine machine )
+{
+  const unsigned numa_count = Kokkos::hwloc::get_available_numa_count();
+  const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa();
+  const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
+
+  std::istringstream input( argline );
+
+  bool cmd_error = false ;
+
+  std::string which ; input >> which ;
+
+  if ( which == std::string("query") ) {
+    std::cout << "P" << comm::rank( machine )
+              << ": hwloc { NUMA[" << numa_count << "]"
+              << " CORE[" << cores_per_numa << "]"
+              << " PU[" << threads_per_core << "] }"
+              << std::endl ;
+#if defined( KOKKOS_HAVE_CUDA )
+    test_cuda_query( machine );
+#endif
+  }
+  else if ( which == std::string("partition") ) {
+    // The partition check is purely local, so only rank 0 runs it.
+    if ( 0 == comm::rank( machine ) ) {
+      test_box_partition( false /* print flag */ );
+    }
+  }
+  else {
+    if ( which == std::string("host") ) {
+      // Explicit gang configuration read from the command line.
+      size_t host_gang_count = 0 ;
+      size_t host_gang_worker_count = 1 ;
+
+      input >> host_gang_count ;
+      input >> host_gang_worker_count ;
+
+      cmd_error = run_host( input , machine , host_gang_count , host_gang_worker_count );
+    }
+    else if ( which == std::string("host-all") ) {
+      // One gang per NUMA region, one worker per hardware thread.
+      size_t host_gang_count = numa_count ;
+      size_t host_gang_worker_count = cores_per_numa * threads_per_core ;
+
+      cmd_error = run_host( input , machine , host_gang_count , host_gang_worker_count );
+    }
+    else if ( which == std::string("host-most") ) {
+      // Same as host-all but using one core fewer per NUMA region.
+      // NOTE(review): the subtraction is unsigned; it would wrap if hwloc
+      // ever reported zero cores per NUMA region -- confirm it cannot.
+      size_t host_gang_count = numa_count ;
+      size_t host_gang_worker_count = ( cores_per_numa - 1 ) * threads_per_core ;
+
+      cmd_error = run_host( input , machine , host_gang_count , host_gang_worker_count );
+    }
+#if defined( KOKKOS_HAVE_CUDA )
+    else if ( which == std::string("cuda") ) {
+      cmd_error = run_cuda( input , machine );
+    }
+#endif
+    else {
+      cmd_error = true ;
+    }
+  }
+
+  if ( cmd_error && 0 == comm::rank( machine ) ) {
+    std::cout << "Expecting command line with" << std::endl
+              << " query" << std::endl
+              << " partition" << std::endl
+              << " host NumNumaNode NumThreadPerNode <test>" << std::endl
+              << " host-all <test>" << std::endl
+              << " host-most <test>" << std::endl
+              << " cuda <test>" << std::endl
+              << "where <test> is" << std::endl
+              << " fixture NumElemX NumElemY NumElemZ" << std::endl
+              << " implicit NumElemBegin NumElemEnd NumRun" << std::endl
+              << " explicit NumElemBegin NumElemEnd NumRun" << std::endl
+              << " nonlinear NumElemBegin NumElemEnd NumRun" << std::endl
+              << " nonlinear_quadratic NumElemBegin NumElemEnd NumRun" << std::endl ;
+
+  }
+}
+
+} // namespace
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+// Program entry point: initialize the parallel machine, run the requested
+// command and finalize the machine again.
+//
+// Returns 0 on success and 1 when the command threw an exception, so that
+// a failed test is visible in the process exit status. The machine is
+// finalized in both cases.
+int main( int argc , char ** argv )
+{
+  comm::Machine machine = comm::Machine::init( & argc , & argv );
+
+  const unsigned comm_rank = comm::rank( machine );
+
+  const std::string argline = comm::command_line( machine , argc , argv );
+
+  int exit_status = 0 ;
+
+  try {
+    run( argline , machine );
+  }
+  catch( const std::exception & x ) {
+    std::cerr << "P" << comm_rank << " throw: " << x.what() << std::endl ;
+    exit_status = 1 ;
+  }
+  catch( ... ) {
+    std::cerr << "P" << comm_rank << " throw: unknown exception" << std::endl ;
+    exit_status = 1 ;
+  }
+
+  comm::Machine::finalize();
+
+  return exit_status ;
+}
+
diff --git a/lib/kokkos/example/query_device/CMakeLists.txt b/lib/kokkos/example/query_device/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dade7f01fef5c935ab3e11bcffc5722ed4b9d1d5
--- /dev/null
+++ b/lib/kokkos/example/query_device/CMakeLists.txt
@@ -0,0 +1,14 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+SET(SOURCES "")
+
+FILE(GLOB SOURCES *.cpp)
+
+TRIBITS_ADD_EXECUTABLE(
+  query_device
+  SOURCES ${SOURCES}
+  COMM serial mpi
+  )
+
diff --git a/lib/kokkos/example/query_device/Makefile b/lib/kokkos/example/query_device/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..bf8fbea3e09a5d71f900de85ff2100cf41bd5738
--- /dev/null
+++ b/lib/kokkos/example/query_device/Makefile
@@ -0,0 +1,53 @@
+KOKKOS_PATH ?= ../..
+ +MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) +SRC_DIR := $(dir $(MAKEFILE_PATH)) + +SRC = $(wildcard $(SRC_DIR)/*.cpp) +OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o) + +#SRC = $(wildcard *.cpp) +#OBJ = $(SRC:%.cpp=%.o) + +default: build + echo "Start Build" + +# use installed Makefile.kokkos +include $(KOKKOS_PATH)/Makefile.kokkos + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = $(NVCC_WRAPPER) +CXXFLAGS = -I$(SRC_DIR) -O3 +LINK = $(CXX) +LINKFLAGS = +EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR))) +#KOKKOS_DEVICES = "Cuda,OpenMP" +#KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -I$(SRC_DIR) -O3 +LINK = $(CXX) +LINKFLAGS = +EXE = $(addsuffix .host, $(shell basename $(SRC_DIR))) +#KOKKOS_DEVICES = "OpenMP" +#KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +LIB = + + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: + rm -f *.a *.o *.cuda *.host + +# Compilation rules + +%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + diff --git a/lib/kokkos/example/query_device/query_device.cpp b/lib/kokkos/example/query_device/query_device.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ced8cc4e95a5170441ecf1f9fae9113c885dd7d4 --- /dev/null +++ b/lib/kokkos/example/query_device/query_device.cpp @@ -0,0 +1,100 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <iostream> +#include <sstream> + +#include <Kokkos_Macros.hpp> + +#if defined( KOKKOS_HAVE_MPI ) +#include <mpi.h> +#endif + +#include <Kokkos_Core.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +int main( int argc , char ** argv ) +{ + std::ostringstream msg ; + +#if defined( KOKKOS_HAVE_MPI ) + + MPI_Init( & argc , & argv ); + + int mpi_rank = 0 ; + + MPI_Comm_rank( MPI_COMM_WORLD , & mpi_rank ); + + msg << "MPI rank(" << mpi_rank << ") " ; + +#endif + + msg << "{" << std::endl ; + + if ( Kokkos::hwloc::available() ) { + msg << "hwloc( NUMA[" << Kokkos::hwloc::get_available_numa_count() + << "] x CORE[" << Kokkos::hwloc::get_available_cores_per_numa() + << "] x HT[" << Kokkos::hwloc::get_available_threads_per_core() + << "] )" + << std::endl ; + } + +#if defined( KOKKOS_HAVE_CUDA ) + Kokkos::Cuda::print_configuration( msg ); +#endif + + msg << "}" << std::endl ; + + std::cout << msg.str(); + +#if defined( KOKKOS_HAVE_MPI ) + + MPI_Finalize(); + +#endif + + return 0 ; +} + diff --git a/lib/kokkos/example/sort_array/CMakeLists.txt b/lib/kokkos/example/sort_array/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..0c7da74f4a9b94dbcdb2a2dc5d192203a319b048 --- /dev/null +++ b/lib/kokkos/example/sort_array/CMakeLists.txt @@ -0,0 +1,14 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +SET(SOURCES "") + +FILE(GLOB SOURCES *.cpp) + +TRIBITS_ADD_EXECUTABLE( + sort_array + SOURCES ${SOURCES} + COMM serial mpi + ) + diff --git a/lib/kokkos/example/sort_array/Makefile b/lib/kokkos/example/sort_array/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..bf8fbea3e09a5d71f900de85ff2100cf41bd5738 --- 
/dev/null +++ b/lib/kokkos/example/sort_array/Makefile @@ -0,0 +1,53 @@ +KOKKOS_PATH ?= ../.. + +MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) +SRC_DIR := $(dir $(MAKEFILE_PATH)) + +SRC = $(wildcard $(SRC_DIR)/*.cpp) +OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o) + +#SRC = $(wildcard *.cpp) +#OBJ = $(SRC:%.cpp=%.o) + +default: build + echo "Start Build" + +# use installed Makefile.kokkos +include $(KOKKOS_PATH)/Makefile.kokkos + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = $(NVCC_WRAPPER) +CXXFLAGS = -I$(SRC_DIR) -O3 +LINK = $(CXX) +LINKFLAGS = +EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR))) +#KOKKOS_DEVICES = "Cuda,OpenMP" +#KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -I$(SRC_DIR) -O3 +LINK = $(CXX) +LINKFLAGS = +EXE = $(addsuffix .host, $(shell basename $(SRC_DIR))) +#KOKKOS_DEVICES = "OpenMP" +#KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +LIB = + + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: + rm -f *.a *.o *.cuda *.host + +# Compilation rules + +%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + diff --git a/lib/kokkos/example/sort_array/main.cpp b/lib/kokkos/example/sort_array/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..74c4ab154cf0856bd53f654e2f33dd884d49ddcd --- /dev/null +++ b/lib/kokkos/example/sort_array/main.cpp @@ -0,0 +1,95 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <string.h> +#include <stdlib.h> +#include <iostream> +#include <sstream> + +#include <Kokkos_Core.hpp> + +#include <sort_array.hpp> + + +int main( int argc , char ** argv ) +{ +#if defined( KOKKOS_HAVE_CUDA ) || defined( KOKKOS_HAVE_PTHREAD ) || defined( KOKKOS_HAVE_OPENMP ) + Kokkos::initialize( argc , argv ); + + int length_array = 100000 ; + + for ( int i = 0 ; i < argc ; ++i ) { + if ( 0 == strcmp( argv[i] , "length_array" ) ) { + length_array = atoi( argv[i+1] ); + } + } + + int length_total_array = length_array * 100; + +#if defined( KOKKOS_HAVE_CUDA ) + if ( Kokkos::Cuda::is_initialized() ) { + std::cout << "Kokkos::Cuda" << std::endl ; + Example::sort_array< Kokkos::Cuda >( length_array , length_total_array ); + } +#endif + +#if defined( KOKKOS_HAVE_PTHREAD ) + if ( Kokkos::Threads::is_initialized() ) { + std::cout << "Kokkos::Threads" << std::endl ; + Example::sort_array< Kokkos::Threads >( length_array , length_total_array ); + } +#endif + +#if defined( KOKKOS_HAVE_OPENMP ) + if ( Kokkos::OpenMP::is_initialized() ) { + std::cout << "Kokkos::OpenMP" << std::endl ; + Example::sort_array< Kokkos::OpenMP >( length_array , length_total_array ); + } +#endif + + Kokkos::finalize(); +#endif + + return 0 ; +} + diff --git a/lib/kokkos/example/sort_array/sort_array.hpp b/lib/kokkos/example/sort_array/sort_array.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d21f9989582c7be28e7c5c1c0f325330cc340e78 --- /dev/null +++ b/lib/kokkos/example/sort_array/sort_array.hpp @@ -0,0 +1,190 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef EXAMPLE_SORT_ARRAY +#define EXAMPLE_SORT_ARRAY + +#include <stdlib.h> +#include <algorithm> + +#include <Kokkos_Core.hpp> + +#include <impl/Kokkos_Timer.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Example { + +template< class Device > +struct SortView { + + template< typename ValueType > + SortView( const Kokkos::View<ValueType*,Device> v , int begin , int end ) + { + std::sort( v.ptr_on_device() + begin , v.ptr_on_device() + end ); + } +}; + +} + +#if defined(KOKKOS_HAVE_CUDA) + +#include <thrust/device_ptr.h> +#include <thrust/sort.h> + +namespace Example { + +template<> +struct SortView< Kokkos::Cuda > { + template< typename ValueType > + SortView( const Kokkos::View<ValueType*,Kokkos::Cuda> v , int begin , int end ) + { + thrust::sort( thrust::device_ptr<ValueType>( v.ptr_on_device() + begin ) + , thrust::device_ptr<ValueType>( v.ptr_on_device() + end ) ); + } +}; + +} + +#endif + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Example { + +template< class Device > +void sort_array( const size_t array_length /* length of spans of array to sort */ + , const size_t total_length /* total length of array */ + , const int print = 1 ) +{ + typedef Device execution_space ; + typedef Kokkos::View<int*,Device> device_array_type ; + +#if defined( KOKKOS_HAVE_CUDA ) + + typedef typename + Kokkos::Impl::if_c< Kokkos::Impl::is_same< Device , Kokkos::Cuda >::value + , Kokkos::View<int*,Kokkos::Cuda::array_layout,Kokkos::CudaHostPinnedSpace> + , typename device_array_type::HostMirror + >::type host_array_type ; + +#else + + typedef typename device_array_type::HostMirror 
host_array_type ; + +#endif + + Kokkos::Timer timer; + + const device_array_type work_array("work_array" , array_length ); + const host_array_type host_array("host_array" , total_length ); + + std::cout << "sort_array length( " << total_length << " )" + << " in chunks( " << array_length << " )" + << std::endl ; + + double sec = timer.seconds(); + std::cout << "declaring Views took " + << sec << " seconds" << std::endl; + timer.reset(); + + for ( size_t i = 0 ; i < total_length ; ++i ) { + host_array(i) = ( lrand48() * total_length ) >> 31 ; + } + + sec = timer.seconds(); + std::cout << "initializing " << total_length << " elements on host took " + << sec << " seconds" << std::endl; + timer.reset(); + + double sec_copy_in = 0 ; + double sec_sort = 0 ; + double sec_copy_out = 0 ; + double sec_error = 0 ; + size_t error_count = 0 ; + + for ( size_t begin = 0 ; begin < total_length ; begin += array_length ) { + + const size_t end = begin + array_length < total_length + ? begin + array_length : total_length ; + + const std::pair<size_t,size_t> host_range(begin,end); + + const host_array_type host_subarray = Kokkos::subview( host_array , host_range ); + + timer.reset(); + + Kokkos::deep_copy( work_array , host_subarray ); + + sec_copy_in += timer.seconds(); timer.reset(); + + SortView< execution_space >( work_array , 0 , end - begin ); + + sec_sort += timer.seconds(); timer.reset(); + + Kokkos::deep_copy( host_subarray , work_array ); + + sec_copy_out += timer.seconds(); timer.reset(); + + for ( size_t i = begin + 1 ; i < end ; ++i ) { + if ( host_array(i) < host_array(i-1) ) ++error_count ; + } + + sec_error += timer.seconds(); timer.reset(); + } + + std::cout << "copy to device " << sec_copy_in << " seconds" << std::endl + << "sort on device " << sec_sort << " seconds" << std::endl + << "copy from device " << sec_copy_out << " seconds" << std::endl + << "errors " << error_count << " took " << sec_error << " seconds" << std::endl + ; +} + +} // namespace Example + 
+//---------------------------------------------------------------------------- + +#endif /* #ifndef EXAMPLE_SORT_ARRAY */ + diff --git a/lib/kokkos/example/tutorial/01_hello_world/CMakeLists.txt b/lib/kokkos/example/tutorial/01_hello_world/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5e5b1fcb46ffbdcb7dacf3bcb6627fa90c7a1157 --- /dev/null +++ b/lib/kokkos/example/tutorial/01_hello_world/CMakeLists.txt @@ -0,0 +1,11 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# This is a tutorial, not a test, so we don't ask CTest to run it. +TRIBITS_ADD_EXECUTABLE( + tutorial_01_hello_world + SOURCES hello_world.cpp + COMM serial mpi + ) + diff --git a/lib/kokkos/example/tutorial/01_hello_world/Makefile b/lib/kokkos/example/tutorial/01_hello_world/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..78a9fed0cce641b48c85f4d67a1d0ab6c5a63388 --- /dev/null +++ b/lib/kokkos/example/tutorial/01_hello_world/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ../../.. 
+SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/01_hello_world/hello_world.cpp b/lib/kokkos/example/tutorial/01_hello_world/hello_world.cpp new file mode 100644 index 0000000000000000000000000000000000000000..459b9b094fc93475ef62482ab4fff62d956cc9eb --- /dev/null +++ b/lib/kokkos/example/tutorial/01_hello_world/hello_world.cpp @@ -0,0 +1,130 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <cstdio> +#include <typeinfo> + +// +// "Hello world" parallel_for example: +// 1. Start up Kokkos +// 2. Execute a parallel for loop in the default execution space, +// using a functor to define the loop body +// 3. Shut down Kokkos +// +// If Kokkos was built with C++11 enabled, try comparing this example +// to 01_hello_world_lambda. The latter uses C++11 lambdas (anonymous +// functions) to define the loop body of the parallel_for. That makes +// the code much more concise and readable. 
 On the other hand, +// breaking out the loop body into an explicit functor makes it easier +// to test the loop independently of the parallel pattern. +// + +// Functor that defines the parallel_for's loop body. +// +// A "functor" is just a class or struct with a public operator() +// instance method. +struct hello_world { + // If a functor has an "execution_space" (or "device_type", for + // backwards compatibility) public typedef, parallel_* will only run + // the functor in that execution space. That's a good way to mark a + // functor as specific to an execution space. If the functor lacks + // this typedef, parallel_for will run it in the default execution + // space, unless you tell it otherwise (that's an advanced topic; + // see "execution policies"). + + // The functor's operator() defines the loop body. It takes an + // integer argument which is the parallel for loop index. Other + // arguments are possible; see the "hierarchical parallelism" part + // of the tutorial. + // + // The operator() method must be const, and must be marked with the + // KOKKOS_INLINE_FUNCTION macro. If building with CUDA, this macro + // will mark your method as suitable for running on the CUDA device + // (as well as on the host). If not building with CUDA, the macro + // is unnecessary but harmless. + KOKKOS_INLINE_FUNCTION + void operator() (const int i) const { + printf ("Hello from i = %i\n", i); + } +}; + +int main (int argc, char* argv[]) { + // You must call initialize() before you may call Kokkos. + // + // With no arguments, this initializes the default execution space + // (and potentially its host execution space) with default + // parameters. You may also pass in argc and argv, analogously to + // MPI_Init(). It reads and removes command-line arguments that + // start with "--kokkos-". + Kokkos::initialize (argc, argv); + + // Print the name of Kokkos' default execution space. 
 We're using + // typeid here, so the name might get a bit mangled by the compiler, + // but you should still be able to figure out what it is. + printf ("Hello World on Kokkos execution space %s\n", + typeid (Kokkos::DefaultExecutionSpace).name ()); + + // Run the above functor on the default Kokkos execution space in + // parallel, with a parallel for loop count of 15. + // + // The Kokkos::DefaultExecutionSpace typedef gives the default + // execution space. Depending on how Kokkos was configured, this + // could be OpenMP, Threads, Cuda, Serial, or even some other + // execution space. + // + // The following line of code would look like this in OpenMP: + // + // #pragma omp parallel for + // for (int i = 0; i < 15; ++i) { + // printf ("Hello from i = %i\n", i); + // } + // + // You may notice that the printed numbers do not print out in + // order. Parallel for loops may execute in any order. + Kokkos::parallel_for ("HelloWorld",15, hello_world ()); + + // You must call finalize() after you are done using Kokkos. + Kokkos::finalize (); +} + diff --git a/lib/kokkos/example/tutorial/01_hello_world_lambda/CMakeLists.txt b/lib/kokkos/example/tutorial/01_hello_world_lambda/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fcca4bceba577bf644f1929e1c62c1893b5d5a5 --- /dev/null +++ b/lib/kokkos/example/tutorial/01_hello_world_lambda/CMakeLists.txt @@ -0,0 +1,13 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +IF (Kokkos_ENABLE_CXX11) + # This is a tutorial, not a test, so we don't ask CTest to run it. 
+ TRIBITS_ADD_EXECUTABLE( + tutorial_01_hello_world_lambda + SOURCES hello_world_lambda.cpp + COMM serial mpi + ) +ENDIF () + diff --git a/lib/kokkos/example/tutorial/01_hello_world_lambda/Makefile b/lib/kokkos/example/tutorial/01_hello_world_lambda/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..95ee2c47feacf363f99052173a28596144a75734 --- /dev/null +++ b/lib/kokkos/example/tutorial/01_hello_world_lambda/Makefile @@ -0,0 +1,44 @@ +KOKKOS_PATH = ../../.. +SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +KOKKOS_CUDA_OPTIONS = "enable_lambda" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp b/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b6c9cc5e4380d4ea8b825c9305f2e7cea6316a10 --- /dev/null +++ b/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp @@ -0,0 +1,109 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <cstdio> +#include <typeinfo> + +// +// "Hello world" parallel_for example: +// 1. 
 Start up Kokkos +// 2. Execute a parallel for loop in the default execution space, +// using a C++11 lambda to define the loop body +// 3. Shut down Kokkos +// +// This example only builds if C++11 is enabled. Compare this example +// to 01_hello_world, which uses functors (explicitly defined classes) +// to define the loop body of the parallel_for. Both functors and +// lambdas have their places. +// + +int main (int argc, char* argv[]) { + // You must call initialize() before you may call Kokkos. + // + // With no arguments, this initializes the default execution space + // (and potentially its host execution space) with default + // parameters. You may also pass in argc and argv, analogously to + // MPI_Init(). It reads and removes command-line arguments that + // start with "--kokkos-". + Kokkos::initialize (argc, argv); + + // Print the name of Kokkos' default execution space. We're using + // typeid here, so the name might get a bit mangled by the compiler, + // but you should still be able to figure out what it is. + printf ("Hello World on Kokkos execution space %s\n", + typeid (Kokkos::DefaultExecutionSpace).name ()); + + // Run lambda on the default Kokkos execution space in parallel, + // with a parallel for loop count of 15. The lambda's argument is + // an integer which is the parallel for's loop index. As you learn + // about different kinds of parallelism, you will find out that + // there are other valid argument types as well. + // + // For a single level of parallelism, we prefer that you use the + // KOKKOS_LAMBDA macro. If CUDA is disabled, this just turns into + // [=]. That captures variables from the surrounding scope by + // value. Do NOT capture them by reference! If CUDA is enabled, + // this macro may have a special definition that makes the lambda + // work correctly with CUDA. Compare to the KOKKOS_INLINE_FUNCTION + // macro, which has a special meaning if CUDA is enabled. 
+ // + // The following parallel_for would look like this if we were using + // OpenMP by itself, instead of Kokkos: + // + // #pragma omp parallel for + // for (int i = 0; i < 15; ++i) { + // printf ("Hello from i = %i\n", i); + // } + // + // You may notice that the printed numbers do not print out in + // order. Parallel for loops may execute in any order. + Kokkos::parallel_for (15, KOKKOS_LAMBDA (const int i) { + // printf works in a CUDA parallel kernel; std::ostream does not. + printf ("Hello from i = %i\n", i); + }); + + // You must call finalize() after you are done using Kokkos. + Kokkos::finalize (); +} + diff --git a/lib/kokkos/example/tutorial/02_simple_reduce/CMakeLists.txt b/lib/kokkos/example/tutorial/02_simple_reduce/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7c78db840f849fd9625676c6a73e8aa037b52b4d --- /dev/null +++ b/lib/kokkos/example/tutorial/02_simple_reduce/CMakeLists.txt @@ -0,0 +1,10 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# This is a tutorial, not a test, so we don't ask CTest to run it. +TRIBITS_ADD_EXECUTABLE( + tutorial_02_simple_reduce + SOURCES simple_reduce.cpp + COMM serial mpi + ) diff --git a/lib/kokkos/example/tutorial/02_simple_reduce/Makefile b/lib/kokkos/example/tutorial/02_simple_reduce/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..78a9fed0cce641b48c85f4d67a1d0ab6c5a63388 --- /dev/null +++ b/lib/kokkos/example/tutorial/02_simple_reduce/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ../../.. 
+SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/02_simple_reduce/simple_reduce.cpp b/lib/kokkos/example/tutorial/02_simple_reduce/simple_reduce.cpp new file mode 100644 index 0000000000000000000000000000000000000000..236618f7be9cf59044169cb82b0894e43bf8351e --- /dev/null +++ b/lib/kokkos/example/tutorial/02_simple_reduce/simple_reduce.cpp @@ -0,0 +1,101 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <cstdio> + +// +// First reduction (parallel_reduce) example: +// 1. Start up Kokkos +// 2. Execute a parallel_reduce loop in the default execution space, +// using a functor to define the loop body +// 3. Shut down Kokkos +// +// Compare this example to 02_simple_reduce_lambda, which uses a C++11 +// lambda to define the loop body of the parallel_reduce. +// + +// Reduction functor for computing the sum of squares. +// +// More advanced reduction examples will show how to control the +// reduction's "join" operator. 
If the join operator is not provided, +// it defaults to binary operator+ (adding numbers together). +struct squaresum { + // Specify the type of the reduction value with a "value_type" + // typedef. In this case, the reduction value has type int. + typedef int value_type; + + // The reduction functor's operator() looks a little different than + // the parallel_for functor's operator(). For the reduction, we + // pass in both the loop index i, and the intermediate reduction + // value lsum. The latter MUST be passed in by nonconst reference. + // (If the reduction type is an array like int[], indicating an + // array reduction result, then the second argument is just int[].) + KOKKOS_INLINE_FUNCTION + void operator () (const int i, int& lsum) const { + lsum += i*i; // compute the sum of squares + } +}; + +int main (int argc, char* argv[]) { + Kokkos::initialize (argc, argv); + const int n = 10; + + // Compute the sum of squares of integers from 0 to n-1, in + // parallel, using Kokkos. + int sum = 0; + Kokkos::parallel_reduce (n, squaresum (), sum); + printf ("Sum of squares of integers from 0 to %i, " + "computed in parallel, is %i\n", n - 1, sum); + + // Compare to a sequential loop. + int seqSum = 0; + for (int i = 0; i < n; ++i) { + seqSum += i*i; + } + printf ("Sum of squares of integers from 0 to %i, " + "computed sequentially, is %i\n", n - 1, seqSum); + Kokkos::finalize (); + return (sum == seqSum) ? 
0 : -1; +} + diff --git a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/CMakeLists.txt b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..e2e3a929f1ade97ce639670a3f28c43bb9ce084f --- /dev/null +++ b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/CMakeLists.txt @@ -0,0 +1,12 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +IF (Kokkos_ENABLE_CXX11) + # This is a tutorial, not a test, so we don't ask CTest to run it. + TRIBITS_ADD_EXECUTABLE( + tutorial_02_simple_reduce_lambda + SOURCES simple_reduce_lambda.cpp + COMM serial mpi + ) +ENDIF () diff --git a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/Makefile b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..95ee2c47feacf363f99052173a28596144a75734 --- /dev/null +++ b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/Makefile @@ -0,0 +1,44 @@ +KOKKOS_PATH = ../../.. 
+SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +KOKKOS_CUDA_OPTIONS = "enable_lambda" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a403633a8a898375f2f5c0d4015fc3930570ef0d --- /dev/null +++ b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp @@ -0,0 +1,86 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <cstdio> + +// +// First reduction (parallel_reduce) example: +// 1. Start up Kokkos +// 2. Execute a parallel_reduce loop in the default execution space, +// using a C++11 lambda to define the loop body +// 3. Shut down Kokkos +// +// This example only builds if C++11 is enabled. Compare this example +// to 02_simple_reduce, which uses a functor to define the loop body +// of the parallel_reduce. +// + +int main (int argc, char* argv[]) { + Kokkos::initialize (argc, argv); + const int n = 10; + + // Compute the sum of squares of integers from 0 to n-1, in + // parallel, using Kokkos. 
This time, use a lambda instead of a + // functor. The lambda takes the same arguments as the functor's + // operator(). + int sum = 0; + // The KOKKOS_LAMBDA macro replaces the capture-by-value clause [=]. + // It also handles any other syntax needed for CUDA. + Kokkos::parallel_reduce (n, KOKKOS_LAMBDA (const int i, int& lsum) { + lsum += i*i; + }, sum); + printf ("Sum of squares of integers from 0 to %i, " + "computed in parallel, is %i\n", n - 1, sum); + + // Compare to a sequential loop. + int seqSum = 0; + for (int i = 0; i < n; ++i) { + seqSum += i*i; + } + printf ("Sum of squares of integers from 0 to %i, " + "computed sequentially, is %i\n", n - 1, seqSum); + Kokkos::finalize (); + return (sum == seqSum) ? 0 : -1; +} + diff --git a/lib/kokkos/example/tutorial/03_simple_view/CMakeLists.txt b/lib/kokkos/example/tutorial/03_simple_view/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7475a99e492bcf88c6a3ca9b98cc698fa9a38b3d --- /dev/null +++ b/lib/kokkos/example/tutorial/03_simple_view/CMakeLists.txt @@ -0,0 +1,10 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# This is a tutorial, not a test, so we don't ask CTest to run it. +TRIBITS_ADD_EXECUTABLE( + tutorial_03_simple_view + SOURCES simple_view.cpp + COMM serial mpi + ) diff --git a/lib/kokkos/example/tutorial/03_simple_view/Makefile b/lib/kokkos/example/tutorial/03_simple_view/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..78a9fed0cce641b48c85f4d67a1d0ab6c5a63388 --- /dev/null +++ b/lib/kokkos/example/tutorial/03_simple_view/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ../../.. 
+SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/03_simple_view/simple_view.cpp b/lib/kokkos/example/tutorial/03_simple_view/simple_view.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dbbb9d867dc626ae251efe3fa3f5be27b2b8dfcf --- /dev/null +++ b/lib/kokkos/example/tutorial/03_simple_view/simple_view.cpp @@ -0,0 +1,142 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// +// First Kokkos::View (multidimensional array) example: +// 1. Start up Kokkos +// 2. Allocate a Kokkos::View +// 3. Execute a parallel_for and a parallel_reduce over that View's data +// 4. Shut down Kokkos +// +// Compare this example to 03_simple_view_lambda, which uses C++11 +// lambdas to define the loop bodies of the parallel_for and +// parallel_reduce. +// + +#include <Kokkos_Core.hpp> +#include <cstdio> + +// A Kokkos::View is an array of zero or more dimensions. The number +// of dimensions is specified at compile time, as part of the type of +// the View. This array has two dimensions. 
The first one +// (represented by the asterisk) is a run-time dimension, and the +// second (represented by [3]) is a compile-time dimension. Thus, +// this View type is an N x 3 array of type double, where N is +// specified at run time in the View's constructor. +// +// The first dimension of the View is the dimension over which it is +// efficient for Kokkos to parallelize. +typedef Kokkos::View<double*[3]> view_type; + +// parallel_for functor that fills the View given to its constructor. +// The View must already have been allocated. +struct InitView { + view_type a; + + // Views have "view semantics." This means that they behave like + // pointers, not like std::vector. Their copy constructor and + // operator= only do shallow copies. Thus, you can pass View + // objects around by "value"; they won't do a deep copy unless you + // explicitly ask for a deep copy. + InitView (view_type a_) : + a (a_) + {} + + // Fill the View with some data. The parallel_for loop will iterate + // over the View's first dimension N. + KOKKOS_INLINE_FUNCTION + void operator () (const int i) const { + // Access the View just like a Fortran array. The layout depends + // on the View's memory space, so don't rely on the View's + // physical memory layout unless you know what you're doing. + a(i,0) = 1.0*i; + a(i,1) = 1.0*i*i; + a(i,2) = 1.0*i*i*i; + } +}; + +// Reduction functor that reads the View given to its constructor. +struct ReduceFunctor { + view_type a; + + // Constructor takes View by "value"; this does a shallow copy. + ReduceFunctor (view_type a_) : a (a_) {} + + // If you write a functor to do a reduction, you must specify the + // type of the reduction result via a public 'value_type' typedef. + typedef double value_type; + + KOKKOS_INLINE_FUNCTION + void operator() (int i, double &lsum) const { + lsum += a(i,0)*a(i,1)/(a(i,2)+0.1); + } +}; + +int main (int argc, char* argv[]) { + Kokkos::initialize (argc, argv); + const int N = 10; + + // Allocate the View.
The first dimension is a run-time parameter + // N. We set N = 10 here. The second dimension is a compile-time + // parameter, 3. We don't specify it here because we already set it + // by declaring the type of the View. + // + // Views get initialized to zero by default. This happens in + // parallel, using the View's memory space's default execution + // space. Parallel initialization ensures first-touch allocation. + // There is a way to shut off default initialization. + // + // You may NOT allocate a View inside of a parallel_{for, reduce, + // scan}. Treat View allocation as a "thread collective." + // + // The string "A" is just the label; it only matters for debugging. + // Different Views may have the same label. + view_type a ("A", N); + + Kokkos::parallel_for (N, InitView (a)); + double sum = 0; + Kokkos::parallel_reduce (N, ReduceFunctor (a), sum); + printf ("Result: %f\n", sum); + Kokkos::finalize (); +} + diff --git a/lib/kokkos/example/tutorial/03_simple_view_lambda/CMakeLists.txt b/lib/kokkos/example/tutorial/03_simple_view_lambda/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..601fe452a4c90a1506aa012a6a99a617fbc1d9af --- /dev/null +++ b/lib/kokkos/example/tutorial/03_simple_view_lambda/CMakeLists.txt @@ -0,0 +1,12 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +IF (Kokkos_ENABLE_CXX11) + # This is a tutorial, not a test, so we don't ask CTest to run it. + TRIBITS_ADD_EXECUTABLE( + tutorial_03_simple_view_lambda + SOURCES simple_view_lambda.cpp + COMM serial mpi + ) +ENDIF () diff --git a/lib/kokkos/example/tutorial/03_simple_view_lambda/Makefile b/lib/kokkos/example/tutorial/03_simple_view_lambda/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..95ee2c47feacf363f99052173a28596144a75734 --- /dev/null +++ b/lib/kokkos/example/tutorial/03_simple_view_lambda/Makefile @@ -0,0 +1,44 @@ +KOKKOS_PATH = ../../.. 
+SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +KOKKOS_CUDA_OPTIONS = "enable_lambda" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp b/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..974af747763bfba23a2f6d3dfeefe68fb9ec4e25 --- /dev/null +++ b/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp @@ -0,0 +1,116 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// +// First Kokkos::View (multidimensional array) example: +// 1. Start up Kokkos +// 2. Allocate a Kokkos::View +// 3. Execute a parallel_for and a parallel_reduce over that View's data +// 4. Shut down Kokkos +// +// Compare this example to 03_simple_view, which uses functors to +// define the loop bodies of the parallel_for and parallel_reduce. +// + +#include <Kokkos_Core.hpp> +#include <cstdio> + +// A Kokkos::View is an array of zero or more dimensions. The number +// of dimensions is specified at compile time, as part of the type of +// the View. This array has two dimensions. 
The first one +// (represented by the asterisk) is a run-time dimension, and the +// second (represented by [3]) is a compile-time dimension. Thus, +// this View type is an N x 3 array of type double, where N is +// specified at run time in the View's constructor. +// +// The first dimension of the View is the dimension over which it is +// efficient for Kokkos to parallelize. +typedef Kokkos::View<double*[3]> view_type; + +int main (int argc, char* argv[]) { + Kokkos::initialize (argc, argv); + + // Allocate the View. The first dimension is a run-time parameter + // N. We set N = 10 here. The second dimension is a compile-time + // parameter, 3. We don't specify it here because we already set it + // by declaring the type of the View. + // + // Views get initialized to zero by default. This happens in + // parallel, using the View's memory space's default execution + // space. Parallel initialization ensures first-touch allocation. + // There is a way to shut off default initialization. + // + // You may NOT allocate a View inside of a parallel_{for, reduce, + // scan}. Treat View allocation as a "thread collective." + // + // The string "A" is just the label; it only matters for debugging. + // Different Views may have the same label. + view_type a ("A", 10); + + // Fill the View with some data. The parallel_for loop will iterate + // over the View's first dimension N. + // + // Note that the View is passed by value into the lambda. The macro + // KOKKOS_LAMBDA includes the "capture by value" clause [=]. This + // tells the lambda to "capture all variables in the enclosing scope + // by value." Views have "view semantics"; they behave like + // pointers, not like std::vector. Passing them by value does a + // shallow copy. A deep copy never happens unless you explicitly + // ask for one. + Kokkos::parallel_for (10, KOKKOS_LAMBDA (const int i) { + // Access the View just like a Fortran array.
The layout depends + // on the View's memory space, so don't rely on the View's + // physical memory layout unless you know what you're doing. + a(i,0) = 1.0*i; + a(i,1) = 1.0*i*i; + a(i,2) = 1.0*i*i*i; + }); + // Reduction lambda that reads the View it captured by value. + double sum = 0; + Kokkos::parallel_reduce (10, KOKKOS_LAMBDA (const int i, double& lsum) { + lsum += a(i,0)*a(i,1)/(a(i,2)+0.1); + }, sum); + printf ("Result: %f\n", sum); + Kokkos::finalize (); +} + diff --git a/lib/kokkos/example/tutorial/04_simple_memoryspaces/CMakeLists.txt b/lib/kokkos/example/tutorial/04_simple_memoryspaces/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..09f209077a08d64c86454a59875ecda8d329e2f7 --- /dev/null +++ b/lib/kokkos/example/tutorial/04_simple_memoryspaces/CMakeLists.txt @@ -0,0 +1,10 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# This is a tutorial, not a test, so we don't ask CTest to run it. +TRIBITS_ADD_EXECUTABLE( + tutorial_04_simple_memoryspaces + SOURCES simple_memoryspaces.cpp + COMM serial mpi + ) diff --git a/lib/kokkos/example/tutorial/04_simple_memoryspaces/Makefile b/lib/kokkos/example/tutorial/04_simple_memoryspaces/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..78a9fed0cce641b48c85f4d67a1d0ab6c5a63388 --- /dev/null +++ b/lib/kokkos/example/tutorial/04_simple_memoryspaces/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ../../..
+SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/04_simple_memoryspaces/simple_memoryspaces.cpp b/lib/kokkos/example/tutorial/04_simple_memoryspaces/simple_memoryspaces.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c6a4bebfa525c6114681122af59e0dfaf1a71c70 --- /dev/null +++ b/lib/kokkos/example/tutorial/04_simple_memoryspaces/simple_memoryspaces.cpp @@ -0,0 +1,101 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <cstdio> + +// The type of a two-dimensional N x 3 array of double. +// It lives in Kokkos' default memory space. +typedef Kokkos::View<double*[3]> view_type; + +// The "HostMirror" type corresponding to view_type above is also a +// two-dimensional N x 3 array of double. However, it lives in the +// host memory space corresponding to view_type's memory space. For +// example, if view_type lives in CUDA device memory, host_view_type +// lives in host (CPU) memory. 
Furthermore, declaring host_view_type +// as the host mirror of view_type means that host_view_type has the +// same layout as view_type. This makes it easier to copy between the +// two Views. +// Advanced issues: If a memory space is accessible from the host without +// performance penalties, then it is its own host_mirror_space. This is +// the case for HostSpace, CudaUVMSpace and CudaHostPinnedSpace. + +typedef view_type::HostMirror host_view_type; + +struct ReduceFunctor { + view_type a; + ReduceFunctor (view_type a_) : a (a_) {} + typedef int value_type; //Specify type for reduction value, lsum + + KOKKOS_INLINE_FUNCTION + void operator() (int i, int &lsum) const { + lsum += a(i,0)-a(i,1)+a(i,2); + } +}; + +int main() { + Kokkos::initialize(); + + view_type a ("A", 10); + // If view_type and host_view_type live in the same memory space, + // a "mirror view" is just an alias, and deep_copy does nothing. + // Otherwise, a mirror view of a device View lives in host memory, + // and deep_copy does a deep copy. + host_view_type h_a = Kokkos::create_mirror_view (a); + + // The View h_a lives in host (CPU) memory, so it's legal to fill + // the view sequentially using ordinary code, like this. + for (int i = 0; i < 10; i++) { + for (int j = 0; j < 3; j++) { + h_a(i,j) = i*10 + j; + } + } + Kokkos::deep_copy (a, h_a); // Copy from host to device.
+ + int sum = 0; + Kokkos::parallel_reduce (10, ReduceFunctor (a), sum); + printf ("Result is %i\n",sum); + + Kokkos::finalize (); +} + diff --git a/lib/kokkos/example/tutorial/05_simple_atomics/CMakeLists.txt b/lib/kokkos/example/tutorial/05_simple_atomics/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5a5790fb0488be791112c3ef0c38655e6da78724 --- /dev/null +++ b/lib/kokkos/example/tutorial/05_simple_atomics/CMakeLists.txt @@ -0,0 +1,10 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# This is a tutorial, not a test, so we don't ask CTest to run it. +TRIBITS_ADD_EXECUTABLE( + tutorial_05_simple_atomics + SOURCES simple_atomics.cpp + COMM serial mpi + ) diff --git a/lib/kokkos/example/tutorial/05_simple_atomics/Makefile b/lib/kokkos/example/tutorial/05_simple_atomics/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..78a9fed0cce641b48c85f4d67a1d0ab6c5a63388 --- /dev/null +++ b/lib/kokkos/example/tutorial/05_simple_atomics/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ../../.. 
+SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/05_simple_atomics/simple_atomics.cpp b/lib/kokkos/example/tutorial/05_simple_atomics/simple_atomics.cpp new file mode 100644 index 0000000000000000000000000000000000000000..de9c219d5b0b9576b042caaefcc2f7b149901647 --- /dev/null +++ b/lib/kokkos/example/tutorial/05_simple_atomics/simple_atomics.cpp @@ -0,0 +1,137 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <cstdio> +#include <cstdlib> +#include <cmath> + +// Type of a one-dimensional length-N array of int. +typedef Kokkos::View<int*> view_type; +typedef view_type::HostMirror host_view_type; +// This is a "zero-dimensional" View, that is, a View of a single +// value (an int, in this case). Access the value using operator() +// with no arguments: e.g., 'count()'. +// +// Zero-dimensional Views are useful for reduction results that stay +// resident in device memory, as well as for irregularly updated +// shared state. We use it for the latter in this example. 
+typedef Kokkos::View<int> count_type; +typedef count_type::HostMirror host_count_type; + + +// Functor for finding a list of primes in a given set of numbers. If +// run in parallel, the order of results is nondeterministic, because +// hardware atomic updates do not guarantee an order of execution. +struct findprimes { + view_type data; + view_type result; + count_type count; + + findprimes (view_type data_, view_type result_, count_type count_) : + data (data_), result (result_), count (count_) + {} + + // Test if data(i) is prime. If it is, increment the count of + // primes (stored in the zero-dimensional View 'count') and add the + // value to the current list of primes 'result'. + KOKKOS_INLINE_FUNCTION + void operator() (const int i) const { + const int number = data(i); // the current number + + // Numbers below 2 are not prime and 2 is the only even prime. For + // odd numbers, test the odd candidates from 3 up to sqrt(data(i)) + // as factors. It's not the most efficient prime test, but it works. + const int upper_bound = sqrt(1.0*number)+1; + bool is_prime = (number == 2) || (number > 2 && number%2 != 0); + int k = 3; + while (k < upper_bound && is_prime) { + is_prime = !(number%k == 0); + k += 2; // don't have to test even numbers + } + + if (is_prime) { + // Use an atomic update both to update the current count of + // primes, and to find a place in the current list of primes for + // the new result. + // + // atomic_fetch_add returns the _current_ count, but increments + // it (by 1 in this case). The current count of primes indexes + // into the first unoccupied position of the 'result' array. 
+ const int idx = Kokkos::atomic_fetch_add (&count(), 1); + result(idx) = number; + } + } + +}; + +int main () { + Kokkos::initialize (); + + srand (61391); // Set the random seed + + int nnumbers = 100000; + view_type data ("RND", nnumbers); + view_type result ("Prime", nnumbers); + count_type count ("Count"); + + host_view_type h_data = Kokkos::create_mirror_view (data); + host_view_type h_result = Kokkos::create_mirror_view (result); + host_count_type h_count = Kokkos::create_mirror_view (count); + + typedef view_type::size_type size_type; + // Fill the 'data' array on the host with random numbers. We assume + // that they come from some process which is only implemented on the + // host, via some library. (That's true in this case.) + for (size_type i = 0; i < data.dimension_0 (); ++i) { + h_data(i) = rand () % nnumbers; + } + Kokkos::deep_copy (data, h_data); // copy from host to device + + Kokkos::parallel_for (data.dimension_0 (), findprimes (data, result, count)); + Kokkos::deep_copy (h_count, count); // copy from device to host + + printf ("Found %i prime numbers in %i random numbers\n", h_count(), nnumbers); + Kokkos::finalize (); +} + diff --git a/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/CMakeLists.txt b/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..2eb3a8f6c98d69c394f83591e59aa7073f1e59e2 --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/CMakeLists.txt @@ -0,0 +1,10 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# This is a tutorial, not a test, so we don't ask CTest to run it. 
+TRIBITS_ADD_EXECUTABLE( + tutorial_advancedviews_01_data_layouts + SOURCES data_layouts.cpp + COMM serial mpi + ) diff --git a/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..12ad36b31e458d155aa6dc653ab8188a7773bd18 --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ../../../.. +SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/data_layouts.cpp b/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/data_layouts.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8406c504c9678cbe86dd8bd84b0f9e00391e8737 --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/data_layouts.cpp @@ -0,0 +1,171 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <impl/Kokkos_Timer.hpp> +#include <cstdio> + +// These two View types are both 2-D arrays of double. However, they +// have different layouts in memory. 
left_type has "layout left," +// which means "column major," the same as in Fortran, the BLAS, or +// LAPACK. right_type has "layout right," which means "row major," +// the same as in C, C++, or Java. +typedef Kokkos::View<double**, Kokkos::LayoutLeft> left_type; +typedef Kokkos::View<double**, Kokkos::LayoutRight> right_type; +// This is a one-dimensional View, so the layout matters less. +// However, it still has a layout! Since its layout is not specified +// explicitly in the type, its layout is a function of the memory +// space. For example, the default Cuda layout is LayoutLeft, and the +// default Host layout is LayoutRight. +typedef Kokkos::View<double*> view_type; + +// parallel_for functor that fills the given View with some data. It +// expects to access the View by rows in parallel: each call i of +// operator() accesses a row. +template<class ViewType> +struct init_view { + ViewType a; + init_view (ViewType a_) : a (a_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const typename ViewType::size_type i) const { + // On CPUs this loop could be vectorized so j should do stride 1 + // access on a for optimal performance. I.e. a should be LayoutRight. + // On GPUs threads should do coalesced loads and stores. That means + // that i should be the stride one access for optimal performance. + for (typename ViewType::size_type j = 0; j < a.dimension_1 (); ++j) { + a(i,j) = 1.0*a.dimension_0()*i + 1.0*j; + } + } +}; + +// Compute a contraction of v1 and v2 into a: +// +// a(i) := sum_j (v1(i,j) * v2(j,i)) +// +// Since the functor is templated on the ViewTypes themselves it doesn't matter what +// their layouts are. That means you can use different layouts on different +// architectures. 
+template<class ViewType1, class ViewType2> +struct contraction { + view_type a; + typename ViewType1::const_type v1; + typename ViewType2::const_type v2; + contraction (view_type a_, ViewType1 v1_, ViewType2 v2_) : + a (a_), v1 (v1_), v2 (v2_) + {} + + // As with the initialization functor the performance of this operator + // depends on the architecture and the chosen data layouts. + // On CPUs optimal would be to vectorize the inner loop, so j should be the + // stride 1 access. That means v1 should be LayoutRight and v2 LayoutLeft. + // In order to get coalesced access on GPUs where i corresponds closely to + // the thread Index, i must be the stride 1 dimension. That means v1 should be + // LayoutLeft and v2 LayoutRight. + KOKKOS_INLINE_FUNCTION + void operator() (const view_type::size_type i) const { + for (view_type::size_type j = 0; j < v1.dimension_1 (); ++j) { + a(i) = (j == 0 ? 0.0 : a(i)) + v1(i,j)*v2(j,i); // restart the sum at j == 0, then accumulate + } + } +}; + +// Compute a dot product. This is used for result verification. +struct dot { + view_type a; + dot (view_type a_) : a (a_) {} + typedef double value_type; //Specify type for reduction target, lsum + KOKKOS_INLINE_FUNCTION + void operator() (const view_type::size_type i, double &lsum) const { + lsum += a(i)*a(i); + } +}; + +int main (int narg, char* arg[]) { + // When initializing Kokkos, you may pass in command-line arguments, + // just like with MPI_Init(). Kokkos reserves the right to remove + // arguments from the list that start with '--kokkos-'. + Kokkos::initialize (narg, arg); + + int size = 10000; + view_type a("A",size); + + // Define two views with LayoutLeft and LayoutRight. + left_type l("L",size,10000); + right_type r("R",size,10000); + + // Initialize the data in the views. + Kokkos::parallel_for(size,init_view<left_type>(l)); + Kokkos::parallel_for(size,init_view<right_type>(r)); + Kokkos::fence(); + + // Measure time to execute the contraction kernel when giving it a + // LayoutLeft view for v1 and a LayoutRight view for v2. 
This should be + // fast on GPUs and slow on CPUs + Kokkos::Timer time1; + Kokkos::parallel_for(size,contraction<left_type,right_type>(a,l,r)); + Kokkos::fence(); + double sec1 = time1.seconds(); + + double sum1 = 0; + Kokkos::parallel_reduce(size,dot(a),sum1); + Kokkos::fence(); + + // Measure time to execute the contraction kernel when giving it a + // LayoutRight view for v1 and a LayoutLeft view for v2. This should be + // fast on CPUs and slow on GPUs + Kokkos::Timer time2; + Kokkos::parallel_for(size,contraction<right_type,left_type>(a,r,l)); + Kokkos::fence(); + double sec2 = time2.seconds(); + + double sum2 = 0; + Kokkos::parallel_reduce(size,dot(a),sum2); + + // Kokkos' reductions are deterministic. + // The results should always be equal. + printf("Result Left/Right %f Right/Left %f (equal result: %i)\n",sec1,sec2,sum2==sum1); + + Kokkos::finalize(); +} + diff --git a/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/CMakeLists.txt b/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..1963e544d7a113e8b50cf3fa2444df2f95d983e2 --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/CMakeLists.txt @@ -0,0 +1,10 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# This is a tutorial, not a test, so we don't ask CTest to run it. +TRIBITS_ADD_EXECUTABLE( + tutorial_advancedviews_02_memory_traits + SOURCES memory_traits.cpp + COMM serial mpi + ) diff --git a/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..12ad36b31e458d155aa6dc653ab8188a7773bd18 --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ../../../.. 
+SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/memory_traits.cpp b/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/memory_traits.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ddd28a97c3739bea326b7b71e09c06e42b05f9f9 --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/memory_traits.cpp @@ -0,0 +1,141 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <impl/Kokkos_Timer.hpp> +#include <cstdio> +#include <cstdlib> + +typedef Kokkos::View<double*> view_type; +// Kokkos::Views have a MemoryTraits template parameter which +// allows users to specify usage scenarios of a View. +// Some of those act simply as hints, which can be used to insert +// optimal load and store paths, others change the semantics of the +// access. The trait Kokkos::Atomic is one of the latter. A view with +// that MemoryTrait will perform any access atomically (read, write, update). +// +// In this example we use a view with a usage hint for RandomAccess. 
+// Kokkos::RandomAccess means that we expect to use this view +// with indirect indexing. +// +// In CUDA, RandomAccess allows accesses through the texture +// cache. This only works if the View is read-only, which we enforce +// through the first template parameter. +// +// Note that we are still talking about views of the data, it's not a new allocation. +// For example you can have an atomic view of a default view. While you could +// even use both in the same kernel, this could lead to undefined behaviour because +// one of your access paths is not atomic. Think of it in the same way as you think of +// pointers to const data and pointers to non-const data (i.e. const double* and double*). +// While these pointers can point to the same data you should not use them together if that +// breaks the const guarantee of the first pointer. +typedef Kokkos::View<const double*, Kokkos::MemoryTraits<Kokkos::RandomAccess> > view_type_rnd; +typedef Kokkos::View<int**> idx_type; +typedef idx_type::HostMirror idx_type_host; + +// We template this functor on the ViewTypes to show the effect of the RandomAccess trait. 
+template<class DestType, class SrcType> +struct localsum { + idx_type::const_type idx; + DestType dest; + SrcType src; + localsum (idx_type idx_, DestType dest_, SrcType src_) : + idx (idx_), dest (dest_), src (src_) + {} + + // Calculate a local sum of values + KOKKOS_INLINE_FUNCTION + void operator() (const int i) const { + double tmp = 0.0; + for (int j = 0; j < (int) idx.dimension_1 (); ++j) { + // This is an indirect access on src + const double val = src(idx(i,j)); + tmp += val*val + 0.5*(idx.dimension_0()*val -idx.dimension_1()*val); + } + dest(i) = tmp; + } +}; + +int main(int narg, char* arg[]) { + Kokkos::initialize (narg, arg); + + int size = 1000000; + + idx_type idx("Idx",size,64); + idx_type_host h_idx = Kokkos::create_mirror_view (idx); + + view_type dest ("Dest", size); + view_type src ("Src", size); + + srand(134231); + + for (int i = 0; i < size; i++) { + for (view_type::size_type j = 0; j < h_idx.dimension_1 (); ++j) { + h_idx(i,j) = (size + i + (rand () % 500 - 250)) % size; + } + } + + // Deep copy the initial data to the device + Kokkos::deep_copy(idx,h_idx); + // Run the first kernel to warmup caches + Kokkos::parallel_for(size,localsum<view_type,view_type_rnd>(idx,dest,src)); + Kokkos::fence(); + + // Run the localsum functor using the RandomAccess trait. On CPUs there should + // not be any difference in performance to not using the RandomAccess trait. 
+ // On GPUs there can be a dramatic difference + Kokkos::Timer time1; + Kokkos::parallel_for(size,localsum<view_type,view_type_rnd>(idx,dest,src)); + Kokkos::fence(); + double sec1 = time1.seconds(); + + Kokkos::Timer time2; + Kokkos::parallel_for(size,localsum<view_type,view_type>(idx,dest,src)); + Kokkos::fence(); + double sec2 = time2.seconds(); + + printf("Time with Trait RandomAccess: %f with Plain: %f \n",sec1,sec2); + + Kokkos::finalize(); +} + diff --git a/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/CMakeLists.txt b/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..cbe394c78b832f7bee3bb659b2776d5b246adbd1 --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/CMakeLists.txt @@ -0,0 +1,10 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# This is a tutorial, not a test, so we don't ask CTest to run it. +TRIBITS_ADD_EXECUTABLE( + tutorial_advancedviews_03_subviews + SOURCES subviews.cpp + COMM serial mpi + ) diff --git a/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..12ad36b31e458d155aa6dc653ab8188a7773bd18 --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ../../../.. 
+SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/subviews.cpp b/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/subviews.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c8001ebaa60664dcb7b644c8b55b5ac1d7a0ba76 --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/subviews.cpp @@ -0,0 +1,190 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// This example simulates one timestep of an explicit +// finite-difference discretization of a time-dependent partial +// differential equation (PDE). It shows how to take subviews of the +// mesh in order to represent particular boundaries or the interior of +// the mesh. + +#include <Kokkos_Core.hpp> +#include <impl/Kokkos_Timer.hpp> +#include <cstdio> + +typedef Kokkos::View<double***, Kokkos::LayoutRight> mesh_type; + +// These View types represent subviews of the mesh. Some of the Views +// have layout LayoutStride, meaning that they have run-time "strides" +// in each dimension which may differ from that dimension. 
For +// example, inner_mesh_type (which represents the interior of the +// mesh) has to skip over the boundaries when computing its stride; +// the dimensions of the interior mesh differ from these strides. You +// may safely always use a LayoutStride layout when taking a subview +// of a LayoutRight or LayoutLeft View, but strided accesses may +// cost a bit more, especially for 1-D Views. +typedef Kokkos::View<double**, Kokkos::LayoutStride> xz_plane_type; +typedef Kokkos::View<double**, Kokkos::LayoutRight> yz_plane_type; +typedef Kokkos::View<double**, Kokkos::LayoutStride> xy_plane_type; +typedef Kokkos::View<double***, Kokkos::LayoutStride> inner_mesh_type; + +// Functor to set all entries of a boundary of the mesh to a constant +// value. The functor is templated on ViewType because different +// boundaries may have different layouts. +template<class ViewType> +struct set_boundary { + ViewType a; + double value; + + set_boundary (ViewType a_, double value_) : + a (a_), value (value_) + {} + + KOKKOS_INLINE_FUNCTION + void operator() (const typename ViewType::size_type i) const { + for (typename ViewType::size_type j = 0; j < a.dimension_1 (); ++j) { + a(i,j) = value; + } + } +}; + +// Functor to set all entries of a 3-D View (here, the interior of the +// mesh) to a constant value. The functor is templated on ViewType +// because the interior subview has a strided layout. +template<class ViewType> +struct set_inner { + ViewType a; + double value; + + set_inner (ViewType a_, double value_) : + a (a_), value (value_) + {} + + KOKKOS_INLINE_FUNCTION + void operator () (const typename ViewType::size_type i) const { + typedef typename ViewType::size_type size_type; + for (size_type j = 0; j < a.dimension_1 (); ++j) { + for (size_type k = 0; k < a.dimension_2 (); ++k) { + a(i,j,k) = value; + } + } + } +}; + +// Update the interior of the mesh. This simulates one timestep of a +// finite-difference method. 
+template<class ViewType> +struct update { + ViewType a; + const double dt; + + update (ViewType a_, const double dt_) : + a (a_), dt (dt_) + {} + + KOKKOS_INLINE_FUNCTION + void operator() (typename ViewType::size_type i) const { + typedef typename ViewType::size_type size_type; + i++; + for (size_type j = 1; j < a.dimension_1()-1; j++) { + for (size_type k = 1; k < a.dimension_2()-1; k++) { + a(i,j,k) += dt* (a(i,j,k+1) - a(i,j,k-1) + + a(i,j+1,k) - a(i,j-1,k) + + a(i+1,j,k) - a(i-1,j,k)); + } + } + } +}; + + +int main (int narg, char* arg[]) { + using Kokkos::ALL; + using Kokkos::pair; + using Kokkos::parallel_for; + using Kokkos::subview; + typedef mesh_type::size_type size_type; + + Kokkos::initialize (narg, arg); + + // The number of mesh points along each dimension of the mesh, not + // including boundaries. + const size_type size = 100; + + // A is the full cubic 3-D mesh, including the boundaries. + mesh_type A ("A", size+2, size+2, size+2); + // Ai is the "inner" part of A, _not_ including the boundaries. + // + // A pair of indices in a particular dimension means the contiguous + // zero-based index range in that dimension, including the first + // entry of the pair but _not_ including the second entry. + inner_mesh_type Ai = subview(A, pair<size_type, size_type> (1, size+1), + pair<size_type, size_type> (1, size+1), + pair<size_type, size_type> (1, size+1)); + // A has six boundaries, one for each face of the cube. + // Create a View of each of these boundaries. + // ALL() means "select all indices in that dimension." + xy_plane_type Zneg_halo = subview(A, ALL (), ALL (), 0); + xy_plane_type Zpos_halo = subview(A, ALL (), ALL (), size+1); + xz_plane_type Yneg_halo = subview(A, ALL (), 0, ALL ()); + xz_plane_type Ypos_halo = subview(A, ALL (), size+1, ALL ()); + yz_plane_type Xneg_halo = subview(A, 0, ALL (), ALL ()); + yz_plane_type Xpos_halo = subview(A, size+1, ALL (), ALL ()); + + // Set the boundaries to their initial conditions. 
+ parallel_for (Zneg_halo.dimension_0 (), set_boundary<xy_plane_type> (Zneg_halo, 1)); + parallel_for (Zpos_halo.dimension_0 (), set_boundary<xy_plane_type> (Zpos_halo, -1)); + parallel_for (Yneg_halo.dimension_0 (), set_boundary<xz_plane_type> (Yneg_halo, 2)); + parallel_for (Ypos_halo.dimension_0 (), set_boundary<xz_plane_type> (Ypos_halo, -2)); + parallel_for (Xneg_halo.dimension_0 (), set_boundary<yz_plane_type> (Xneg_halo, 3)); + parallel_for (Xpos_halo.dimension_0 (), set_boundary<yz_plane_type> (Xpos_halo, -3)); + + // Set the interior of the mesh to its initial condition. + parallel_for (Ai.dimension_0 (), set_inner<inner_mesh_type> (Ai, 0)); + + // Update the interior of the mesh. + // This simulates one timestep with dt = 0.1. + parallel_for (Ai.dimension_0 (), update<mesh_type> (A, 0.1)); + + printf ("Done\n"); + Kokkos::finalize (); +} + diff --git a/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/CMakeLists.txt b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..300dab128e45779002cf123d7e7238777abab4d5 --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/CMakeLists.txt @@ -0,0 +1,10 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# This is a tutorial, not a test, so we don't ask CTest to run it. +TRIBITS_ADD_EXECUTABLE( + tutorial_advancedviews_04_dualviews + SOURCES dual_view.cpp + COMM serial mpi + ) diff --git a/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..12ad36b31e458d155aa6dc653ab8188a7773bd18 --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ../../../.. 
+SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/dual_view.cpp b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/dual_view.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4905e4bf88485c70527d9080844940a61c60365c --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/dual_view.cpp @@ -0,0 +1,218 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <Kokkos_DualView.hpp> +#include <impl/Kokkos_Timer.hpp> +#include <cstdio> +#include <cstdlib> + +// DualView helps you manage data and computations that take place on +// two different memory spaces. Examples include CUDA device memory +// and (CPU) host memory (currently implemented), or Intel Knights +// Landing MCDRAM and DRAM (not yet implemented). For example, if you +// have ported only some parts of your application to run in CUDA, +// DualView can help manage moving data between the parts of your +// application that work best with CUDA, and the parts that work +// better on the CPU.
+// +// A DualView takes the same template parameters as a View, but +// contains two Views: One that lives in the DualView's memory space, +// and one that lives in that memory space's host mirror space. If +// both memory spaces are the same, then the two Views just alias one +// another. This means that you can use DualView all the time, even +// when not running in a memory space like CUDA. DualView's +// operations to help you manage memory take almost no time in that +// case. This makes your code even more performance portable. + +typedef Kokkos::DualView<double*> view_type; +typedef Kokkos::DualView<int**> idx_type; + + +template<class ExecutionSpace> +struct localsum { + // If the functor has a public 'execution_space' typedef, that defines + // the functor's execution space (where it runs in parallel). This + // overrides Kokkos' default execution space. + typedef ExecutionSpace execution_space; + + typedef typename Kokkos::Impl::if_c<Kokkos::Impl::is_same<ExecutionSpace,Kokkos::DefaultExecutionSpace>::value , + idx_type::memory_space, idx_type::host_mirror_space>::type memory_space; + + // Get the view types on the particular device for which the functor + // is instantiated. + // + // "const_data_type" is a typedef in View (and DualView) which is + // the const version of the first template parameter of the View. + // For example, the const_data_type version of double** is const + // double**. + Kokkos::View<idx_type::const_data_type, idx_type::array_layout, memory_space> idx; + // "scalar_array_type" is a typedef in ViewTraits (and DualView) which is the + // array version of the value(s) stored in the View. + Kokkos::View<view_type::scalar_array_type, view_type::array_layout, memory_space> dest; + Kokkos::View<view_type::const_data_type, view_type::array_layout, + memory_space, Kokkos::MemoryRandomAccess> src; + + // Constructor takes DualViews, synchronizes them to the device, + // then marks them as modified on the device. 
+ localsum (idx_type dv_idx, view_type dv_dest, view_type dv_src) + { + // Extract the view on the correct Device (i.e., the correct + // memory space) from the DualView. DualView has a template + // method, view(), which is templated on the memory space. If the + // DualView has a View from that memory space, view() returns the + // View in that space. + idx = dv_idx.view<memory_space> (); + dest = dv_dest.template view<memory_space> (); + src = dv_src.template view<memory_space> (); + + // Synchronize the DualView to the correct Device. + // + // DualView's sync() method is templated on a memory space, and + // synchronizes the DualView in a one-way fashion to that memory + // space. "Synchronizing" means copying, from the other memory + // space to the Device memory space. sync() does _nothing_ if the + // Views on the two memory spaces are in sync. DualView + // determines this by the user manually marking one side or the + // other as modified; see the modify() call below. + + dv_idx.sync<memory_space> (); + dv_dest.template sync<memory_space> (); + dv_src.template sync<memory_space> (); + + // Mark dest as modified on Device. 
+ dv_dest.template modify<memory_space> (); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int i) const { + double tmp = 0.0; + for (int j = 0; j < (int) idx.dimension_1(); ++j) { + const double val = src(idx(i,j)); + tmp += val*val + 0.5*(idx.dimension_0()*val -idx.dimension_1()*val); + } + dest(i) += tmp; + } +}; + +class ParticleType { + public: + double q; + double m; + double q_over_m; + KOKKOS_INLINE_FUNCTION + ParticleType(double q_ = -1, double m_ = 1): + q(q_), m(m_), q_over_m(q/m) {} +protected: +}; + + typedef Kokkos::DualView<ParticleType[10]> ParticleTypes; +int main (int narg, char* arg[]) { + Kokkos::initialize (narg, arg); + +// If the View's value type is non-trivially constructible, add braces so that the View goes out of scope +// before the Kokkos::finalize() call. +{ + ParticleTypes test("Test"); + Kokkos::fence(); + test.h_view(0) = ParticleType(-1e4,1); + Kokkos::fence(); + + int size = 1000000; + + // Create DualViews. This will allocate on both the device and its + // host_mirror_device. + idx_type idx ("Idx",size,64); + view_type dest ("Dest",size); + view_type src ("Src",size); + + + srand (134231); + + // Get a reference to the host view of idx directly (equivalent to + // idx.view<idx_type::host_mirror_space>() ) + idx_type::t_host h_idx = idx.h_view; + for (int i = 0; i < size; ++i) { + for (view_type::size_type j = 0; j < h_idx.dimension_1 (); ++j) { + h_idx(i,j) = (size + i + (rand () % 500 - 250)) % size; + } + } + + // Mark idx as modified on the host_mirror_space so that a + // sync to the device will actually move data. The sync happens in + // the functor's constructor. + idx.modify<idx_type::host_mirror_space> (); + + // Run on the device. This will cause a sync of idx to the device, + // since it was marked as modified on the host.
+ Kokkos::Timer timer; + Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src)); + Kokkos::fence(); + double sec1_dev = timer.seconds(); + + timer.reset(); + Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src)); + Kokkos::fence(); + double sec2_dev = timer.seconds(); + + // Run on the host's default execution space (could be the same as device). + // This will cause a sync back to the host of dest. Note that if the Device is CUDA, + // the data layout will not be optimal on host, so performance is + // lower than what it would be for a pure host compilation. + timer.reset(); + Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src)); + Kokkos::fence(); + double sec1_host = timer.seconds(); + + timer.reset(); + Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src)); + Kokkos::fence(); + double sec2_host = timer.seconds(); + + printf("Device Time with Sync: %f without Sync: %f \n",sec1_dev,sec2_dev); + printf("Host Time with Sync: %f without Sync: %f \n",sec1_host,sec2_host); +} + + Kokkos::finalize(); +} + diff --git a/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/CMakeLists.txt b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..f0ed569f9f48a02ebcca091adced52a8c3a1f2ad --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/CMakeLists.txt @@ -0,0 +1,13 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +IF (Kokkos_ENABLE_Cuda_UVM) + # This is a tutorial, not a test, so we don't ask CTest to run it. 
+ TRIBITS_ADD_EXECUTABLE( + tutorial_advancedviews_05_nvidia_uvm + SOURCES uvm_example.cpp + COMM serial mpi + DEPLIBS kokkoscontainers kokkoscore + ) +ENDIF () diff --git a/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..12ad36b31e458d155aa6dc653ab8188a7773bd18 --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ../../../.. +SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/uvm_example.cpp b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/uvm_example.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cf5326b687199ff8c5c14580b18a9e406279cd11 --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/uvm_example.cpp @@ -0,0 +1,134 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <Kokkos_DualView.hpp> +#include <impl/Kokkos_Timer.hpp> +#include <cstdio> +#include <cstdlib> + +typedef Kokkos::View<double*> view_type; +typedef Kokkos::View<int**> idx_type; + + +template<class Device> +struct localsum { + // Define the execution space for the functor (overrides the DefaultExecutionSpace) + typedef Device execution_space; + + // Get the view types on the particular device the functor is instantiated for + idx_type::const_type idx; + view_type dest; + Kokkos::View<view_type::const_data_type, view_type::array_layout, view_type::execution_space, Kokkos::MemoryRandomAccess > src; + + localsum(idx_type idx_, view_type dest_, + view_type src_):idx(idx_),dest(dest_),src(src_) { + } + + KOKKOS_INLINE_FUNCTION + void operator() (int i) const { + double tmp = 0.0; + for(int j = 0; j < idx.dimension_1(); j++) { + const double val = src(idx(i,j)); + tmp += val*val + 0.5*(idx.dimension_0()*val -idx.dimension_1()*val); + } + dest(i) += tmp; + } +}; + +int main(int narg, char* arg[]) { + Kokkos::initialize(narg,arg); + + int size = 1000000; + + // Create Views + idx_type idx("Idx",size,64); + view_type dest("Dest",size); + view_type src("Src",size); + + srand(134231); + + // When using UVM Cuda views can be accessed on the Host directly + for(int i=0; i<size; i++) { + for(int j=0; j<idx.dimension_1(); j++) + idx(i,j) = (size + i + (rand()%500 - 250))%size; + } + + Kokkos::fence(); + // Run on the device + // This will cause a sync of idx to the device since it was modified on the host + Kokkos::Timer timer; + Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src)); + Kokkos::fence(); + double sec1_dev = timer.seconds(); + + // No data transfer will happen now, since nothing is accessed on the host + timer.reset(); + 
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src)); + Kokkos::fence(); + double sec2_dev = timer.seconds(); + + // Run on the host + // This will cause a sync back to the host of dest which was changed on the device + // Compare runtime here with the dual_view example: dest will be copied back in 4k blocks + // when they are accessed the first time during the parallel_for. Due to the latency of a memcpy + // this gives lower effective bandwidth than when doing a manual copy via dual views + timer.reset(); + Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src)); + Kokkos::fence(); + double sec1_host = timer.seconds(); + + // No data transfers will happen now + timer.reset(); + Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src)); + Kokkos::fence(); + double sec2_host = timer.seconds(); + + + + printf("Device Time with Sync: %lf without Sync: %lf \n",sec1_dev,sec2_dev); + printf("Host Time with Sync: %lf without Sync: %lf \n",sec1_host,sec2_host); + + Kokkos::finalize(); +} + diff --git a/lib/kokkos/example/tutorial/Advanced_Views/06_AtomicViews/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/06_AtomicViews/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..12ad36b31e458d155aa6dc653ab8188a7773bd18 --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/06_AtomicViews/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ../../../..
+SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..60a514f4d50ccf3e36fa2a8233de90c46f3bbe5d --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ../../../.. 
+SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../../config/nvcc_wrapper +CXXFLAGS = -O3 --default-stream per-thread +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/overlapping_deepcopy.cpp b/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/overlapping_deepcopy.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5da3bf76c98ebc1cfbf4c6d81d3e6fc7d3e13171 --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/overlapping_deepcopy.cpp @@ -0,0 +1,148 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <cstdio> +#include <typeinfo> +#include <cmath> +#include <impl/Kokkos_Timer.hpp> + +struct FillDevice { + double value; + Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> a; + FillDevice(const double& val, const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace>& d_a): + value(val),a(d_a){} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + a(i) = value; + } +}; + +struct ComputeADevice { + int iter; + Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> a; + Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> b; + ComputeADevice(const int& iter_, + const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace>& d_a, + const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace>& d_b): + iter(iter_),a(d_a),b(d_b){} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + for(int j=1;j<iter;j++) { + a(i) += std::pow(b(i),1.0+1.0/iter); + } + } +}; + +struct ComputeAHost { + Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace> a; + Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace> b; + ComputeAHost( const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace>& d_a, + const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace>& d_b): + a(d_a),b(d_b){} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + a(i) += b(i); + } +}; + +struct MergeDevice { + Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> a; + Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> b; + MergeDevice( + const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace>& d_a, + const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace>& d_b): + a(d_a),b(d_b){} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + a(i) += b(i); + } +}; + +int 
main(int argc, char * argv[]) { + int size = 100000000; + Kokkos::initialize(); + int synch = atoi(argv[1]); + Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> d_a("Device A",size); + Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> d_b("Device B",size); + Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> d_tmp("Device tmp",size); + Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace> h_a("Host A",size); + Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace> h_b("Host B",size); + + Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),FillDevice(0.0,d_a)); + Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),FillDevice(1.3513,d_b)); + Kokkos::fence(); + Kokkos::Timer timer; + Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),ComputeADevice(20,d_a,d_b)); + + if(synch==1) + Kokkos::deep_copy(Kokkos::OpenMP(),h_b,d_b); + if(synch==2) + Kokkos::deep_copy(h_b,d_b); + + + Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::OpenMP>(0,size),[=] (const int& i) { + h_a(i) = 0.0; + }); + Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::OpenMP>(0,size),ComputeAHost(h_a,h_b)); + Kokkos::OpenMP::fence(); + if(synch==1) + Kokkos::deep_copy(Kokkos::OpenMP(), d_tmp,h_a); + if(synch==2) + Kokkos::deep_copy(d_tmp,h_a); + Kokkos::fence(); + + std::cout << "Time " << timer.seconds() << std::endl; + Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),MergeDevice(d_a,d_tmp)); + + Kokkos::deep_copy(h_a,d_a); + std::cout << "h_a(0): " << h_a(0) << " ( Correct: 27.4154 )" << std::endl; + Kokkos::finalize(); +} + + + diff --git a/lib/kokkos/example/tutorial/Advanced_Views/CMakeLists.txt b/lib/kokkos/example/tutorial/Advanced_Views/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..f4f1addc5553d9ce7131456f02af664554757daa --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/CMakeLists.txt @@ -0,0 +1,9 @@ + 
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(01_data_layouts) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(02_memory_traits) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(03_subviews) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(04_dualviews) + +IF (Kokkos_ENABLE_Cuda_UVM) + TRIBITS_ADD_EXAMPLE_DIRECTORIES(05_NVIDIA_UVM) +ENDIF () diff --git a/lib/kokkos/example/tutorial/Advanced_Views/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..19053b61b037f6a21f1be0874b1c23cbbb02a234 --- /dev/null +++ b/lib/kokkos/example/tutorial/Advanced_Views/Makefile @@ -0,0 +1,84 @@ +default: + cd ./01_data_layouts; \ + make -j 4 + cd ./02_memory_traits; \ + make -j 4 + cd ./03_subviews; \ + make -j 4 + cd ./04_dualviews; \ + make -j 4 + cd ./05_NVIDIA_UVM; \ + make -j 4 + cd ./06_AtomicViews; \ + make -j 4 + +openmp: + cd ./01_data_layouts; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./02_memory_traits; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./03_subviews; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./04_dualviews; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./05_NVIDIA_UVM; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./06_AtomicViews; \ + make -j 4 KOKKOS_DEVICES=OpenMP + +pthreads: + cd ./01_data_layouts; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./02_memory_traits; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./03_subviews; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./04_dualviews; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./05_NVIDIA_UVM; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./06_AtomicViews; \ + make -j 4 KOKKOS_DEVICES=Pthreads + +serial: + cd ./01_data_layouts; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./02_memory_traits; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./03_subviews; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./04_dualviews; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./05_NVIDIA_UVM; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./06_AtomicViews; \ + make -j 4 KOKKOS_DEVICES=Serial + +cuda: + cd ./01_data_layouts; \ + make -j 4 
KOKKOS_DEVICES=Cuda,Serial + cd ./02_memory_traits; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./03_subviews; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./04_dualviews; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./05_NVIDIA_UVM; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./06_AtomicViews; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + +clean: + cd ./01_data_layouts; \ + make clean + cd ./02_memory_traits; \ + make clean + cd ./03_subviews; \ + make clean + cd ./04_dualviews; \ + make clean + cd ./05_NVIDIA_UVM; \ + make clean + cd ./06_AtomicViews; \ + make clean + diff --git a/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/Makefile b/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..12ad36b31e458d155aa6dc653ab8188a7773bd18 --- /dev/null +++ b/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ../../../.. +SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/random_numbers.cpp b/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/random_numbers.cpp new file mode 
100644 index 0000000000000000000000000000000000000000..3e6175a75652d54af1f0ad3c3c818485ccc59b07 --- /dev/null +++ b/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/random_numbers.cpp @@ -0,0 +1,152 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <Kokkos_Random.hpp> +#include <Kokkos_DualView.hpp> +#include <impl/Kokkos_Timer.hpp> +#include <cstdlib> + +typedef Kokkos::HostSpace::execution_space DefaultHostType; + +// Kokkos provides two different random number generators with a 64 bit and a 1024 bit state. +// These generators are based on Vigna, Sebastiano (2014). "An experimental exploration of Marsaglia's xorshift generators, scrambled" +// See: http://arxiv.org/abs/1402.6246 +// The generators can be used fully independently on each thread and have been tested to +// produce good statistics for both inter and intra thread numbers. +// Note that within a kernel NO random number operations are (team) collective operations. +// Everything can be called within branches. This is a difference to the curand library where +// certain operations are required to be called by all threads in a block. +// +// In Kokkos you are required to create a pool of generator states, so that threads can +// grab their own. On CPU architectures the pool size is equal to the thread number, +// on CUDA about 128k states are generated (enough to give every potentially simultaneously +// running thread its own state).
Within a kernel a thread is required to acquire a state from the +// pool and later return it. +// On CPUs the Random number generator is deterministic if using the same number of threads. +// On GPUs (i.e. using the CUDA backend) it is not deterministic because threads acquire states via +// atomics. + +// A Functor for generating uint64_t random numbers templated on the GeneratorPool type +template<class GeneratorPool> +struct generate_random { + + // The GeneratorPool + GeneratorPool rand_pool; + + // Output View for the random numbers + Kokkos::View<uint64_t*> vals; + int samples; + + // Initialize all members + generate_random(Kokkos::View<uint64_t*> vals_, + GeneratorPool rand_pool_, + int samples_): + vals(vals_),rand_pool(rand_pool_),samples(samples_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (int i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + // Draw samples numbers from the pool as urand64 between 0 and rand_pool.MAX_URAND64 + // Note there are function calls to get other types of scalars, and also to specify + // Ranges or get a normal distributed float. + for(int k = 0;k<samples;k++) + vals(i*samples+k) = rand_gen.urand64(); + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + + + + +int main(int argc, char* args[]) { + if (argc != 3){ + printf("Please pass two integers on the command line\n"); + } + else { + + // Initialize Kokkos + Kokkos::initialize(argc,args); + int size = atoi(args[1]); + int samples = atoi(args[2]); + + // Create two random number generator pools one for 64bit states and one for 1024 bit states + // Both take a 64 bit unsigned integer seed to initialize a Random_XorShift64 generator which + // is used to fill the generators of the pool.
+ Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857); + Kokkos::Random_XorShift1024_Pool<> rand_pool1024(5374857); + Kokkos::DualView<uint64_t*> vals("Vals",size*samples); + + // Run some performance comparisons + Kokkos::Timer timer; + Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift64_Pool<> >(vals.d_view,rand_pool64,samples)); + Kokkos::fence(); + + timer.reset(); + Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift64_Pool<> >(vals.d_view,rand_pool64,samples)); + Kokkos::fence(); + double time_64 = timer.seconds(); + + Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift1024_Pool<> >(vals.d_view,rand_pool1024,samples)); + Kokkos::fence(); + + timer.reset(); + Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift1024_Pool<> >(vals.d_view,rand_pool1024,samples)); + Kokkos::fence(); + double time_1024 = timer.seconds(); + + printf("#Time XorShift64*: %lf %lf\n",time_64,1.0e-9*samples*size/time_64 ); + printf("#Time XorShift1024*: %lf %lf\n",time_1024,1.0e-9*samples*size/time_1024 ); + + Kokkos::deep_copy(vals.h_view,vals.d_view); + + Kokkos::finalize(); + } + return 0; +} + + diff --git a/lib/kokkos/example/tutorial/Algorithms/Makefile b/lib/kokkos/example/tutorial/Algorithms/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..edc2a36024fc24a791a27064e4f36febfec81c1a --- /dev/null +++ b/lib/kokkos/example/tutorial/Algorithms/Makefile @@ -0,0 +1,24 @@ +default: + cd ./01_random_numbers; \ + make -j 4 + +openmp: + cd ./01_random_numbers; \ + make -j 4 KOKKOS_DEVICES=OpenMP + +pthreads: + cd ./01_random_numbers; \ + make -j 4 KOKKOS_DEVICES=Pthreads + +serial: + cd ./01_random_numbers; \ + make -j 4 KOKKOS_DEVICES=Serial + +cuda: + cd ./01_random_numbers; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + +clean: + cd ./01_random_numbers; \ + make clean + diff --git a/lib/kokkos/example/tutorial/CMakeLists.txt b/lib/kokkos/example/tutorial/CMakeLists.txt new file mode 100644 index 
0000000000000000000000000000000000000000..d1fd4c0ae9b31f01c8fb351e5730cf2a452655fe --- /dev/null +++ b/lib/kokkos/example/tutorial/CMakeLists.txt @@ -0,0 +1,17 @@ + +TRIBITS_ADD_EXAMPLE_DIRECTORIES(01_hello_world) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(02_simple_reduce) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(03_simple_view) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(04_simple_memoryspaces) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(05_simple_atomics) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(Advanced_Views) +TRIBITS_ADD_EXAMPLE_DIRECTORIES(Hierarchical_Parallelism) + +IF (Kokkos_ENABLE_CXX11) + TRIBITS_ADD_EXAMPLE_DIRECTORIES(01_hello_world_lambda) + TRIBITS_ADD_EXAMPLE_DIRECTORIES(02_simple_reduce_lambda) + TRIBITS_ADD_EXAMPLE_DIRECTORIES(03_simple_view_lambda) +ENDIF () + + + diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/CMakeLists.txt b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..2d8a514a4549aad63f735721b41e47516a570070 --- /dev/null +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/CMakeLists.txt @@ -0,0 +1,10 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# This is a tutorial, not a test, so we don't ask CTest to run it. +TRIBITS_ADD_EXECUTABLE( + tutorial_hierarchicalparallelism_01_thread_teams + SOURCES thread_teams.cpp + COMM serial mpi + ) diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..12ad36b31e458d155aa6dc653ab8188a7773bd18 --- /dev/null +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ../../../.. 
+SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/thread_teams.cpp b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/thread_teams.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f2de0809a796128e76e44cdb32837f4c82c98022 --- /dev/null +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/thread_teams.cpp @@ -0,0 +1,94 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <cstdio> + +// Using default execution space define a TeamPolicy and its member_type +// The member_type is what the operator of a functor or Lambda gets, for +// a simple RangePolicy the member_type is simply an integer +// For a TeamPolicy its a much richer object, since it provides all information +// to identify a thread uniquely and some team related function calls such as a +// barrier (which will be used in a subsequent example). +// A ThreadTeam consists of 1 to n threads where the maxmimum value of n is +// determined by the hardware. 
On a dual socket CPU machine with 8 cores per socket +// the maximum size of a team is 8. The number of teams (i.e. the league_size) is +// not limited by physical constraints. It's a purely logical number. + +typedef Kokkos::TeamPolicy<> team_policy ; +typedef team_policy::member_type team_member ; + +// Define a functor which can be launched using the TeamPolicy +struct hello_world { + typedef int value_type; //Specify value type for reduction target, sum + + // This is a reduction operator which now takes as first argument the + // TeamPolicy member_type. Every member of the team contributes to the + // total sum. + // It is helpful to think of this operator as a parallel region for a team + // (i.e. every team member is active and will execute the code). + KOKKOS_INLINE_FUNCTION + void operator() ( const team_member & thread, int& sum) const { + sum+=1; + // The TeamPolicy<>::member_type provides functions to query the multi + // dimensional index of a thread as well as the number of thread-teams and the size + // of each team.
+ printf("Hello World: %i %i // %i %i\n",thread.league_rank(),thread.team_rank(),thread.league_size(),thread.team_size()); + } +}; + +int main(int narg, char* args[]) { + Kokkos::initialize(narg,args); + + // Launch 12 teams of the maximum number of threads per team + const team_policy policy( 12 , team_policy::team_size_max( hello_world() ) ); + + int sum = 0; + Kokkos::parallel_reduce( policy , hello_world() , sum ); + + // The result will be 12*team_policy::team_size_max( hello_world()) + printf("Result %i\n",sum); + + Kokkos::finalize(); +} + diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/CMakeLists.txt b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..ec7f1e1159fcf7f12209defea154c494fb48540e --- /dev/null +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/CMakeLists.txt @@ -0,0 +1,13 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +IF (Kokkos_ENABLE_CXX11) + # This is a tutorial, not a test, so we don't ask CTest to run it. + TRIBITS_ADD_EXECUTABLE( + tutorial_hierarchical_01_thread_teams_lambda + SOURCES thread_teams_lambda.cpp + COMM serial mpi + ) +ENDIF () + diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..965b72b4e9a7aac83f1a748d3f0c4fe611aafabb --- /dev/null +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile @@ -0,0 +1,44 @@ +KOKKOS_PATH = ../../../.. 
+SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +KOKKOS_CUDA_OPTIONS = "enable_lambda" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..565dd22e82849fde2fe527f25179ae49346222f9 --- /dev/null +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp @@ -0,0 +1,94 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <cstdio> + +// Demonstrate a parallel reduction using thread teams (TeamPolicy). +// +// A thread team consists of 1 to n threads. The hardware determines +// the maxmimum value of n. On a dual-socket CPU machine with 8 cores +// per socket, the maximum size of a team is 8. The number of teams +// (the league_size) is not limited by physical constraints (up to +// some reasonable bound, which eventually depends upon the hardware +// and programming model implementation). 
+ +int main (int narg, char* args[]) { + using Kokkos::parallel_reduce; + typedef Kokkos::TeamPolicy<> team_policy; + typedef typename team_policy::member_type team_member; + + Kokkos::initialize (narg, args); + + // Set up a policy that launches 12 teams, with the maximum number + // of threads per team. + + const team_policy policy (12, Kokkos::AUTO); + + // This is a reduction with a team policy. The team policy changes + // the first argument of the lambda. Rather than an integer index + // (as with RangePolicy), it's now TeamPolicy::member_type. This + // object provides all information to identify a thread uniquely. + // It also provides some team-related function calls such as a team + // barrier (which a subsequent example will use). + // + // Every member of the team contributes to the total sum. It is + // helpful to think of the lambda's body as a "team parallel + // region." That is, every team member is active and will execute + // the body of the lambda. + int sum = 0; + parallel_reduce (policy, KOKKOS_LAMBDA (const team_member& thread, int& lsum) { + lsum += 1; + // TeamPolicy<>::member_type provides functions to query the + // multidimensional index of a thread, as well as the number of + // thread teams and the size of each team. 
+ printf ("Hello World: %i %i // %i %i\n", thread.league_rank (), + thread.team_rank (), thread.league_size (), thread.team_size ()); + }, sum); + + // The result will be 12*team_policy::team_size_max([=]{}) + printf ("Result %i\n",sum); + + Kokkos::finalize (); +} + diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/CMakeLists.txt b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..e660405345167858b985261362d6135d5e6d5c4d --- /dev/null +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/CMakeLists.txt @@ -0,0 +1,10 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# This is a tutorial, not a test, so we don't ask CTest to run it. +TRIBITS_ADD_EXECUTABLE( + tutorial_hierarchicalparallelism_02_nested_parallel_for + SOURCES nested_parallel_for.cpp + COMM serial mpi + ) diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..12ad36b31e458d155aa6dc653ab8188a7773bd18 --- /dev/null +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ../../../.. 
+SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/nested_parallel_for.cpp b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/nested_parallel_for.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4357b4fa1a19b6b36fed42fe3f0a2b526b1fa8b4 --- /dev/null +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/nested_parallel_for.cpp @@ -0,0 +1,89 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <cstdio> + +// See 01_thread_teams for an explanation of a basic TeamPolicy +typedef Kokkos::TeamPolicy<> team_policy ; +typedef typename team_policy::member_type team_member ; + +struct hello_world { + typedef int value_type; //Specify value type for reduction target, sum + KOKKOS_INLINE_FUNCTION + void operator() ( const team_member & thread, int& sum) const { + sum+=1; + // When using the TeamPolicy Kokkos allows for nested parallel loops. + // All three Kokkos parallel patterns are allowed (for, reduce, scan) and they + // largely follow the same syntax as on the global level. 
+ // The execution policy for the Thread level nesting (the Vector level is in the next + // tutorial example) is Kokkos::TeamThreadRange. This means the loop will be executed + // by all members of the team and the loop count will be split between threads of the + // team. Its arguments are the team_member, and a loop count. + // Not every thread will do the same number of iterations. On a GPU, for example, with + // a team_size() larger than 31 only the first 31 threads would actually do anything. + // On a CPU with 8 threads, 7 would execute 4 loop iterations, and 1 thread would do + // 3. Note also that the mode of splitting the count is architecture dependent, similar + // to what the RangePolicy on a global level does. + // The call itself is not guaranteed to be synchronous. Also keep in mind that the + // operator using a team_policy acts like a parallel region for the team. That means + // that everything outside of the nested parallel_for is also executed by all threads + // of the team.
+ Kokkos::parallel_for(Kokkos::TeamThreadRange(thread,31), [&] (const int& i) { + printf("Hello World: (%i , %i) executed loop %i \n",thread.league_rank(),thread.team_rank(),i); + }); + } +}; + +int main(int narg, char* args[]) { + Kokkos::initialize(narg,args); + + // Launch 3 teams of the maximum number of threads per team + const team_policy policy( 3 , team_policy::team_size_max( hello_world() ) ); + + int sum = 0; + Kokkos::parallel_reduce( policy , hello_world() , sum ); + printf("Result %i\n",sum); + + Kokkos::finalize(); +} diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/CMakeLists.txt b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..ea6b0b1e42694c2b0b5994b54309e19647a09e5f --- /dev/null +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/CMakeLists.txt @@ -0,0 +1,16 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# This is a tutorial, not a test, so we don't ask CTest to run it. + +IF(Kokkos_ENABLE_CXX11) + +TRIBITS_ADD_EXECUTABLE( + tutorial_hierarchicalparallelism_03_vectorization + SOURCES vectorization.cpp + COMM serial mpi + ) + +ENDIF() + diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..12ad36b31e458d155aa6dc653ab8188a7773bd18 --- /dev/null +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ../../../.. 
+SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/vectorization.cpp b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/vectorization.cpp new file mode 100644 index 0000000000000000000000000000000000000000..99d5958edfe12aba723b4d5455328313009fc8f6 --- /dev/null +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/vectorization.cpp @@ -0,0 +1,162 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <Kokkos_Random.hpp> +#include <cstdio> + +#ifdef KOKKOS_HAVE_CXX11 + +// The TeamPolicy actually supports 3D parallelism: Teams, Threads, Vector +// Kokkos::parallel_{for/reduce/scan} calls can be completely free nested. +// The execution policies for the nested layers are TeamThreadRange and +// ThreadVectorRange. +// The only restriction on nesting is that a given level can only be nested in a +// higher one. e.g. 
a ThreadVectorRange can be nested inside a TeamPolicy operator +// and inside a TeamThreadRange, but you cannot nest a ThreadVectorRange or a +// TeamThreadRange inside another ThreadVectorRange. +// As with the 2D execution of TeamPolicy the operator has to be considered as +// a parallel region even with respect to VectorLanes. That means even outside +// a TeamThreadRange or ThreadVectorRange loop all threads of a team and all vector lanes +// of a thread execute every line of the operator as long as there are no restrictions +// on them. +// Code lines can be restricted using Kokkos::single to either execute once PerThread +// or execute once PerTeam. +typedef typename Kokkos::TeamPolicy<>::member_type team_member ; + +struct SomeCorrelation { + typedef int value_type; //Specify value type for reduction target, sum + typedef Kokkos::DefaultExecutionSpace::scratch_memory_space shared_space; + typedef Kokkos::View<int*,shared_space,Kokkos::MemoryUnmanaged> shared_1d_int; + + Kokkos::View<const int***,Kokkos::LayoutRight> data; + Kokkos::View<int> gsum; + + SomeCorrelation(Kokkos::View<int***,Kokkos::LayoutRight> data_in, + Kokkos::View<int> sum):data(data_in),gsum(sum){} + + KOKKOS_INLINE_FUNCTION + void operator() ( const team_member & thread) const { + int i = thread.league_rank(); + + // Allocate a shared array for the team.
+ shared_1d_int count(thread.team_shmem(),data.dimension_1()); + + // With each team run a parallel_for with its threads + Kokkos::parallel_for(Kokkos::TeamThreadRange(thread,data.dimension_1()), [=] (const int& j) { + int tsum; + // Run a vector loop reduction over the inner dimension of data + // Count how many values are multiples of 4 + // Every vector lane gets the same reduction value (tsum) back, it is broadcast to all vector lanes + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(thread,data.dimension_2()), [=] (const int& k, int & vsum) { + vsum+= (data(i,j,k) % 4 == 0)?1:0; + },tsum); + + // Make sure only one vector lane adds the reduction value to the shared array, i.e. execute + // the next line only once PerThread + Kokkos::single(Kokkos::PerThread(thread),[=] () { + count(j) = tsum; + }); + }); + + // Wait for all threads to finish the parallel_for so that all shared memory writes are done + thread.team_barrier(); + + // Check with one vector lane from each thread how many consecutive + // data segments have the same number of values divisible by 4 + // The team reduction value is again broadcast to every team member (and every vector lane) + int team_sum = 0; + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(thread, data.dimension_1()-1), [=] (const int& j, int& thread_sum) { + // It is not valid to directly add to thread_sum + // Use a single function with broadcast instead + // team_sum will be used as input to the operator (i.e. it is used to initialize sum) + // the end value of sum will be broadcast to all vector lanes in the thread. + Kokkos::single(Kokkos::PerThread(thread),[=] (int& sum) { + if(count(j)==count(j+1)) sum++; + },thread_sum); + },team_sum); + + // Add with one thread and vectorlane of the team the team_sum to the global value + Kokkos::single(Kokkos::PerTeam(thread),[=] () { + Kokkos::atomic_add(&gsum(),team_sum); + }); + } + + // The functor needs to define how much shared memory it requests given a team_size. 
+ size_t team_shmem_size( int team_size ) const { + return shared_1d_int::shmem_size(data.dimension_1()); + } +}; + +int main(int narg, char* args[]) { + Kokkos::initialize(narg,args); + + // Produce some 3D random data (see Algorithms/01_random_numbers for more info) + Kokkos::View<int***,Kokkos::LayoutRight> data("Data",512,512,32); + Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857); + Kokkos::fill_random(data,rand_pool64,100); + + // A global value to put the result in + Kokkos::View<int> gsum("Sum"); + + // Each team handles a slice of the data + // Set up TeamPolicy with 512 teams with maximum number of threads per team and 16 vector lanes. + // Kokkos::AUTO will determine the number of threads + // The maximum vector length is hardware dependent but can always be smaller than the hardware allows. + // The vector length must be a power of 2. + + const Kokkos::TeamPolicy<> policy( 512 , Kokkos::AUTO , 16); + + Kokkos::parallel_for( policy , SomeCorrelation(data,gsum) ); + + Kokkos::fence(); + + // Copy result value back + int sum = 0; + Kokkos::deep_copy(sum,gsum); + printf("Result %i\n",sum); + + Kokkos::finalize(); +} + +#endif //KOKKOS_HAVE_CXX11 diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/CMakeLists.txt b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..15ad5d780340dd0e10c338530f7c88222e742169 --- /dev/null +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/CMakeLists.txt @@ -0,0 +1,10 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# This is a tutorial, not a test, so we don't ask CTest to run it. 
+TRIBITS_ADD_EXECUTABLE( + tutorial_hierarchicalparallelism_04_team_scan + SOURCES team_scan.cpp + COMM serial mpi + ) diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..12ad36b31e458d155aa6dc653ab8188a7773bd18 --- /dev/null +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ../../../.. +SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ../../../../config/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.cuda) +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = $(SRC:.cpp=.host) +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/team_scan.cpp b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/team_scan.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c12b11d04ddc99957ec4be93c3928b9c3558cb92 --- /dev/null +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/team_scan.cpp @@ -0,0 +1,141 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <Kokkos_DualView.hpp> +#include <impl/Kokkos_Timer.hpp> +#include <cstdio> +#include <cstdlib> + +typedef Kokkos::DefaultExecutionSpace Device ; +typedef Kokkos::HostSpace::execution_space Host ; + +typedef Kokkos::TeamPolicy< Device > team_policy ; +typedef team_policy::member_type team_member ; + +static const int TEAM_SIZE = 16 ; + +// Functor: each team copies one chunk of data (plus one trailing element) into team scratch, +// counts how often the value pair (k,l) occurs at consecutive positions j, j+1 of that chunk, +// and atomically accumulates the per-team counts into the global histogram. +struct find_2_tuples { + int chunk_size; + Kokkos::View<const int*> data; + Kokkos::View<int**> histogram; + + find_2_tuples(int chunk_size_, Kokkos::DualView<int*> data_, + Kokkos::DualView<int**> histogram_):chunk_size(chunk_size_), + data(data_.d_view),histogram(histogram_.d_view) { + data_.sync<Device>(); + histogram_.sync<Device>(); + histogram_.modify<Device>(); + } + + KOKKOS_INLINE_FUNCTION + void operator() ( const team_member & dev) const { + Kokkos::View<int**,Kokkos::MemoryUnmanaged> l_histogram(dev.team_shmem(),TEAM_SIZE,TEAM_SIZE); + Kokkos::View<int*,Kokkos::MemoryUnmanaged> l_data(dev.team_shmem(),chunk_size+1); + + const int i = dev.league_rank() * chunk_size; + for(int j = dev.team_rank(); j<chunk_size+1; j+=dev.team_size()) + l_data(j) = data(i+j); + + for(int k = dev.team_rank(); k < TEAM_SIZE; k+=dev.team_size()) + for(int l = 0; l < TEAM_SIZE; l++) + l_histogram(k,l) = 0; + dev.team_barrier(); + + for(int j = 0; j<chunk_size; j++) { + for(int k = dev.team_rank(); k < TEAM_SIZE; k+=dev.team_size()) + for(int l = 0; l < TEAM_SIZE; l++) { + if((l_data(j) == k) && (l_data(j+1)==l)) + l_histogram(k,l)++; + } + } + + for(int k = dev.team_rank(); k < TEAM_SIZE; k+=dev.team_size()) + for(int l = 0; l < TEAM_SIZE; l++) { + Kokkos::atomic_fetch_add(&histogram(k,l),l_histogram(k,l)); + } + dev.team_barrier(); + } + // Scratch request must cover both arrays allocated in operator(): the TEAM_SIZE x TEAM_SIZE + // histogram and the chunk_size+1 data window. It is sized from TEAM_SIZE rather than the + // runtime team_size, because the latter may be limited below TEAM_SIZE by the device. + size_t team_shmem_size( int team_size ) const { return sizeof(int)*(chunk_size+2 + TEAM_SIZE * TEAM_SIZE ); } +}; + +int main(int narg, char*
args[]) { + Kokkos::initialize(narg,args); + + int chunk_size = 1024; + int nchunks = 100000; //1024*1024; + Kokkos::DualView<int*> data("data",nchunks*chunk_size+1); + + srand(1231093); + + for(int i = 0; i < (int) data.dimension_0(); i++) { + data.h_view(i) = rand()%TEAM_SIZE; + } + data.modify<Host>(); + data.sync<Device>(); + + Kokkos::DualView<int**> histogram("histogram",TEAM_SIZE,TEAM_SIZE); + + + Kokkos::Timer timer; + // threads/team is automatically limited to maximum supported by the device. + Kokkos::parallel_for( team_policy( nchunks , TEAM_SIZE ) + , find_2_tuples(chunk_size,data,histogram) ); + Kokkos::fence(); + double time = timer.seconds(); + + histogram.sync<Host>(); + + printf("Time: %f \n\n",time); + int sum = 0; + for(int k=0; k<TEAM_SIZE; k++) { + for(int l=0; l<TEAM_SIZE; l++) { + printf("%i ",histogram.h_view(k,l)); + sum += histogram.h_view(k,l); + } + printf("\n"); + } + printf("Result: %i %i\n",sum,chunk_size*nchunks); + Kokkos::finalize(); +} + diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/CMakeLists.txt b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..e03d7aeb901871aec70c712808dea9c322cd6176 --- /dev/null +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/CMakeLists.txt @@ -0,0 +1,8 @@ + +TRIBITS_ADD_EXAMPLE_DIRECTORIES(01_thread_teams) + +IF (Kokkos_ENABLE_CXX11) + TRIBITS_ADD_EXAMPLE_DIRECTORIES(01_thread_teams_lambda) + TRIBITS_ADD_EXAMPLE_DIRECTORIES(02_nested_parallel_for) + TRIBITS_ADD_EXAMPLE_DIRECTORIES(03_vectorization) +ENDIF () diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..9d6fff7981806a6d28d7704f9d4a0e6c776c8ed0 --- /dev/null +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/Makefile @@ -0,0 +1,72 @@ +default: + cd ./01_thread_teams; \ + 
make -j 4 + cd ./01_thread_teams_lambda; \ + make -j 4 + cd ./02_nested_parallel_for; \ + make -j 4 + cd ./03_vectorization; \ + make -j 4 + cd ./04_team_scan; \ + make -j 4 + +openmp: + cd ./01_thread_teams; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./01_thread_teams_lambda; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./02_nested_parallel_for; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./03_vectorization; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./04_team_scan; \ + make -j 4 KOKKOS_DEVICES=OpenMP + +pthreads: + cd ./01_thread_teams; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./01_thread_teams_lambda; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./02_nested_parallel_for; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./03_vectorization; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./04_team_scan; \ + make -j 4 KOKKOS_DEVICES=Pthreads + +serial: + cd ./01_thread_teams; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./01_thread_teams_lambda; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./02_nested_parallel_for; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./03_vectorization; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./04_team_scan; \ + make -j 4 KOKKOS_DEVICES=Serial + +cuda: + cd ./01_thread_teams; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./01_thread_teams_lambda; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./02_nested_parallel_for; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./03_vectorization; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./04_team_scan; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + +clean: + cd ./01_thread_teams; \ + make clean + cd ./01_thread_teams_lambda; \ + make clean + cd ./02_nested_parallel_for; \ + make clean + cd ./03_vectorization; \ + make clean + cd ./04_team_scan; \ + make clean + diff --git a/lib/kokkos/example/tutorial/Makefile b/lib/kokkos/example/tutorial/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..300d98ab44340404b31dfb8690ce2a5577b55636 --- /dev/null +++ b/lib/kokkos/example/tutorial/Makefile @@ -0,0 +1,144 @@ 
+default: + cd ./01_hello_world; \ + make -j 4 + cd ./01_hello_world_lambda; \ + make -j 4 + cd ./02_simple_reduce; \ + make -j 4 + cd ./02_simple_reduce_lambda; \ + make -j 4 + cd ./03_simple_view; \ + make -j 4 + cd ./03_simple_view_lambda; \ + make -j 4 + cd ./04_simple_memoryspaces; \ + make -j 4 + cd ./05_simple_atomics; \ + make -j 4 + cd ./Advanced_Views; \ + make -j 4 + cd ./Algorithms; \ + make -j 4 + cd ./Hierarchical_Parallelism; \ + make -j 4 + +openmp: + cd ./01_hello_world; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./01_hello_world_lambda; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./02_simple_reduce; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./02_simple_reduce_lambda; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./03_simple_view; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./03_simple_view_lambda; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./04_simple_memoryspaces; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./05_simple_atomics; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./Advanced_Views; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./Algorithms; \ + make -j 4 KOKKOS_DEVICES=OpenMP + cd ./Hierarchical_Parallelism; \ + make -j 4 KOKKOS_DEVICES=OpenMP + +pthreads: + cd ./01_hello_world; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./01_hello_world_lambda; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./02_simple_reduce; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./02_simple_reduce_lambda; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./03_simple_view; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./03_simple_view_lambda; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./04_simple_memoryspaces; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./05_simple_atomics; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./Advanced_Views; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./Algorithms; \ + make -j 4 KOKKOS_DEVICES=Pthreads + cd ./Hierarchical_Parallelism; \ + make -j 4 KOKKOS_DEVICES=Pthreads + +serial: + cd ./01_hello_world; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./01_hello_world_lambda; \ + make -j 
4 KOKKOS_DEVICES=Serial + cd ./02_simple_reduce; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./02_simple_reduce_lambda; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./03_simple_view; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./03_simple_view_lambda; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./04_simple_memoryspaces; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./05_simple_atomics; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./Advanced_Views; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./Algorithms; \ + make -j 4 KOKKOS_DEVICES=Serial + cd ./Hierarchical_Parallelism; \ + make -j 4 KOKKOS_DEVICES=Serial + +cuda: + cd ./01_hello_world; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./01_hello_world_lambda; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./02_simple_reduce; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./02_simple_reduce_lambda; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./03_simple_view; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./03_simple_view_lambda; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./04_simple_memoryspaces; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./05_simple_atomics; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./Advanced_Views; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./Algorithms; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + cd ./Hierarchical_Parallelism; \ + make -j 4 KOKKOS_DEVICES=Cuda,Serial + +clean: + cd ./01_hello_world; \ + make clean + cd ./01_hello_world_lambda; \ + make clean + cd ./02_simple_reduce; \ + make clean + cd ./02_simple_reduce_lambda; \ + make clean + cd ./03_simple_view; \ + make clean + cd ./03_simple_view_lambda; \ + make clean + cd ./04_simple_memoryspaces; \ + make clean + cd ./05_simple_atomics; \ + make clean + cd ./Advanced_Views; \ + make clean + cd ./Algorithms; \ + make clean + cd ./Hierarchical_Parallelism; \ + make clean + diff --git a/lib/kokkos/example/tutorial/README b/lib/kokkos/example/tutorial/README new file mode 100644 index 
0000000000000000000000000000000000000000..4ba0b3a5d9e15e3c58326559d7a7f30e5b51ea4c --- /dev/null +++ b/lib/kokkos/example/tutorial/README @@ -0,0 +1,17 @@ +Build the examples by typing in each directory: +make -j 16 + +To specify a target device: +make openmp -j 16 +make pthreads -j 16 +make serial -j 16 +make cuda -j 16 + +The lambda variants cannot be built with CUDA=yes at the moment, since +CUDA does not support lambdas from the host. +Some of the advanced topics try to highlight performance impacts by timing +different variants of doing the same thing. +Also some of the advanced topics (in particular hierarchical parallelism) +require C++11 even without using host side lambdas. CUDA 6.5 can be used +to compile those. + diff --git a/lib/kokkos/generate_makefile.bash b/lib/kokkos/generate_makefile.bash new file mode 100755 index 0000000000000000000000000000000000000000..86f136da96ed10e0a2f23c0cb2752eaaa5287d90 --- /dev/null +++ b/lib/kokkos/generate_makefile.bash @@ -0,0 +1,336 @@ +#!/bin/bash + +KOKKOS_DEVICES="" + +while [[ $# > 0 ]] +do +key="$1" + +case $key in + --kokkos-path*) + KOKKOS_PATH="${key#*=}" + ;; + --prefix*) + PREFIX="${key#*=}" + ;; + --with-cuda) + KOKKOS_DEVICES="${KOKKOS_DEVICES},Cuda" + CUDA_PATH_NVCC=`which nvcc` + CUDA_PATH=${CUDA_PATH_NVCC%/bin/nvcc} + ;; + # Catch this before '--with-cuda*' + --with-cuda-options*) + KOKKOS_CUDA_OPT="${key#*=}" + ;; + --with-cuda*) + KOKKOS_DEVICES="${KOKKOS_DEVICES},Cuda" + CUDA_PATH="${key#*=}" + ;; + --with-openmp) + KOKKOS_DEVICES="${KOKKOS_DEVICES},OpenMP" + ;; + --with-pthread) + KOKKOS_DEVICES="${KOKKOS_DEVICES},Pthread" + ;; + --with-serial) + KOKKOS_DEVICES="${KOKKOS_DEVICES},Serial" + ;; + --with-qthread*) + KOKKOS_DEVICES="${KOKKOS_DEVICES},Qthread" + QTHREAD_PATH="${key#*=}" + ;; + --with-devices*) + DEVICES="${key#*=}" + KOKKOS_DEVICES="${KOKKOS_DEVICES},${DEVICES}" + ;; + --with-gtest*) + GTEST_PATH="${key#*=}" + ;; + --with-hwloc*) + HWLOC_PATH="${key#*=}" + ;; + --arch*) +
KOKKOS_ARCH="${key#*=}" + ;; + --cxxflags*) + CXXFLAGS="${key#*=}" + ;; + --ldflags*) + LDFLAGS="${key#*=}" + ;; + --debug|-dbg) + KOKKOS_DEBUG=yes + ;; + --compiler*) + COMPILER="${key#*=}" + ;; + --with-options*) + KOKKOS_OPT="${key#*=}" + ;; + --help) + echo "Kokkos configure options:" + echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" + echo "--prefix=/Install/Path: Path to where the Kokkos library should be installed" + echo "" + echo "--with-cuda[=/Path/To/Cuda]: enable Cuda and set path to Cuda Toolkit" + echo "--with-openmp: enable OpenMP backend" + echo "--with-pthread: enable Pthreads backend" + echo "--with-serial: enable Serial backend" + echo "--with-qthread=/Path/To/Qthread: enable Qthread backend" + echo "--with-devices: explicitly add a set of backends" + echo "" + echo "--arch=[OPTIONS]: set target architectures. Options are:" + echo " SNB = Intel Sandy/Ivy Bridge CPUs" + echo " HSW = Intel Haswell CPUs" + echo " KNC = Intel Knights Corner Xeon Phi" + echo " KNL = Intel Knights Landing Xeon Phi" + echo " Kepler30 = NVIDIA Kepler generation CC 3.0" + echo " Kepler35 = NVIDIA Kepler generation CC 3.5" + echo " Kepler37 = NVIDIA Kepler generation CC 3.7" + echo " Maxwell50 = NVIDIA Maxwell generation CC 5.0" + echo " Power8 = IBM Power 8 CPUs" + echo "" + echo "--compiler=/Path/To/Compiler set the compiler" + echo "--debug,-dbg: enable Debugging" + echo "--cxxflags=[FLAGS] overwrite CXXFLAGS for library build and test build" + echo " This will still set certain required flags via" + echo " KOKKOS_CXXFLAGS (such as -fopenmp, --std=c++11, etc.)" + echo "--ldflags=[FLAGS] overwrite LDFLAGS for library build and test build" + echo " This will still set certain required flags via" + echo " KOKKOS_LDFLAGS (such as -fopenmp, -lpthread, etc.)" + echo "--with-gtest=/Path/To/Gtest: set path to gtest (used in unit and performance tests" + echo "--with-hwloc=/Path/To/Hwloc: set path to hwloc" + echo "--with-options=[OPTIONS]: additional 
options to Kokkos:" + echo " aggressive_vectorization = add ivdep on loops" + echo "--with-cuda-options=[OPTIONS]: additional options to CUDA:" + echo " force_uvm, use_ldg, enable_lambda, rdc" + exit 0 + ;; + *) + echo "warning: ignoring unknown option $key" + ;; +esac +shift +done + +# If KOKKOS_PATH undefined, assume parent dir of this +# script is the KOKKOS_PATH +if [ -z "$KOKKOS_PATH" ]; then + KOKKOS_PATH=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) +else + # Ensure KOKKOS_PATH is abs path (quoted so paths containing spaces survive) + KOKKOS_PATH=$( cd "$KOKKOS_PATH" && pwd ) +fi + +# Refuse an in-source configure: the generated Makefile would be written into the Kokkos tree. +# Exit non-zero so calling scripts can detect the refusal. +if [ "${KOKKOS_PATH}" = "${PWD}" ] || [ "${KOKKOS_PATH}" = "${PWD}/" ]; then +echo "Running generate_makefile.bash in the Kokkos root directory is not allowed" +exit 1 +fi + +# Collect every option that was set into one make variable-assignment string. +KOKKOS_OPTIONS="KOKKOS_PATH=${KOKKOS_PATH}" + +if [ ${#COMPILER} -gt 0 ]; then +KOKKOS_OPTIONS="${KOKKOS_OPTIONS} CXX=${COMPILER}" +fi +if [ ${#PREFIX} -gt 0 ]; then +KOKKOS_OPTIONS="${KOKKOS_OPTIONS} PREFIX=${PREFIX}" +fi +if [ ${#KOKKOS_DEVICES} -gt 0 ]; then +KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_DEVICES=${KOKKOS_DEVICES}" +fi +if [ ${#KOKKOS_ARCH} -gt 0 ]; then +KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_ARCH=${KOKKOS_ARCH}" +fi +if [ ${#KOKKOS_DEBUG} -gt 0 ]; then +KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_DEBUG=${KOKKOS_DEBUG}" +fi +if [ ${#CUDA_PATH} -gt 0 ]; then +KOKKOS_OPTIONS="${KOKKOS_OPTIONS} CUDA_PATH=${CUDA_PATH}" +fi +if [ ${#CXXFLAGS} -gt 0 ]; then +KOKKOS_OPTIONS="${KOKKOS_OPTIONS} CXXFLAGS=\"${CXXFLAGS}\"" +fi +if [ ${#LDFLAGS} -gt 0 ]; then +KOKKOS_OPTIONS="${KOKKOS_OPTIONS} LDFLAGS=\"${LDFLAGS}\"" +fi +if [ ${#GTEST_PATH} -gt 0 ]; then +KOKKOS_OPTIONS="${KOKKOS_OPTIONS} GTEST_PATH=${GTEST_PATH}" +else +GTEST_PATH=${KOKKOS_PATH}/tpls/gtest +KOKKOS_OPTIONS="${KOKKOS_OPTIONS} GTEST_PATH=${GTEST_PATH}" +fi +if [ ${#HWLOC_PATH} -gt 0 ]; then +KOKKOS_OPTIONS="${KOKKOS_OPTIONS} HWLOC_PATH=${HWLOC_PATH} KOKKOS_USE_TPLS=hwloc" +fi +if [ ${#QTHREAD_PATH} -gt 0 ]; then +KOKKOS_OPTIONS="${KOKKOS_OPTIONS} QTHREAD_PATH=${QTHREAD_PATH}" +fi +if [
${#KOKKOS_OPT} -gt 0 ]; then +KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_OPTIONS=${KOKKOS_OPT}" +fi +if [ ${#KOKKOS_CUDA_OPT} -gt 0 ]; then +KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPT}" +fi +mkdir core +mkdir core/unit_test +mkdir core/perf_test +mkdir containers +mkdir containers/unit_tests +mkdir containers/performance_tests +mkdir algorithms +mkdir algorithms/unit_tests +mkdir algorithms/performance_tests +mkdir example +mkdir example/fixture +mkdir example/feint +mkdir example/fenl + +if [ ${#KOKKOS_ENABLE_EXAMPLE_ICHOL} -gt 0 ]; then +mkdir example/ichol +fi + +# Generate subdirectory makefiles. +echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > core/unit_test/Makefile +echo "" >> core/unit_test/Makefile +echo "all:" >> core/unit_test/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_OPTIONS}" >> core/unit_test/Makefile +echo "" >> core/unit_test/Makefile +echo "test: all" >> core/unit_test/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_OPTIONS} test" >> core/unit_test/Makefile +echo "" >> core/unit_test/Makefile +echo "clean:" >> core/unit_test/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_OPTIONS} clean" >> core/unit_test/Makefile + +echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > core/perf_test/Makefile +echo "" >> core/perf_test/Makefile +echo "all:" >> core/perf_test/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_OPTIONS}" >> core/perf_test/Makefile +echo "" >> core/perf_test/Makefile +echo "test: all" >> core/perf_test/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_OPTIONS} test" >> core/perf_test/Makefile +echo "" >> core/perf_test/Makefile +echo "clean:" >> core/perf_test/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_OPTIONS} clean" >> core/perf_test/Makefile + +echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > containers/unit_tests/Makefile +echo "" >> 
containers/unit_tests/Makefile +echo "all:" >> containers/unit_tests/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_OPTIONS}" >> containers/unit_tests/Makefile +echo "" >> containers/unit_tests/Makefile +echo "test: all" >> containers/unit_tests/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_OPTIONS} test" >> containers/unit_tests/Makefile +echo "" >> containers/unit_tests/Makefile +echo "clean:" >> containers/unit_tests/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_OPTIONS} clean" >> containers/unit_tests/Makefile + +echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > containers/performance_tests/Makefile +echo "" >> containers/performance_tests/Makefile +echo "all:" >> containers/performance_tests/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_OPTIONS}" >> containers/performance_tests/Makefile +echo "" >> containers/performance_tests/Makefile +echo "test: all" >> containers/performance_tests/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_OPTIONS} test" >> containers/performance_tests/Makefile +echo "" >> containers/performance_tests/Makefile +echo "clean:" >> containers/performance_tests/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_OPTIONS} clean" >> containers/performance_tests/Makefile + +echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > algorithms/unit_tests/Makefile +echo "" >> algorithms/unit_tests/Makefile +echo "all:" >> algorithms/unit_tests/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_OPTIONS}" >> algorithms/unit_tests/Makefile +echo "" >> algorithms/unit_tests/Makefile +echo "test: all" >> algorithms/unit_tests/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_OPTIONS} test" >> algorithms/unit_tests/Makefile +echo "" >> 
algorithms/unit_tests/Makefile +echo "clean:" >> algorithms/unit_tests/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_OPTIONS} clean" >> algorithms/unit_tests/Makefile + +echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > example/fixture/Makefile +echo "" >> example/fixture/Makefile +echo "all:" >> example/fixture/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_OPTIONS}" >> example/fixture/Makefile +echo "" >> example/fixture/Makefile +echo "test: all" >> example/fixture/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_OPTIONS} test" >> example/fixture/Makefile +echo "" >> example/fixture/Makefile +echo "clean:" >> example/fixture/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_OPTIONS} clean" >> example/fixture/Makefile + +echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > example/feint/Makefile +echo "" >> example/feint/Makefile +echo "all:" >> example/feint/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_OPTIONS}" >> example/feint/Makefile +echo "" >> example/feint/Makefile +echo "test: all" >> example/feint/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_OPTIONS} test" >> example/feint/Makefile +echo "" >> example/feint/Makefile +echo "clean:" >> example/feint/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_OPTIONS} clean" >> example/feint/Makefile + +echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > example/fenl/Makefile +echo "" >> example/fenl/Makefile +echo "all:" >> example/fenl/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_OPTIONS}" >> example/fenl/Makefile +echo "" >> example/fenl/Makefile +echo "test: all" >> example/fenl/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_OPTIONS} test" >> example/fenl/Makefile +echo "" >> example/fenl/Makefile +echo "clean:" >> example/fenl/Makefile +echo -e "\tmake -f 
${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_OPTIONS} clean" >> example/fenl/Makefile + +if [ ${#KOKKOS_ENABLE_EXAMPLE_ICHOL} -gt 0 ]; then +echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > example/ichol/Makefile +echo "" >> example/ichol/Makefile +echo "all:" >> example/ichol/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_OPTIONS}" >> example/ichol/Makefile +echo "" >> example/ichol/Makefile +echo "test: all" >> example/ichol/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_OPTIONS} test" >> example/ichol/Makefile +echo "" >> example/ichol/Makefile +echo "clean:" >> example/ichol/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_OPTIONS} clean" >> example/ichol/Makefile +fi + +# Generate top level directory makefile. +echo "Generating Makefiles with options " ${KOKKOS_OPTIONS} +echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > Makefile +echo "" >> Makefile +echo "lib:" >> Makefile +echo -e "\tcd core; \\" >> Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_OPTIONS}" >> Makefile +echo "" >> Makefile +echo "install: lib" >> Makefile +echo -e "\tcd core; \\" >> Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_OPTIONS} install" >> Makefile +echo "" >> Makefile +echo "build-test:" >> Makefile +echo -e "\tmake -C core/unit_test" >> Makefile +echo -e "\tmake -C core/perf_test" >> Makefile +echo -e "\tmake -C containers/unit_tests" >> Makefile +echo -e "\tmake -C containers/performance_tests" >> Makefile +echo -e "\tmake -C algorithms/unit_tests" >> Makefile +echo -e "\tmake -C example/fixture" >> Makefile +echo -e "\tmake -C example/feint" >> Makefile +echo -e "\tmake -C example/fenl" >> Makefile +echo "" >> Makefile +echo "test: build-test" >> Makefile +echo -e "\tmake -C core/unit_test test" >> Makefile +echo -e "\tmake -C core/perf_test test" >> Makefile +echo -e "\tmake -C containers/unit_tests test" >> Makefile +echo -e "\tmake -C 
containers/performance_tests test" >> Makefile +echo -e "\tmake -C algorithms/unit_tests test" >> Makefile +echo -e "\tmake -C example/fixture test" >> Makefile +echo -e "\tmake -C example/feint test" >> Makefile +echo -e "\tmake -C example/fenl test" >> Makefile +echo "" >> Makefile +echo "clean:" >> Makefile +echo -e "\tmake -C core/unit_test clean" >> Makefile +echo -e "\tmake -C core/perf_test clean" >> Makefile +echo -e "\tmake -C containers/unit_tests clean" >> Makefile +echo -e "\tmake -C containers/performance_tests clean" >> Makefile +echo -e "\tmake -C algorithms/unit_tests clean" >> Makefile +echo -e "\tmake -C example/fixture clean" >> Makefile +echo -e "\tmake -C example/feint clean" >> Makefile +echo -e "\tmake -C example/fenl clean" >> Makefile +echo -e "\tcd core; \\" >> Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_OPTIONS} clean" >> Makefile diff --git a/lib/kokkos/tpls/gtest/gtest/LICENSE b/lib/kokkos/tpls/gtest/gtest/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..1941a11f8ce94389160b458927a29ba217542818 --- /dev/null +++ b/lib/kokkos/tpls/gtest/gtest/LICENSE @@ -0,0 +1,28 @@ +Copyright 2008, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/lib/kokkos/tpls/gtest/gtest/README b/lib/kokkos/tpls/gtest/gtest/README new file mode 100644 index 0000000000000000000000000000000000000000..82964ecc329b474002c66cf534999519e8fc39a3 --- /dev/null +++ b/lib/kokkos/tpls/gtest/gtest/README @@ -0,0 +1,13 @@ +This is a fused source version of gtest 1.7.0. All that should be necessary to +start using gtest in your package is to declare the dependency and include +gtest/gtest.h. + +However, because some of the packages that are developed in Sierra do not use a +fused source version of gtest we need to make it possible for them to build with +this version as well as with their native build. To facilitate this we have +created symlinks for the other gtest headers that they use to the fused source +gtest.h. This will make it possible for them to find the headers while still using +the fused source version. This should not have any ill effects since the header is +protected and allows for only using the non-gtest.h headers in their files.
+ + diff --git a/lib/kokkos/tpls/gtest/gtest/gtest-all.cc b/lib/kokkos/tpls/gtest/gtest/gtest-all.cc new file mode 100644 index 0000000000000000000000000000000000000000..538c78db930ea72b5de6d5a9282c2f69e71e5c13 --- /dev/null +++ b/lib/kokkos/tpls/gtest/gtest/gtest-all.cc @@ -0,0 +1,9594 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Author: mheule@google.com (Markus Heule) +// +// Google C++ Testing Framework (Google Test) +// +// Sometimes it's desirable to build Google Test by compiling a single file. +// This file serves this purpose. + +// This line ensures that gtest.h can be compiled on its own, even +// when it's fused. +#include "gtest/gtest.h" + +// The following lines pull in the real gtest *.cc files. +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Author: wan@google.com (Zhanyong Wan) +// +// The Google C++ Testing Framework (Google Test) + +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// Utilities for testing Google Test itself and code that uses Google Test +// (e.g. frameworks built on top of Google Test). 
+ +#ifndef GTEST_INCLUDE_GTEST_GTEST_SPI_H_ +#define GTEST_INCLUDE_GTEST_GTEST_SPI_H_ + + +namespace testing { + +// This helper class can be used to mock out Google Test failure reporting +// so that we can test Google Test or code that builds on Google Test. +// +// An object of this class appends a TestPartResult object to the +// TestPartResultArray object given in the constructor whenever a Google Test +// failure is reported. It can either intercept only failures that are +// generated in the same thread that created this object or it can intercept +// all generated failures. The scope of this mock object can be controlled with +// the first argument (intercept_mode) of the two-argument constructor. +class GTEST_API_ ScopedFakeTestPartResultReporter + : public TestPartResultReporterInterface { + public: + // The two possible mocking modes of this object. + enum InterceptMode { + INTERCEPT_ONLY_CURRENT_THREAD, // Intercepts only thread local failures. + INTERCEPT_ALL_THREADS // Intercepts all failures. + }; + + // The c'tor sets this object as the test part result reporter used + // by Google Test. The 'result' parameter specifies where to report the + // results. This reporter will only catch failures generated in the current + // thread. DEPRECATED + explicit ScopedFakeTestPartResultReporter(TestPartResultArray* result); + + // Same as above, but you can choose the interception scope of this object. + ScopedFakeTestPartResultReporter(InterceptMode intercept_mode, + TestPartResultArray* result); + + // The d'tor restores the previous test part result reporter. + virtual ~ScopedFakeTestPartResultReporter(); + + // Appends the TestPartResult object to the TestPartResultArray + // received in the constructor. + // + // This method is from the TestPartResultReporterInterface + // interface.
+ virtual void ReportTestPartResult(const TestPartResult& result); + private: + void Init(); + + const InterceptMode intercept_mode_; + TestPartResultReporterInterface* old_reporter_; + TestPartResultArray* const result_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedFakeTestPartResultReporter); +}; + +namespace internal { + +// A helper class for implementing EXPECT_FATAL_FAILURE() and +// EXPECT_NONFATAL_FAILURE(). Its destructor verifies that the given +// TestPartResultArray contains exactly one failure that has the given +// type and contains the given substring. If that's not the case, a +// non-fatal failure will be generated. +class GTEST_API_ SingleFailureChecker { + public: + // The constructor remembers the arguments. + SingleFailureChecker(const TestPartResultArray* results, + TestPartResult::Type type, + const string& substr); + ~SingleFailureChecker(); + private: + const TestPartResultArray* const results_; + const TestPartResult::Type type_; + const string substr_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(SingleFailureChecker); +}; + +} // namespace internal + +} // namespace testing + +// A set of macros for testing Google Test assertions or code that's expected +// to generate Google Test fatal failures. It verifies that the given +// statement will cause exactly one fatal Google Test failure with 'substr' +// being part of the failure message. +// +// There are two different versions of this macro. EXPECT_FATAL_FAILURE only +// affects and considers failures generated in the current thread and +// EXPECT_FATAL_FAILURE_ON_ALL_THREADS does the same but for all threads. +// +// The verification of the assertion is done correctly even when the statement +// throws an exception or aborts the current function. +// +// Known restrictions: +// - 'statement' cannot reference local non-static variables or +// non-static members of the current object. +// - 'statement' cannot return a value. +// - You cannot stream a failure message to this macro. 
+// +// Note that even though the implementations of the following two +// macros are much alike, we cannot refactor them to use a common +// helper macro, due to some peculiarity in how the preprocessor +// works. The AcceptsMacroThatExpandsToUnprotectedComma test in +// gtest_unittest.cc will fail to compile if we do that. +#define EXPECT_FATAL_FAILURE(statement, substr) \ + do { \ + class GTestExpectFatalFailureHelper {\ + public:\ + static void Execute() { statement; }\ + };\ + ::testing::TestPartResultArray gtest_failures;\ + ::testing::internal::SingleFailureChecker gtest_checker(\ + &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\ + {\ + ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\ + ::testing::ScopedFakeTestPartResultReporter:: \ + INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\ + GTestExpectFatalFailureHelper::Execute();\ + }\ + } while (::testing::internal::AlwaysFalse()) + +#define EXPECT_FATAL_FAILURE_ON_ALL_THREADS(statement, substr) \ + do { \ + class GTestExpectFatalFailureHelper {\ + public:\ + static void Execute() { statement; }\ + };\ + ::testing::TestPartResultArray gtest_failures;\ + ::testing::internal::SingleFailureChecker gtest_checker(\ + &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\ + {\ + ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\ + ::testing::ScopedFakeTestPartResultReporter:: \ + INTERCEPT_ALL_THREADS, &gtest_failures);\ + GTestExpectFatalFailureHelper::Execute();\ + }\ + } while (::testing::internal::AlwaysFalse()) + +// A macro for testing Google Test assertions or code that's expected to +// generate Google Test non-fatal failures. It asserts that the given +// statement will cause exactly one non-fatal Google Test failure with 'substr' +// being part of the failure message. +// +// There are two different versions of this macro.
EXPECT_NONFATAL_FAILURE only +// affects and considers failures generated in the current thread and +// EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS does the same but for all threads. +// +// 'statement' is allowed to reference local variables and members of +// the current object. +// +// The verification of the assertion is done correctly even when the statement +// throws an exception or aborts the current function. +// +// Known restrictions: +// - You cannot stream a failure message to this macro. +// +// Note that even though the implementations of the following two +// macros are much alike, we cannot refactor them to use a common +// helper macro, due to some peculiarity in how the preprocessor +// works. If we do that, the code won't compile when the user gives +// EXPECT_NONFATAL_FAILURE() a statement that contains a macro that +// expands to code containing an unprotected comma. The +// AcceptsMacroThatExpandsToUnprotectedComma test in gtest_unittest.cc +// catches that. +// +// For the same reason, we have to write +// if (::testing::internal::AlwaysTrue()) { statement; } +// instead of +// GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) +// to avoid an MSVC warning on unreachable code. 
+#define EXPECT_NONFATAL_FAILURE(statement, substr) \ + do {\ + ::testing::TestPartResultArray gtest_failures;\ + ::testing::internal::SingleFailureChecker gtest_checker(\ + &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \ + (substr));\ + {\ + ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\ + ::testing::ScopedFakeTestPartResultReporter:: \ + INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\ + if (::testing::internal::AlwaysTrue()) { statement; }\ + }\ + } while (::testing::internal::AlwaysFalse()) + +#define EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS(statement, substr) \ + do {\ + ::testing::TestPartResultArray gtest_failures;\ + ::testing::internal::SingleFailureChecker gtest_checker(\ + &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \ + (substr));\ + {\ + ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\ + ::testing::ScopedFakeTestPartResultReporter::INTERCEPT_ALL_THREADS, \ + &gtest_failures);\ + if (::testing::internal::AlwaysTrue()) { statement; }\ + }\ + } while (::testing::internal::AlwaysFalse()) + +#endif // GTEST_INCLUDE_GTEST_GTEST_SPI_H_ + +#include <ctype.h> +#include <math.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <wchar.h> +#include <wctype.h> + +#include <algorithm> +#include <iomanip> +#include <limits> +#include <ostream> // NOLINT +#include <sstream> +#include <vector> + +#if GTEST_OS_LINUX + +// TODO(kenton@google.com): Use autoconf to detect availability of +// gettimeofday(). +# define GTEST_HAS_GETTIMEOFDAY_ 1 + +# include <fcntl.h> // NOLINT +# include <limits.h> // NOLINT +# include <sched.h> // NOLINT +// Declares vsnprintf(). This header is not available on Windows.
+# include <strings.h> // NOLINT +# include <sys/mman.h> // NOLINT +# include <sys/time.h> // NOLINT +# include <unistd.h> // NOLINT +# include <string> + +#elif GTEST_OS_SYMBIAN +# define GTEST_HAS_GETTIMEOFDAY_ 1 +# include <sys/time.h> // NOLINT + +#elif GTEST_OS_ZOS +# define GTEST_HAS_GETTIMEOFDAY_ 1 +# include <sys/time.h> // NOLINT + +// On z/OS we additionally need strings.h for strcasecmp. +# include <strings.h> // NOLINT + +#elif GTEST_OS_WINDOWS_MOBILE // We are on Windows CE. + +# include <windows.h> // NOLINT + +#elif GTEST_OS_WINDOWS // We are on Windows proper. + +# include <io.h> // NOLINT +# include <sys/timeb.h> // NOLINT +# include <sys/types.h> // NOLINT +# include <sys/stat.h> // NOLINT + +# if GTEST_OS_WINDOWS_MINGW +// MinGW has gettimeofday() but not _ftime64(). +// TODO(kenton@google.com): Use autoconf to detect availability of +// gettimeofday(). +// TODO(kenton@google.com): There are other ways to get the time on +// Windows, like GetTickCount() or GetSystemTimeAsFileTime(). MinGW +// supports these. consider using them instead. +# define GTEST_HAS_GETTIMEOFDAY_ 1 +# include <sys/time.h> // NOLINT +# endif // GTEST_OS_WINDOWS_MINGW + +// cpplint thinks that the header is already included, so we want to +// silence it. +# include <windows.h> // NOLINT + +#else + +// Assume other platforms have gettimeofday(). +// TODO(kenton@google.com): Use autoconf to detect availability of +// gettimeofday(). +# define GTEST_HAS_GETTIMEOFDAY_ 1 + +// cpplint thinks that the header is already included, so we want to +// silence it. +# include <sys/time.h> // NOLINT +# include <unistd.h> // NOLINT + +#endif // GTEST_OS_LINUX + +#if GTEST_HAS_EXCEPTIONS +# include <stdexcept> +#endif + +#if GTEST_CAN_STREAM_RESULTS_ +# include <arpa/inet.h> // NOLINT +# include <netdb.h> // NOLINT +#endif + +// Indicates that this translation unit is part of Google Test's +// implementation. 
It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick is to +// prevent a user from accidentally including gtest-internal-inl.h in +// his code. +#define GTEST_IMPLEMENTATION_ 1 +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Utility functions and classes used by the Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) +// +// This file contains purely Google Test's internal implementation. 
Please +// DO NOT #INCLUDE IT IN A USER PROGRAM. + +#ifndef GTEST_SRC_GTEST_INTERNAL_INL_H_ +#define GTEST_SRC_GTEST_INTERNAL_INL_H_ + +// GTEST_IMPLEMENTATION_ is defined to 1 iff the current translation unit is +// part of Google Test's implementation; otherwise it's undefined. +#if !GTEST_IMPLEMENTATION_ +// A user is trying to include this from his code - just say no. +# error "gtest-internal-inl.h is part of Google Test's internal implementation." +# error "It must not be included except by Google Test itself." +#endif // GTEST_IMPLEMENTATION_ + +#ifndef _WIN32_WCE +# include <errno.h> +#endif // !_WIN32_WCE +#include <stddef.h> +#include <stdlib.h> // For strtoll/_strtoul64/malloc/free. +#include <string.h> // For memmove. + +#include <algorithm> +#include <string> +#include <vector> + + +#if GTEST_CAN_STREAM_RESULTS_ +# include <arpa/inet.h> // NOLINT +# include <netdb.h> // NOLINT +#endif + +#if GTEST_OS_WINDOWS +# include <windows.h> // NOLINT +#endif // GTEST_OS_WINDOWS + + +namespace testing { + +// Declares the flags. +// +// We don't want the users to modify this flag in the code, but want +// Google Test's own unit tests to be able to access it. Therefore we +// declare it here as opposed to in gtest.h. +GTEST_DECLARE_bool_(death_test_use_fork); + +namespace internal { + +// The value of GetTestTypeId() as seen from within the Google Test +// library. This is solely for testing GetTestTypeId(). +GTEST_API_ extern const TypeId kTestTypeIdInGoogleTest; + +// Names of the flags (needed for parsing Google Test flags). 
+const char kAlsoRunDisabledTestsFlag[] = "also_run_disabled_tests"; +const char kBreakOnFailureFlag[] = "break_on_failure"; +const char kCatchExceptionsFlag[] = "catch_exceptions"; +const char kColorFlag[] = "color"; +const char kFilterFlag[] = "filter"; +const char kListTestsFlag[] = "list_tests"; +const char kOutputFlag[] = "output"; +const char kPrintTimeFlag[] = "print_time"; +const char kRandomSeedFlag[] = "random_seed"; +const char kRepeatFlag[] = "repeat"; +const char kShuffleFlag[] = "shuffle"; +const char kStackTraceDepthFlag[] = "stack_trace_depth"; +const char kStreamResultToFlag[] = "stream_result_to"; +const char kThrowOnFailureFlag[] = "throw_on_failure"; + +// A valid random seed must be in [1, kMaxRandomSeed]. +const int kMaxRandomSeed = 99999; + +// g_help_flag is true iff the --help flag or an equivalent form is +// specified on the command line. +GTEST_API_ extern bool g_help_flag; + +// Returns the current time in milliseconds. +GTEST_API_ TimeInMillis GetTimeInMillis(); + +// Returns true iff Google Test should use colors in the output. +GTEST_API_ bool ShouldUseColor(bool stdout_is_tty); + +// Formats the given time in milliseconds as seconds. +GTEST_API_ std::string FormatTimeInMillisAsSeconds(TimeInMillis ms); + +// Converts the given time in milliseconds to a date string in the ISO 8601 +// format, without the timezone information. N.B.: due to the use of the +// non-reentrant localtime() function, this function is not thread safe. Do +// not use it in any code that can be called from multiple threads. +GTEST_API_ std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms); + +// Parses a string for an Int32 flag, in the form of "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value.
+GTEST_API_ bool ParseInt32Flag( + const char* str, const char* flag, Int32* value); + +// Returns a random seed in range [1, kMaxRandomSeed] based on the +// given --gtest_random_seed flag value. A flag value of 0 means the +// seed is derived from the current time (GetTimeInMillis()). +inline int GetRandomSeedFromFlag(Int32 random_seed_flag) { + const unsigned int raw_seed = (random_seed_flag == 0) ? + static_cast<unsigned int>(GetTimeInMillis()) : + static_cast<unsigned int>(random_seed_flag); + + // Normalizes the actual seed to range [1, kMaxRandomSeed] such that + // it's easy to type. + const int normalized_seed = + static_cast<int>((raw_seed - 1U) % + static_cast<unsigned int>(kMaxRandomSeed)) + 1; + return normalized_seed; +} + +// Returns the first valid random seed after 'seed'. The behavior is +// undefined if 'seed' is invalid. The seed after kMaxRandomSeed is +// considered to be 1. +inline int GetNextRandomSeed(int seed) { + GTEST_CHECK_(1 <= seed && seed <= kMaxRandomSeed) + << "Invalid random seed " << seed << " - must be in [1, " + << kMaxRandomSeed << "]."; + const int next_seed = seed + 1; + return (next_seed > kMaxRandomSeed) ? 1 : next_seed; +} + +// This class saves the values of all Google Test flags in its c'tor, and +// restores them in its d'tor. +class GTestFlagSaver { + public: + // The c'tor. Copies the current value of each flag into the + // corresponding private field.
+ GTestFlagSaver() { + also_run_disabled_tests_ = GTEST_FLAG(also_run_disabled_tests); + break_on_failure_ = GTEST_FLAG(break_on_failure); + catch_exceptions_ = GTEST_FLAG(catch_exceptions); + color_ = GTEST_FLAG(color); + death_test_style_ = GTEST_FLAG(death_test_style); + death_test_use_fork_ = GTEST_FLAG(death_test_use_fork); + filter_ = GTEST_FLAG(filter); + internal_run_death_test_ = GTEST_FLAG(internal_run_death_test); + list_tests_ = GTEST_FLAG(list_tests); + output_ = GTEST_FLAG(output); + print_time_ = GTEST_FLAG(print_time); + random_seed_ = GTEST_FLAG(random_seed); + repeat_ = GTEST_FLAG(repeat); + shuffle_ = GTEST_FLAG(shuffle); + stack_trace_depth_ = GTEST_FLAG(stack_trace_depth); + stream_result_to_ = GTEST_FLAG(stream_result_to); + throw_on_failure_ = GTEST_FLAG(throw_on_failure); + } + + // The d'tor is not virtual. DO NOT INHERIT FROM THIS CLASS. + // It writes every value saved by the c'tor back to its flag. + ~GTestFlagSaver() { + GTEST_FLAG(also_run_disabled_tests) = also_run_disabled_tests_; + GTEST_FLAG(break_on_failure) = break_on_failure_; + GTEST_FLAG(catch_exceptions) = catch_exceptions_; + GTEST_FLAG(color) = color_; + GTEST_FLAG(death_test_style) = death_test_style_; + GTEST_FLAG(death_test_use_fork) = death_test_use_fork_; + GTEST_FLAG(filter) = filter_; + GTEST_FLAG(internal_run_death_test) = internal_run_death_test_; + GTEST_FLAG(list_tests) = list_tests_; + GTEST_FLAG(output) = output_; + GTEST_FLAG(print_time) = print_time_; + GTEST_FLAG(random_seed) = random_seed_; + GTEST_FLAG(repeat) = repeat_; + GTEST_FLAG(shuffle) = shuffle_; + GTEST_FLAG(stack_trace_depth) = stack_trace_depth_; + GTEST_FLAG(stream_result_to) = stream_result_to_; + GTEST_FLAG(throw_on_failure) = throw_on_failure_; + } + + private: + // Fields for saving the original values of flags.
+ bool also_run_disabled_tests_; + bool break_on_failure_; + bool catch_exceptions_; + std::string color_; + std::string death_test_style_; + bool death_test_use_fork_; + std::string filter_; + std::string internal_run_death_test_; + bool list_tests_; + std::string output_; + bool print_time_; + internal::Int32 random_seed_; + internal::Int32 repeat_; + bool shuffle_; + internal::Int32 stack_trace_depth_; + std::string stream_result_to_; + bool throw_on_failure_; +} GTEST_ATTRIBUTE_UNUSED_; + +// Converts a Unicode code point to a narrow string in UTF-8 encoding. +// code_point parameter is of type UInt32 because wchar_t may not be +// wide enough to contain a code point. +// If the code_point is not a valid Unicode code point +// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted +// to "(Invalid Unicode 0xXXXXXXXX)". +GTEST_API_ std::string CodePointToUtf8(UInt32 code_point); + +// Converts a wide string to a narrow string in UTF-8 encoding. +// The wide string is assumed to have the following encoding: +// UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS) +// UTF-32 if sizeof(wchar_t) == 4 (on Linux) +// Parameter str points to a null-terminated wide string. +// Parameter num_chars may additionally limit the number +// of wchar_t characters processed. -1 is used when the entire string +// should be processed. +// If the string contains code points that are not valid Unicode code points +// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output +// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding +// and contains invalid UTF-16 surrogate pairs, values in those pairs +// will be encoded as individual Unicode characters from Basic Multilingual Plane. +GTEST_API_ std::string WideStringToUtf8(const wchar_t* str, int num_chars); + +// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file +// if the variable is present.
If a file already exists at this location, this +// function will write over it. If the variable is present, but the file cannot +// be created, prints an error and exits. +void WriteToShardStatusFileIfNeeded(); + +// Checks whether sharding is enabled by examining the relevant +// environment variable values. If the variables are present, +// but inconsistent (e.g., shard_index >= total_shards), prints +// an error and exits. If in_subprocess_for_death_test, sharding is +// disabled because it must only be applied to the original test +// process. Otherwise, we could filter out death tests we intended to execute. +GTEST_API_ bool ShouldShard(const char* total_shards_str, + const char* shard_index_str, + bool in_subprocess_for_death_test); + +// Parses the environment variable var as an Int32. If it is unset, +// returns default_val. If it is not an Int32, prints an error +// and aborts. +GTEST_API_ Int32 Int32FromEnvOrDie(const char* env_var, Int32 default_val); + +// Given the total number of shards, the shard index, and the test id, +// returns true iff the test should be run on this shard. The test id is +// some arbitrary but unique non-negative integer assigned to each test +// method. Assumes that 0 <= shard_index < total_shards. +GTEST_API_ bool ShouldRunTestOnShard( + int total_shards, int shard_index, int test_id); + +// STL container utilities. + +// Returns the number of elements in the given container that satisfy +// the given predicate. +template <class Container, typename Predicate> +inline int CountIf(const Container& c, Predicate predicate) { + // Implemented as an explicit loop since std::count_if() in libCstd on + // Solaris has a non-standard signature. + int count = 0; + for (typename Container::const_iterator it = c.begin(); it != c.end(); ++it) { + if (predicate(*it)) + ++count; + } + return count; +} + +// Applies a function/functor to each element in the container.
+template <class Container, typename Functor> +void ForEach(const Container& c, Functor functor) { + std::for_each(c.begin(), c.end(), functor); +} + +// Returns the i-th element of the vector, or default_value if i is not +// in range [0, v.size()). +template <typename E> +inline E GetElementOr(const std::vector<E>& v, int i, E default_value) { + return (i < 0 || i >= static_cast<int>(v.size())) ? default_value : v[i]; +} + +// Performs an in-place shuffle of a range of the vector's elements. +// 'begin' and 'end' are element indices as an STL-style range; +// i.e. [begin, end) are shuffled, where 'end' == size() means to +// shuffle to the end of the vector. +template <typename E> +void ShuffleRange(internal::Random* random, int begin, int end, + std::vector<E>* v) { + const int size = static_cast<int>(v->size()); + GTEST_CHECK_(0 <= begin && begin <= size) + << "Invalid shuffle range start " << begin << ": must be in range [0, " + << size << "]."; + GTEST_CHECK_(begin <= end && end <= size) + << "Invalid shuffle range finish " << end << ": must be in range [" + << begin << ", " << size << "]."; + + // Fisher-Yates shuffle, from + // http://en.wikipedia.org/wiki/Fisher-Yates_shuffle + // The loop stops once fewer than two elements remain, because a + // single remaining element has nothing to be swapped with. + for (int range_width = end - begin; range_width >= 2; range_width--) { + const int last_in_range = begin + range_width - 1; + const int selected = begin + random->Generate(range_width); + std::swap((*v)[selected], (*v)[last_in_range]); + } +} + +// Performs an in-place shuffle of the vector's elements. +template <typename E> +inline void Shuffle(internal::Random* random, std::vector<E>* v) { + ShuffleRange(random, 0, static_cast<int>(v->size()), v); +} + +// A function for deleting an object. Handy for being used as a +// functor. +template <typename T> +static void Delete(T* x) { + delete x; +} + +// A predicate that checks the key of a TestProperty against a known key. +// +// TestPropertyKeyIs is copyable. +class TestPropertyKeyIs { + public: + // Constructor.
+ // + // TestPropertyKeyIs has NO default constructor. + explicit TestPropertyKeyIs(const std::string& key) : key_(key) {} + + // Returns true iff the key of the given test property equals key_. + bool operator()(const TestProperty& test_property) const { + return test_property.key() == key_; + } + + private: + std::string key_; +}; + +// Class UnitTestOptions. +// +// This class contains functions for processing options the user +// specifies when running the tests. It has only static members. +// +// In most cases, the user can specify an option using either an +// environment variable or a command line flag. E.g. you can set the +// test filter using either GTEST_FILTER or --gtest_filter. If both +// the variable and the flag are present, the latter overrides the +// former. +class GTEST_API_ UnitTestOptions { + public: + // Functions for processing the gtest_output flag. + + // Returns the output format, or "" for normal printed output. + static std::string GetOutputFormat(); + + // Returns the absolute path of the requested output file, or the + // default (test_detail.xml in the original working directory) if + // none was explicitly specified. + static std::string GetAbsolutePathToOutputFile(); + + // Functions for processing the gtest_filter flag. + + // Returns true iff the wildcard pattern matches the string. The + // first ':' or '\0' character in pattern marks the end of it. + // + // This recursive algorithm isn't very efficient, but is clear and + // works well enough for matching test names, which are short. + static bool PatternMatchesString(const char *pattern, const char *str); + + // Returns true iff the user-specified filter matches the test case + // name and the test name. + static bool FilterMatchesTest(const std::string &test_case_name, + const std::string &test_name); + +#if GTEST_OS_WINDOWS + // Function for supporting the gtest_catch_exception flag.
+ + // Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the + // given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise. + // This function is useful as an __except condition. + static int GTestShouldProcessSEH(DWORD exception_code); +#endif // GTEST_OS_WINDOWS + + // Returns true if "name" matches the ':' separated list of glob-style + // filters in "filter". + static bool MatchesFilter(const std::string& name, const char* filter); +}; + +// Returns the current application's name, removing directory path if that +// is present. Used by UnitTestOptions::GetOutputFile. +GTEST_API_ FilePath GetCurrentExecutableName(); + +// The role interface for getting the OS stack trace as a string. +class OsStackTraceGetterInterface { + public: + OsStackTraceGetterInterface() {} + virtual ~OsStackTraceGetterInterface() {} + + // Returns the current OS stack trace as an std::string. Parameters: + // + // max_depth - the maximum number of stack frames to be included + // in the trace. + // skip_count - the number of top frames to be skipped; doesn't count + // against max_depth. + virtual string CurrentStackTrace(int max_depth, int skip_count) = 0; + + // UponLeavingGTest() should be called immediately before Google Test calls + // user code. It saves some information about the current stack that + // CurrentStackTrace() will use to find and hide Google Test stack frames. + virtual void UponLeavingGTest() = 0; + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetterInterface); +}; + +// A working implementation of the OsStackTraceGetterInterface interface. +class OsStackTraceGetter : public OsStackTraceGetterInterface { + public: + OsStackTraceGetter() : caller_frame_(NULL) {} + + virtual string CurrentStackTrace(int max_depth, int skip_count) + GTEST_LOCK_EXCLUDED_(mutex_); + + virtual void UponLeavingGTest() GTEST_LOCK_EXCLUDED_(mutex_); + + // This string is inserted in place of stack frames that are part of + // Google Test's implementation. 
+ static const char* const kElidedFramesMarker; + + private: + Mutex mutex_; // protects all internal state + + // We save the stack frame below the frame that calls user code. + // We do this because the address of the frame immediately below + // the user code changes between the call to UponLeavingGTest() + // and any calls to CurrentStackTrace() from within the user code. + void* caller_frame_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetter); +}; + +// Information about a Google Test trace point. +struct TraceInfo { + const char* file; + int line; + std::string message; +}; + +// This is the default global test part result reporter used in UnitTestImpl. +// This class should only be used by UnitTestImpl. +class DefaultGlobalTestPartResultReporter + : public TestPartResultReporterInterface { + public: + explicit DefaultGlobalTestPartResultReporter(UnitTestImpl* unit_test); + // Implements the TestPartResultReporterInterface. Reports the test part + // result in the current test. + virtual void ReportTestPartResult(const TestPartResult& result); + + private: + UnitTestImpl* const unit_test_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultGlobalTestPartResultReporter); +}; + +// This is the default per thread test part result reporter used in +// UnitTestImpl. This class should only be used by UnitTestImpl. +class DefaultPerThreadTestPartResultReporter + : public TestPartResultReporterInterface { + public: + explicit DefaultPerThreadTestPartResultReporter(UnitTestImpl* unit_test); + // Implements the TestPartResultReporterInterface. The implementation just + // delegates to the current global test part result reporter of *unit_test_. + virtual void ReportTestPartResult(const TestPartResult& result); + + private: + UnitTestImpl* const unit_test_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultPerThreadTestPartResultReporter); +}; + +// The private implementation of the UnitTest class. 
// We don't protect
// the methods under a mutex, as this class is not accessible by a
// user and the UnitTest class that delegates work to this class does
// proper locking.
class GTEST_API_ UnitTestImpl {
 public:
  explicit UnitTestImpl(UnitTest* parent);
  virtual ~UnitTestImpl();

  // There are two different ways to register your own TestPartResultReporter.
  // You can register your own reporter to listen either only for test results
  // from the current thread or for results from all threads.
  // By default, each per-thread test result reporter just passes a new
  // TestPartResult to the global test result reporter, which registers the
  // test part result for the currently running test.

  // Returns the global test part result reporter.
  TestPartResultReporterInterface* GetGlobalTestPartResultReporter();

  // Sets the global test part result reporter.
  void SetGlobalTestPartResultReporter(
      TestPartResultReporterInterface* reporter);

  // Returns the test part result reporter for the current thread.
  TestPartResultReporterInterface* GetTestPartResultReporterForCurrentThread();

  // Sets the test part result reporter for the current thread.
  void SetTestPartResultReporterForCurrentThread(
      TestPartResultReporterInterface* reporter);

  // Gets the number of successful test cases.
  int successful_test_case_count() const;

  // Gets the number of failed test cases.
  int failed_test_case_count() const;

  // Gets the number of all test cases.
  int total_test_case_count() const;

  // Gets the number of all test cases that contain at least one test
  // that should run.
  int test_case_to_run_count() const;

  // Gets the number of successful tests.
  int successful_test_count() const;

  // Gets the number of failed tests.
  int failed_test_count() const;

  // Gets the number of disabled tests that will be reported in the XML report.
  int reportable_disabled_test_count() const;

  // Gets the number of disabled tests.
  int disabled_test_count() const;

  // Gets the number of tests to be printed in the XML report.
  int reportable_test_count() const;

  // Gets the number of all tests.
  int total_test_count() const;

  // Gets the number of tests that should run.
  int test_to_run_count() const;

  // Gets the time of the test program start, in ms from the start of the
  // UNIX epoch.
  TimeInMillis start_timestamp() const { return start_timestamp_; }

  // Gets the elapsed time, in milliseconds.
  TimeInMillis elapsed_time() const { return elapsed_time_; }

  // Returns true iff the unit test passed (i.e. all test cases passed).
  bool Passed() const { return !Failed(); }

  // Returns true iff the unit test failed (i.e. some test case failed
  // or something outside of all tests failed).
  bool Failed() const {
    return failed_test_case_count() > 0 || ad_hoc_test_result()->Failed();
  }

  // Gets the i-th test case among all the test cases. i can range from 0 to
  // total_test_case_count() - 1. If i is not in that range, returns NULL.
  //
  // NOTE(review): the range check goes through test_case_indices_, but the
  // element returned is test_cases_[i] (registration order), whereas
  // GetMutableTestCase() below returns test_cases_[index] (shuffled order).
  // Confirm that this asymmetry is intended before relying on either order.
  const TestCase* GetTestCase(int i) const {
    const int index = GetElementOr(test_case_indices_, i, -1);
    return index < 0 ? NULL : test_cases_[i];
  }

  // Gets the i-th test case among all the test cases. i can range from 0 to
  // total_test_case_count() - 1. If i is not in that range, returns NULL.
  // The lookup is indirected through test_case_indices_.
  TestCase* GetMutableTestCase(int i) {
    const int index = GetElementOr(test_case_indices_, i, -1);
    return index < 0 ? NULL : test_cases_[index];
  }

  // Provides access to the event listener list.
  TestEventListeners* listeners() { return &listeners_; }

  // Returns the TestResult for the test that's currently running, or
  // the TestResult for the ad hoc test if no test is running.
  TestResult* current_test_result();

  // Returns the TestResult for the ad hoc test.
  const TestResult* ad_hoc_test_result() const { return &ad_hoc_test_result_; }

  // Sets the OS stack trace getter.
  //
  // Does nothing if the input and the current OS stack trace getter
  // are the same; otherwise, deletes the old getter and makes the
  // input the current getter.
  void set_os_stack_trace_getter(OsStackTraceGetterInterface* getter);

  // Returns the current OS stack trace getter if it is not NULL;
  // otherwise, creates an OsStackTraceGetter, makes it the current
  // getter, and returns it.
  OsStackTraceGetterInterface* os_stack_trace_getter();

  // Returns the current OS stack trace as an std::string.
  //
  // The maximum number of stack frames to be included is specified by
  // the gtest_stack_trace_depth flag. The skip_count parameter
  // specifies the number of top frames to be skipped, which doesn't
  // count against the number of frames to be included.
  //
  // For example, if Foo() calls Bar(), which in turn calls
  // CurrentOsStackTraceExceptTop(1), Foo() will be included in the
  // trace but Bar() and CurrentOsStackTraceExceptTop() won't.
  std::string CurrentOsStackTraceExceptTop(int skip_count) GTEST_NO_INLINE_;

  // Finds and returns a TestCase with the given name. If one doesn't
  // exist, creates one and returns it.
  //
  // Arguments:
  //
  //   test_case_name: name of the test case
  //   type_param:     the name of the test's type parameter, or NULL if
  //                   this is not a typed or a type-parameterized test.
  //   set_up_tc:      pointer to the function that sets up the test case
  //   tear_down_tc:   pointer to the function that tears down the test case
  TestCase* GetTestCase(const char* test_case_name,
                        const char* type_param,
                        Test::SetUpTestCaseFunc set_up_tc,
                        Test::TearDownTestCaseFunc tear_down_tc);

  // Adds a TestInfo to the unit test.
  //
  // Arguments:
  //
  //   set_up_tc:    pointer to the function that sets up the test case
  //   tear_down_tc: pointer to the function that tears down the test case
  //   test_info:    the TestInfo object
  void AddTestInfo(Test::SetUpTestCaseFunc set_up_tc,
                   Test::TearDownTestCaseFunc tear_down_tc,
                   TestInfo* test_info) {
    // In order to support thread-safe death tests, we need to
    // remember the original working directory when the test program
    // was first invoked. We cannot do this in RUN_ALL_TESTS(), as
    // the user may have changed the current directory before calling
    // RUN_ALL_TESTS(). Therefore we capture the current directory in
    // AddTestInfo(), which is called to register a TEST or TEST_F
    // before main() is reached.
    if (original_working_dir_.IsEmpty()) {
      original_working_dir_.Set(FilePath::GetCurrentDir());
      GTEST_CHECK_(!original_working_dir_.IsEmpty())
          << "Failed to get the current working directory.";
    }

    // Look up (or create) the owning test case, then hand the test to it.
    GetTestCase(test_info->test_case_name(),
                test_info->type_param(),
                set_up_tc,
                tear_down_tc)->AddTestInfo(test_info);
  }

#if GTEST_HAS_PARAM_TEST
  // Returns ParameterizedTestCaseRegistry object used to keep track of
  // value-parameterized tests and instantiate and register them.
  internal::ParameterizedTestCaseRegistry& parameterized_test_registry() {
    return parameterized_test_registry_;
  }
#endif  // GTEST_HAS_PARAM_TEST

  // Sets the TestCase object for the test that's currently running.
  void set_current_test_case(TestCase* a_current_test_case) {
    current_test_case_ = a_current_test_case;
  }

  // Sets the TestInfo object for the test that's currently running. If
  // current_test_info is NULL, the assertion results will be stored in
  // ad_hoc_test_result_.
  void set_current_test_info(TestInfo* a_current_test_info) {
    current_test_info_ = a_current_test_info;
  }

  // Registers all parameterized tests defined using TEST_P and
  // INSTANTIATE_TEST_CASE_P, creating regular tests for each test/parameter
  // combination. This method can be called more than once; it has guards
  // protecting from registering the tests more than once. If
  // value-parameterized tests are disabled, RegisterParameterizedTests is
  // present but does nothing.
  void RegisterParameterizedTests();

  // Runs all tests in this UnitTest object, prints the result, and
  // returns true if all tests are successful. If any exception is
  // thrown during a test, this test is considered to be failed, but
  // the rest of the tests will still be run.
  bool RunAllTests();

  // Clears the results of all tests, except the ad hoc tests.
  void ClearNonAdHocTestResult() {
    ForEach(test_cases_, TestCase::ClearTestCaseResult);
  }

  // Clears the results of ad-hoc test assertions.
  void ClearAdHocTestResult() {
    ad_hoc_test_result_.Clear();
  }

  // Adds a TestProperty to the current TestResult object when invoked in a
  // context of a test or a test case, or to the global property set. If the
  // result already contains a property with the same key, the value will be
  // updated.
  void RecordProperty(const TestProperty& test_property);

  enum ReactionToSharding {
    HONOR_SHARDING_PROTOCOL,
    IGNORE_SHARDING_PROTOCOL
  };

  // Matches the full name of each test against the user-specified
  // filter to decide whether the test should run, then records the
  // result in each TestCase and TestInfo object.
  // If shard_tests == HONOR_SHARDING_PROTOCOL, further filters tests
  // based on sharding variables in the environment.
  // Returns the number of tests that should run.
  int FilterTests(ReactionToSharding shard_tests);

  // Prints the names of the tests matching the user-specified filter flag.
  void ListTestsMatchingFilter();

  const TestCase* current_test_case() const { return current_test_case_; }
  TestInfo* current_test_info() { return current_test_info_; }
  const TestInfo* current_test_info() const { return current_test_info_; }

  // Returns the vector of environments that need to be set-up/torn-down
  // before/after the tests are run.
  std::vector<Environment*>& environments() { return environments_; }

  // Getters for the per-thread Google Test trace stack.
  std::vector<TraceInfo>& gtest_trace_stack() {
    return *(gtest_trace_stack_.pointer());
  }
  const std::vector<TraceInfo>& gtest_trace_stack() const {
    return gtest_trace_stack_.get();
  }

#if GTEST_HAS_DEATH_TEST
  // Parses the internal death test flag and stores the result, replacing
  // any previously stored value.
  void InitDeathTestSubprocessControlInfo() {
    internal_run_death_test_flag_.reset(ParseInternalRunDeathTestFlag());
  }
  // Returns a pointer to the parsed --gtest_internal_run_death_test
  // flag, or NULL if that flag was not specified.
  // This information is useful only in a death test child process.
  // Must not be called before a call to InitGoogleTest.
  const InternalRunDeathTestFlag* internal_run_death_test_flag() const {
    return internal_run_death_test_flag_.get();
  }

  // Returns a pointer to the current death test factory.
  internal::DeathTestFactory* death_test_factory() {
    return death_test_factory_.get();
  }

  void SuppressTestEventsIfInSubprocess();

  friend class ReplaceDeathTestFactory;
#endif  // GTEST_HAS_DEATH_TEST

  // Initializes the event listener performing XML output as specified by
  // UnitTestOptions. Must not be called before InitGoogleTest.
  void ConfigureXmlOutput();

#if GTEST_CAN_STREAM_RESULTS_
  // Initializes the event listener for streaming test results to a socket.
  // Must not be called before InitGoogleTest.
  void ConfigureStreamingOutput();
#endif

  // Performs initialization dependent upon flag values obtained in
  // ParseGoogleTestFlagsOnly. Is called from InitGoogleTest after the call to
  // ParseGoogleTestFlagsOnly. In case a user neglects to call InitGoogleTest
  // this function is also called from RunAllTests. Since this function can be
  // called more than once, it has to be idempotent.
  void PostFlagParsingInit();

  // Gets the random seed used at the start of the current test iteration.
  int random_seed() const { return random_seed_; }

  // Gets the random number generator.
  internal::Random* random() { return &random_; }

  // Shuffles all test cases, and the tests within each test case,
  // making sure that death tests are still run first.
  void ShuffleTests();

  // Restores the test cases and tests to their order before the first shuffle.
  void UnshuffleTests();

  // Returns the value of GTEST_FLAG(catch_exceptions) at the moment
  // UnitTest::Run() starts.
  bool catch_exceptions() const { return catch_exceptions_; }

 private:
  friend class ::testing::UnitTest;

  // Used by UnitTest::Run() to capture the state of
  // GTEST_FLAG(catch_exceptions) at the moment it starts.
  void set_catch_exceptions(bool value) { catch_exceptions_ = value; }

  // The UnitTest object that owns this implementation object.
  UnitTest* const parent_;

  // The working directory when the first TEST() or TEST_F() was
  // executed.
  internal::FilePath original_working_dir_;

  // The default test part result reporters.
  DefaultGlobalTestPartResultReporter default_global_test_part_result_reporter_;
  DefaultPerThreadTestPartResultReporter
      default_per_thread_test_part_result_reporter_;

  // Points to (but doesn't own) the global test part result reporter.
  // (The member name is spelled "repoter"; the identifier is left as is.)
  TestPartResultReporterInterface* global_test_part_result_repoter_;

  // Protects read and write access to global_test_part_result_repoter_.
  internal::Mutex global_test_part_result_reporter_mutex_;

  // Points to (but doesn't own) the per-thread test part result reporter.
  internal::ThreadLocal<TestPartResultReporterInterface*>
      per_thread_test_part_result_reporter_;

  // The vector of environments that need to be set-up/torn-down
  // before/after the tests are run.
  std::vector<Environment*> environments_;

  // The vector of TestCases in their original order. It owns the
  // elements in the vector.
  std::vector<TestCase*> test_cases_;

  // Provides a level of indirection for the test case list to allow
  // easy shuffling and restoring the test case order. The i-th
  // element of this vector is the index of the i-th test case in the
  // shuffled order.
  std::vector<int> test_case_indices_;

#if GTEST_HAS_PARAM_TEST
  // ParameterizedTestRegistry object used to register value-parameterized
  // tests.
  internal::ParameterizedTestCaseRegistry parameterized_test_registry_;

  // Indicates whether RegisterParameterizedTests() has been called already.
  bool parameterized_tests_registered_;
#endif  // GTEST_HAS_PARAM_TEST

  // Index of the last death test case registered. Initially -1.
  int last_death_test_case_;

  // This points to the TestCase for the currently running test. It
  // changes as Google Test goes through one test case after another.
  // When no test is running, this is set to NULL and Google Test
  // stores assertion results in ad_hoc_test_result_. Initially NULL.
  TestCase* current_test_case_;

  // This points to the TestInfo for the currently running test. It
  // changes as Google Test goes through one test after another. When
  // no test is running, this is set to NULL and Google Test stores
  // assertion results in ad_hoc_test_result_. Initially NULL.
  TestInfo* current_test_info_;

  // Normally, a user only writes assertions inside a TEST or TEST_F,
  // or inside a function called by a TEST or TEST_F. Since Google
  // Test keeps track of which test is currently running, it can
  // associate such an assertion with the test it belongs to.
  //
  // If an assertion is encountered when no TEST or TEST_F is running,
  // Google Test attributes the assertion result to an imaginary "ad hoc"
  // test, and records the result in ad_hoc_test_result_.
  TestResult ad_hoc_test_result_;

  // The list of event listeners that can be used to track events inside
  // Google Test.
  TestEventListeners listeners_;

  // The OS stack trace getter. Will be deleted when the UnitTest
  // object is destructed. By default, an OsStackTraceGetter is used,
  // but the user can set this field to use a custom getter if that is
  // desired.
  OsStackTraceGetterInterface* os_stack_trace_getter_;

  // True iff PostFlagParsingInit() has been called.
  bool post_flag_parse_init_performed_;

  // The random number seed used at the beginning of the test run.
  int random_seed_;

  // Our random number generator.
  internal::Random random_;

  // The time of the test program start, in ms from the start of the
  // UNIX epoch.
  TimeInMillis start_timestamp_;

  // How long the test took to run, in milliseconds.
  TimeInMillis elapsed_time_;

#if GTEST_HAS_DEATH_TEST
  // The decomposed components of the gtest_internal_run_death_test flag,
  // parsed when RUN_ALL_TESTS is called.
  internal::scoped_ptr<InternalRunDeathTestFlag> internal_run_death_test_flag_;
  internal::scoped_ptr<internal::DeathTestFactory> death_test_factory_;
#endif  // GTEST_HAS_DEATH_TEST

  // A per-thread stack of traces created by the SCOPED_TRACE() macro.
  internal::ThreadLocal<std::vector<TraceInfo> > gtest_trace_stack_;

  // The value of GTEST_FLAG(catch_exceptions) at the moment RunAllTests()
  // starts.
  bool catch_exceptions_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTestImpl);
};  // class UnitTestImpl

// Convenience function for accessing the global UnitTest
// implementation object.
+inline UnitTestImpl* GetUnitTestImpl() { + return UnitTest::GetInstance()->impl(); +} + +#if GTEST_USES_SIMPLE_RE + +// Internal helper functions for implementing the simple regular +// expression matcher. +GTEST_API_ bool IsInSet(char ch, const char* str); +GTEST_API_ bool IsAsciiDigit(char ch); +GTEST_API_ bool IsAsciiPunct(char ch); +GTEST_API_ bool IsRepeat(char ch); +GTEST_API_ bool IsAsciiWhiteSpace(char ch); +GTEST_API_ bool IsAsciiWordChar(char ch); +GTEST_API_ bool IsValidEscape(char ch); +GTEST_API_ bool AtomMatchesChar(bool escaped, char pattern, char ch); +GTEST_API_ bool ValidateRegex(const char* regex); +GTEST_API_ bool MatchRegexAtHead(const char* regex, const char* str); +GTEST_API_ bool MatchRepetitionAndRegexAtHead( + bool escaped, char ch, char repeat, const char* regex, const char* str); +GTEST_API_ bool MatchRegexAnywhere(const char* regex, const char* str); + +#endif // GTEST_USES_SIMPLE_RE + +// Parses the command line for Google Test flags, without initializing +// other parts of Google Test. +GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, char** argv); +GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv); + +#if GTEST_HAS_DEATH_TEST + +// Returns the message describing the last system error, regardless of the +// platform. +GTEST_API_ std::string GetLastErrnoDescription(); + +# if GTEST_OS_WINDOWS +// Provides leak-safe Windows kernel handle ownership. 
class AutoHandle {
 public:
  // Starts out owning nothing.
  AutoHandle() : handle_(INVALID_HANDLE_VALUE) {}
  // Takes ownership of the given handle.
  explicit AutoHandle(HANDLE handle) : handle_(handle) {}

  // Closes the owned handle, if any.
  ~AutoHandle() { Reset(); }

  // Returns the owned handle without giving up ownership.
  HANDLE Get() const { return handle_; }
  // Closes the owned handle, if any, and owns nothing afterwards.
  void Reset() { Reset(INVALID_HANDLE_VALUE); }
  // Replaces the owned handle with the given one, closing the old handle
  // first. Resetting to the handle already owned is a no-op.
  void Reset(HANDLE handle) {
    if (handle != handle_) {
      if (handle_ != INVALID_HANDLE_VALUE)
        ::CloseHandle(handle_);
      handle_ = handle;
    }
  }

 private:
  HANDLE handle_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(AutoHandle);
};
# endif  // GTEST_OS_WINDOWS

// Attempts to parse a string into a non-negative integer pointed to by the
// number parameter. Returns true if that is possible; *number is written
// only on success. Note that "0" is accepted.
// GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we can use
// it here.
template <typename Integer>
bool ParseNaturalNumber(const ::std::string& str, Integer* number) {
  // Fail fast if the given string does not begin with a digit;
  // this bypasses strtoXXX's "optional leading whitespace and plus
  // or minus sign" semantics, which are undesirable here.
  if (str.empty() || !IsDigit(str[0])) {
    return false;
  }
  // Cleared so that a non-zero errno after the conversion can be attributed
  // to the conversion itself.
  errno = 0;

  char* end;
  // BiggestConvertible is the largest integer type that system-provided
  // string-to-number conversion routines can return.

# if GTEST_OS_WINDOWS && !defined(__GNUC__)

  // MSVC and C++ Builder define __int64 instead of the standard long long.
  typedef unsigned __int64 BiggestConvertible;
  const BiggestConvertible parsed = _strtoui64(str.c_str(), &end, 10);

# else

  typedef unsigned long long BiggestConvertible;  // NOLINT
  const BiggestConvertible parsed = strtoull(str.c_str(), &end, 10);

# endif  // GTEST_OS_WINDOWS && !defined(__GNUC__)

  // The whole string must have been consumed and no error reported.
  const bool parse_success = *end == '\0' && errno == 0;

  // TODO(vladl@google.com): Convert this to compile time assertion when it is
  // available.
  GTEST_CHECK_(sizeof(Integer) <= sizeof(parsed));

  // Narrow to the requested type, then widen back: the round trip only
  // reproduces 'parsed' if the value fits into Integer.
  const Integer result = static_cast<Integer>(parsed);
  if (parse_success && static_cast<BiggestConvertible>(result) == parsed) {
    *number = result;
    return true;
  }
  return false;
}
#endif  // GTEST_HAS_DEATH_TEST

// TestResult contains some private methods that should be hidden from
// Google Test user but are required for testing. This class allows our tests
// to access them.
//
// This class is supplied only for the purpose of testing Google Test's own
// constructs. Do not use it in user tests, either directly or indirectly.
class TestResultAccessor {
 public:
  // Forwards to TestResult::RecordProperty().
  static void RecordProperty(TestResult* test_result,
                             const std::string& xml_element,
                             const TestProperty& property) {
    test_result->RecordProperty(xml_element, property);
  }

  // Forwards to TestResult::ClearTestPartResults().
  static void ClearTestPartResults(TestResult* test_result) {
    test_result->ClearTestPartResults();
  }

  // Forwards to TestResult::test_part_results().
  static const std::vector<testing::TestPartResult>& test_part_results(
      const TestResult& test_result) {
    return test_result.test_part_results();
  }
};

#if GTEST_CAN_STREAM_RESULTS_

// Streams test results to the given port on the given host machine.
class StreamingListener : public EmptyTestEventListener {
 public:
  // Abstract base class for writing strings to a socket.
  class AbstractSocketWriter {
   public:
    virtual ~AbstractSocketWriter() {}

    // Sends a string to the socket.
    virtual void Send(const string& message) = 0;

    // Closes the socket.
    virtual void CloseConnection() {}

    // Sends a string and a newline to the socket.
    void SendLn(const string& message) {
      Send(message + "\n");
    }
  };

  // Concrete class for actually writing strings to a socket.
  class SocketWriter : public AbstractSocketWriter {
   public:
    // Connects to host:port immediately.
    SocketWriter(const string& host, const string& port)
        : sockfd_(-1), host_name_(host), port_num_(port) {
      MakeConnection();
    }

    // Closes the connection if it is still open.
    virtual ~SocketWriter() {
      if (sockfd_ != -1)
        CloseConnection();
    }

    // Sends a string to the socket.
    virtual void Send(const string& message) {
      GTEST_CHECK_(sockfd_ != -1)
          << "Send() can be called only when there is a connection.";

      // A write that does not transfer exactly 'len' bytes is only logged;
      // it is not retried.
      const int len = static_cast<int>(message.length());
      if (write(sockfd_, message.c_str(), len) != len) {
        GTEST_LOG_(WARNING)
            << "stream_result_to: failed to stream to "
            << host_name_ << ":" << port_num_;
      }
    }

   private:
    // Creates a client socket and connects to the server.
    void MakeConnection();

    // Closes the socket.
    //
    // Declared private here, but the base class declares it as a public
    // virtual, so it is still reachable through an AbstractSocketWriter
    // pointer (which is how StreamingListener calls it).
    void CloseConnection() {
      GTEST_CHECK_(sockfd_ != -1)
          << "CloseConnection() can be called only when there is a connection.";

      close(sockfd_);
      sockfd_ = -1;
    }

    int sockfd_;  // socket file descriptor
    const string host_name_;
    const string port_num_;

    GTEST_DISALLOW_COPY_AND_ASSIGN_(SocketWriter);
  };  // class SocketWriter

  // Escapes '=', '&', '%', and '\n' characters in str as "%xx".
  static string UrlEncode(const char* str);

  // Streams to a newly created SocketWriter for host:port and sends the
  // protocol version line right away.
  StreamingListener(const string& host, const string& port)
      : socket_writer_(new SocketWriter(host, port)) { Start(); }

  // Streams to the given writer, which is held in a scoped_ptr from now on,
  // and sends the protocol version line right away.
  explicit StreamingListener(AbstractSocketWriter* socket_writer)
      : socket_writer_(socket_writer) { Start(); }

  void OnTestProgramStart(const UnitTest& /* unit_test */) {
    SendLn("event=TestProgramStart");
  }

  void OnTestProgramEnd(const UnitTest& unit_test) {
    // Note that Google Test currently only reports elapsed time for each
    // test iteration, not for the entire test program.
    SendLn("event=TestProgramEnd&passed=" + FormatBool(unit_test.Passed()));

    // Notify the streaming server to stop.
    socket_writer_->CloseConnection();
  }

  void OnTestIterationStart(const UnitTest& /* unit_test */, int iteration) {
    SendLn("event=TestIterationStart&iteration=" +
           StreamableToString(iteration));
  }

  void OnTestIterationEnd(const UnitTest& unit_test, int /* iteration */) {
    SendLn("event=TestIterationEnd&passed=" +
           FormatBool(unit_test.Passed()) + "&elapsed_time=" +
           StreamableToString(unit_test.elapsed_time()) + "ms");
  }

  // The test case name is sent as is (not URL-encoded).
  void OnTestCaseStart(const TestCase& test_case) {
    SendLn(std::string("event=TestCaseStart&name=") + test_case.name());
  }

  void OnTestCaseEnd(const TestCase& test_case) {
    SendLn("event=TestCaseEnd&passed=" + FormatBool(test_case.Passed())
           + "&elapsed_time=" + StreamableToString(test_case.elapsed_time())
           + "ms");
  }

  // The test name is sent as is (not URL-encoded).
  void OnTestStart(const TestInfo& test_info) {
    SendLn(std::string("event=TestStart&name=") + test_info.name());
  }

  void OnTestEnd(const TestInfo& test_info) {
    SendLn("event=TestEnd&passed=" +
           FormatBool((test_info.result())->Passed()) +
           "&elapsed_time=" +
           StreamableToString((test_info.result())->elapsed_time()) + "ms");
  }

  void OnTestPartResult(const TestPartResult& test_part_result) {
    // A missing file name is reported as an empty string.
    const char* file_name = test_part_result.file_name();
    if (file_name == NULL)
      file_name = "";
    SendLn("event=TestPartResult&file=" + UrlEncode(file_name) +
           "&line=" + StreamableToString(test_part_result.line_number()) +
           "&message=" + UrlEncode(test_part_result.message()));
  }

 private:
  // Sends the given message and a newline to the socket.
  void SendLn(const string& message) { socket_writer_->SendLn(message); }

  // Called at the start of streaming to notify the receiver what
  // protocol we are using.
  void Start() { SendLn("gtest_streaming_protocol_version=1.0"); }

  // Encodes a boolean as "1" (true) or "0" (false).
  string FormatBool(bool value) { return value ? "1" : "0"; }

  const scoped_ptr<AbstractSocketWriter> socket_writer_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(StreamingListener);
};  // class StreamingListener

#endif  // GTEST_CAN_STREAM_RESULTS_

}  // namespace internal
}  // namespace testing

#endif  // GTEST_SRC_GTEST_INTERNAL_INL_H_
#undef GTEST_IMPLEMENTATION_

#if GTEST_OS_WINDOWS
# define vsnprintf _vsnprintf
#endif  // GTEST_OS_WINDOWS

namespace testing {

using internal::CountIf;
using internal::ForEach;
using internal::GetElementOr;
using internal::Shuffle;

// Constants.

// A test whose test case name or test name matches this filter is
// disabled and not run.
static const char kDisableTestFilter[] = "DISABLED_*:*/DISABLED_*";

// A test case whose name matches this filter is considered a death
// test case and will be run before test cases whose name doesn't
// match this filter.
static const char kDeathTestCaseFilter[] = "*DeathTest:*DeathTest/*";

// A test filter that matches everything.
static const char kUniversalFilter[] = "*";

// The default output file for XML output.
static const char kDefaultOutputFile[] = "test_detail.xml";

// The environment variable name for the test shard index.
static const char kTestShardIndex[] = "GTEST_SHARD_INDEX";
// The environment variable name for the total number of test shards.
static const char kTestTotalShards[] = "GTEST_TOTAL_SHARDS";
// The environment variable name for the test shard status file.
static const char kTestShardStatusFile[] = "GTEST_SHARD_STATUS_FILE";

namespace internal {

// The text used in failure messages to indicate the start of the
// stack trace.
const char kStackTraceMarker[] = "\nStack trace:\n";

// g_help_flag is true iff the --help flag or an equivalent form is
// specified on the command line.
bool g_help_flag = false;

}  // namespace internal

// Returns the filter used when the user supplies none: the universal
// filter, i.e. every test is selected.
static const char* GetDefaultFilter() {
  return kUniversalFilter;
}

// Flag definitions. Where a default is computed by a *FromGTestEnv() helper,
// the helper receives the flag name and a fallback value; flags with a
// literal default (list_tests, show_internal_stack_frames) do not go through
// such a helper.

GTEST_DEFINE_bool_(
    also_run_disabled_tests,
    internal::BoolFromGTestEnv("also_run_disabled_tests", false),
    "Run disabled tests too, in addition to the tests normally being run.");

GTEST_DEFINE_bool_(
    break_on_failure,
    internal::BoolFromGTestEnv("break_on_failure", false),
    "True iff a failed assertion should be a debugger break-point.");

GTEST_DEFINE_bool_(
    catch_exceptions,
    internal::BoolFromGTestEnv("catch_exceptions", true),
    "True iff " GTEST_NAME_
    " should catch exceptions and treat them as test failures.");

GTEST_DEFINE_string_(
    color,
    internal::StringFromGTestEnv("color", "auto"),
    "Whether to use colors in the output. Valid values: yes, no, "
    "and auto. 'auto' means to use colors if the output is "
    "being sent to a terminal and the TERM environment variable "
    "is set to a terminal type that supports colors.");

GTEST_DEFINE_string_(
    filter,
    internal::StringFromGTestEnv("filter", GetDefaultFilter()),
    "A colon-separated list of glob (not regex) patterns "
    "for filtering the tests to run, optionally followed by a "
    "'-' and a : separated list of negative patterns (tests to "
    "exclude). A test is run if it matches one of the positive "
    "patterns and does not match any of the negative patterns.");

GTEST_DEFINE_bool_(list_tests, false,
                   "List all tests without running them.");

GTEST_DEFINE_string_(
    output,
    internal::StringFromGTestEnv("output", ""),
    "A format (currently must be \"xml\"), optionally followed "
    "by a colon and an output file name or directory. A directory "
    "is indicated by a trailing pathname separator. "
    "Examples: \"xml:filename.xml\", \"xml::directoryname/\". "
    "If a directory is specified, output files will be created "
    "within that directory, with file-names based on the test "
    "executable's name and, if necessary, made unique by adding "
    "digits.");

GTEST_DEFINE_bool_(
    print_time,
    internal::BoolFromGTestEnv("print_time", true),
    "True iff " GTEST_NAME_
    " should display elapsed time in text output.");

GTEST_DEFINE_int32_(
    random_seed,
    internal::Int32FromGTestEnv("random_seed", 0),
    "Random number seed to use when shuffling test orders. Must be in range "
    "[1, 99999], or 0 to use a seed based on the current time.");

GTEST_DEFINE_int32_(
    repeat,
    internal::Int32FromGTestEnv("repeat", 1),
    "How many times to repeat each test. Specify a negative number "
    "for repeating forever. Useful for shaking out flaky tests.");

GTEST_DEFINE_bool_(
    show_internal_stack_frames, false,
    "True iff " GTEST_NAME_ " should include internal stack frames when "
    "printing test failure stack traces.");

GTEST_DEFINE_bool_(
    shuffle,
    internal::BoolFromGTestEnv("shuffle", false),
    "True iff " GTEST_NAME_
    " should randomize tests' order on every run.");

GTEST_DEFINE_int32_(
    stack_trace_depth,
    internal::Int32FromGTestEnv("stack_trace_depth", kMaxStackTraceDepth),
    "The maximum number of stack frames to print when an "
    "assertion fails. The valid range is 0 through 100, inclusive.");

GTEST_DEFINE_string_(
    stream_result_to,
    internal::StringFromGTestEnv("stream_result_to", ""),
    "This flag specifies the host name and the port number on which to stream "
    "test results. Example: \"localhost:555\". The flag is effective only on "
    "Linux.");

GTEST_DEFINE_bool_(
    throw_on_failure,
    internal::BoolFromGTestEnv("throw_on_failure", false),
    "When this flag is specified, a failed assertion will throw an exception "
    "if exceptions are enabled or exit the program with a non-zero code "
    "otherwise.");

namespace internal {

// Generates a random number from [0, range), using a Linear
// Congruential Generator (LCG). Crashes if 'range' is 0 or greater
// than kMaxRange.
//
// Note that the generator state is advanced before 'range' is validated.
UInt32 Random::Generate(UInt32 range) {
  // These constants are the same as are used in glibc's rand(3).
  state_ = (1103515245U*state_ + 12345U) % kMaxRange;

  GTEST_CHECK_(range > 0)
      << "Cannot generate a number in the range [0, 0).";
  GTEST_CHECK_(range <= kMaxRange)
      << "Generation of a number in [0, " << range << ") was requested, "
      << "but this can only generate numbers in [0, " << kMaxRange << ").";

  // Converting via modulus introduces a bit of downward bias, but
  // it's simple, and a linear congruential generator isn't too good
  // to begin with.
  return state_ % range;
}

// GTestIsInitialized() returns true iff the user has initialized
// Google Test. Useful for catching the user mistake of not initializing
// Google Test before calling RUN_ALL_TESTS().
//
// A user must call testing::InitGoogleTest() to initialize Google
// Test. g_init_gtest_count is set to the number of times
// InitGoogleTest() has been called. We don't protect this variable
// under a mutex as it is only accessed in the main thread.
GTEST_API_ int g_init_gtest_count = 0;
static bool GTestIsInitialized() { return g_init_gtest_count != 0; }

// Iterates over a vector of TestCases, keeping a running sum of the
// results of calling a given int-returning method on each.
// Returns the sum.
+static int SumOverTestCaseList(const std::vector<TestCase*>& case_list, + int (TestCase::*method)() const) { + int sum = 0; + for (size_t i = 0; i < case_list.size(); i++) { + sum += (case_list[i]->*method)(); + } + return sum; +} + +// Returns true iff the test case passed. +static bool TestCasePassed(const TestCase* test_case) { + return test_case->should_run() && test_case->Passed(); +} + +// Returns true iff the test case failed. +static bool TestCaseFailed(const TestCase* test_case) { + return test_case->should_run() && test_case->Failed(); +} + +// Returns true iff test_case contains at least one test that should +// run. +static bool ShouldRunTestCase(const TestCase* test_case) { + return test_case->should_run(); +} + +// AssertHelper constructor. +AssertHelper::AssertHelper(TestPartResult::Type type, + const char* file, + int line, + const char* message) + : data_(new AssertHelperData(type, file, line, message)) { +} + +AssertHelper::~AssertHelper() { + delete data_; +} + +// Message assignment, for assertion streaming support. +void AssertHelper::operator=(const Message& message) const { + UnitTest::GetInstance()-> + AddTestPartResult(data_->type, data_->file, data_->line, + AppendUserMessage(data_->message, message), + UnitTest::GetInstance()->impl() + ->CurrentOsStackTraceExceptTop(1) + // Skips the stack frame for this function itself. + ); // NOLINT +} + +// Mutex for linked pointers. +GTEST_API_ GTEST_DEFINE_STATIC_MUTEX_(g_linked_ptr_mutex); + +// Application pathname gotten in InitGoogleTest. +std::string g_executable_path; + +// Returns the current application's name, removing directory path if that +// is present. +FilePath GetCurrentExecutableName() { + FilePath result; + +#if GTEST_OS_WINDOWS + result.Set(FilePath(g_executable_path).RemoveExtension("exe")); +#else + result.Set(FilePath(g_executable_path)); +#endif // GTEST_OS_WINDOWS + + return result.RemoveDirectoryName(); +} + +// Functions for processing the gtest_output flag. 
+ +// Returns the output format, or "" for normal printed output. +std::string UnitTestOptions::GetOutputFormat() { + const char* const gtest_output_flag = GTEST_FLAG(output).c_str(); + if (gtest_output_flag == NULL) return std::string(""); + + const char* const colon = strchr(gtest_output_flag, ':'); + return (colon == NULL) ? + std::string(gtest_output_flag) : + std::string(gtest_output_flag, colon - gtest_output_flag); +} + +// Returns the name of the requested output file, or the default if none +// was explicitly specified. +std::string UnitTestOptions::GetAbsolutePathToOutputFile() { + const char* const gtest_output_flag = GTEST_FLAG(output).c_str(); + if (gtest_output_flag == NULL) + return ""; + + const char* const colon = strchr(gtest_output_flag, ':'); + if (colon == NULL) + return internal::FilePath::ConcatPaths( + internal::FilePath( + UnitTest::GetInstance()->original_working_dir()), + internal::FilePath(kDefaultOutputFile)).string(); + + internal::FilePath output_name(colon + 1); + if (!output_name.IsAbsolutePath()) + // TODO(wan@google.com): on Windows \some\path is not an absolute + // path (as its meaning depends on the current drive), yet the + // following logic for turning it into an absolute path is wrong. + // Fix it. + output_name = internal::FilePath::ConcatPaths( + internal::FilePath(UnitTest::GetInstance()->original_working_dir()), + internal::FilePath(colon + 1)); + + if (!output_name.IsDirectory()) + return output_name.string(); + + internal::FilePath result(internal::FilePath::GenerateUniqueFileName( + output_name, internal::GetCurrentExecutableName(), + GetOutputFormat().c_str())); + return result.string(); +} + +// Returns true iff the wildcard pattern matches the string. The +// first ':' or '\0' character in pattern marks the end of it. +// +// This recursive algorithm isn't very efficient, but is clear and +// works well enough for matching test names, which are short. 
+bool UnitTestOptions::PatternMatchesString(const char *pattern, + const char *str) { + switch (*pattern) { + case '\0': + case ':': // Either ':' or '\0' marks the end of the pattern. + return *str == '\0'; + case '?': // Matches any single character. + return *str != '\0' && PatternMatchesString(pattern + 1, str + 1); + case '*': // Matches any string (possibly empty) of characters. + return (*str != '\0' && PatternMatchesString(pattern, str + 1)) || + PatternMatchesString(pattern + 1, str); + default: // Non-special character. Matches itself. + return *pattern == *str && + PatternMatchesString(pattern + 1, str + 1); + } +} + +bool UnitTestOptions::MatchesFilter( + const std::string& name, const char* filter) { + const char *cur_pattern = filter; + for (;;) { + if (PatternMatchesString(cur_pattern, name.c_str())) { + return true; + } + + // Finds the next pattern in the filter. + cur_pattern = strchr(cur_pattern, ':'); + + // Returns if no more pattern can be found. + if (cur_pattern == NULL) { + return false; + } + + // Skips the pattern separater (the ':' character). + cur_pattern++; + } +} + +// Returns true iff the user-specified filter matches the test case +// name and the test name. +bool UnitTestOptions::FilterMatchesTest(const std::string &test_case_name, + const std::string &test_name) { + const std::string& full_name = test_case_name + "." 
+ test_name.c_str(); + + // Split --gtest_filter at '-', if there is one, to separate into + // positive filter and negative filter portions + const char* const p = GTEST_FLAG(filter).c_str(); + const char* const dash = strchr(p, '-'); + std::string positive; + std::string negative; + if (dash == NULL) { + positive = GTEST_FLAG(filter).c_str(); // Whole string is a positive filter + negative = ""; + } else { + positive = std::string(p, dash); // Everything up to the dash + negative = std::string(dash + 1); // Everything after the dash + if (positive.empty()) { + // Treat '-test1' as the same as '*-test1' + positive = kUniversalFilter; + } + } + + // A filter is a colon-separated list of patterns. It matches a + // test if any pattern in it matches the test. + return (MatchesFilter(full_name, positive.c_str()) && + !MatchesFilter(full_name, negative.c_str())); +} + +#if GTEST_HAS_SEH +// Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the +// given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise. +// This function is useful as an __except condition. +int UnitTestOptions::GTestShouldProcessSEH(DWORD exception_code) { + // Google Test should handle a SEH exception if: + // 1. the user wants it to, AND + // 2. this is not a breakpoint exception, AND + // 3. this is not a C++ exception (VC++ implements them via SEH, + // apparently). + // + // SEH exception code for C++ exceptions. + // (see http://support.microsoft.com/kb/185294 for more information). + const DWORD kCxxExceptionCode = 0xe06d7363; + + bool should_handle = true; + + if (!GTEST_FLAG(catch_exceptions)) + should_handle = false; + else if (exception_code == EXCEPTION_BREAKPOINT) + should_handle = false; + else if (exception_code == kCxxExceptionCode) + should_handle = false; + + return should_handle ? 
EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH; +} +#endif // GTEST_HAS_SEH + +} // namespace internal + +// The c'tor sets this object as the test part result reporter used by +// Google Test. The 'result' parameter specifies where to report the +// results. Intercepts only failures from the current thread. +ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter( + TestPartResultArray* result) + : intercept_mode_(INTERCEPT_ONLY_CURRENT_THREAD), + result_(result) { + Init(); +} + +// The c'tor sets this object as the test part result reporter used by +// Google Test. The 'result' parameter specifies where to report the +// results. +ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter( + InterceptMode intercept_mode, TestPartResultArray* result) + : intercept_mode_(intercept_mode), + result_(result) { + Init(); +} + +void ScopedFakeTestPartResultReporter::Init() { + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + if (intercept_mode_ == INTERCEPT_ALL_THREADS) { + old_reporter_ = impl->GetGlobalTestPartResultReporter(); + impl->SetGlobalTestPartResultReporter(this); + } else { + old_reporter_ = impl->GetTestPartResultReporterForCurrentThread(); + impl->SetTestPartResultReporterForCurrentThread(this); + } +} + +// The d'tor restores the test part result reporter used by Google Test +// before. +ScopedFakeTestPartResultReporter::~ScopedFakeTestPartResultReporter() { + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + if (intercept_mode_ == INTERCEPT_ALL_THREADS) { + impl->SetGlobalTestPartResultReporter(old_reporter_); + } else { + impl->SetTestPartResultReporterForCurrentThread(old_reporter_); + } +} + +// Increments the test part result count and remembers the result. +// This method is from the TestPartResultReporterInterface interface. 
+void ScopedFakeTestPartResultReporter::ReportTestPartResult( + const TestPartResult& result) { + result_->Append(result); +} + +namespace internal { + +// Returns the type ID of ::testing::Test. We should always call this +// instead of GetTypeId< ::testing::Test>() to get the type ID of +// testing::Test. This is to work around a suspected linker bug when +// using Google Test as a framework on Mac OS X. The bug causes +// GetTypeId< ::testing::Test>() to return different values depending +// on whether the call is from the Google Test framework itself or +// from user test code. GetTestTypeId() is guaranteed to always +// return the same value, as it always calls GetTypeId<>() from the +// gtest.cc, which is within the Google Test framework. +TypeId GetTestTypeId() { + return GetTypeId<Test>(); +} + +// The value of GetTestTypeId() as seen from within the Google Test +// library. This is solely for testing GetTestTypeId(). +extern const TypeId kTestTypeIdInGoogleTest = GetTestTypeId(); + +// This predicate-formatter checks that 'results' contains a test part +// failure of the given type and that the failure message contains the +// given substring. +AssertionResult HasOneFailure(const char* /* results_expr */, + const char* /* type_expr */, + const char* /* substr_expr */, + const TestPartResultArray& results, + TestPartResult::Type type, + const string& substr) { + const std::string expected(type == TestPartResult::kFatalFailure ? 
+ "1 fatal failure" : + "1 non-fatal failure"); + Message msg; + if (results.size() != 1) { + msg << "Expected: " << expected << "\n" + << " Actual: " << results.size() << " failures"; + for (int i = 0; i < results.size(); i++) { + msg << "\n" << results.GetTestPartResult(i); + } + return AssertionFailure() << msg; + } + + const TestPartResult& r = results.GetTestPartResult(0); + if (r.type() != type) { + return AssertionFailure() << "Expected: " << expected << "\n" + << " Actual:\n" + << r; + } + + if (strstr(r.message(), substr.c_str()) == NULL) { + return AssertionFailure() << "Expected: " << expected << " containing \"" + << substr << "\"\n" + << " Actual:\n" + << r; + } + + return AssertionSuccess(); +} + +// The constructor of SingleFailureChecker remembers where to look up +// test part results, what type of failure we expect, and what +// substring the failure message should contain. +SingleFailureChecker:: SingleFailureChecker( + const TestPartResultArray* results, + TestPartResult::Type type, + const string& substr) + : results_(results), + type_(type), + substr_(substr) {} + +// The destructor of SingleFailureChecker verifies that the given +// TestPartResultArray contains exactly one failure that has the given +// type and contains the given substring. If that's not the case, a +// non-fatal failure will be generated. 
+SingleFailureChecker::~SingleFailureChecker() { + EXPECT_PRED_FORMAT3(HasOneFailure, *results_, type_, substr_); +} + +DefaultGlobalTestPartResultReporter::DefaultGlobalTestPartResultReporter( + UnitTestImpl* unit_test) : unit_test_(unit_test) {} + +void DefaultGlobalTestPartResultReporter::ReportTestPartResult( + const TestPartResult& result) { + unit_test_->current_test_result()->AddTestPartResult(result); + unit_test_->listeners()->repeater()->OnTestPartResult(result); +} + +DefaultPerThreadTestPartResultReporter::DefaultPerThreadTestPartResultReporter( + UnitTestImpl* unit_test) : unit_test_(unit_test) {} + +void DefaultPerThreadTestPartResultReporter::ReportTestPartResult( + const TestPartResult& result) { + unit_test_->GetGlobalTestPartResultReporter()->ReportTestPartResult(result); +} + +// Returns the global test part result reporter. +TestPartResultReporterInterface* +UnitTestImpl::GetGlobalTestPartResultReporter() { + internal::MutexLock lock(&global_test_part_result_reporter_mutex_); + return global_test_part_result_repoter_; +} + +// Sets the global test part result reporter. +void UnitTestImpl::SetGlobalTestPartResultReporter( + TestPartResultReporterInterface* reporter) { + internal::MutexLock lock(&global_test_part_result_reporter_mutex_); + global_test_part_result_repoter_ = reporter; +} + +// Returns the test part result reporter for the current thread. +TestPartResultReporterInterface* +UnitTestImpl::GetTestPartResultReporterForCurrentThread() { + return per_thread_test_part_result_reporter_.get(); +} + +// Sets the test part result reporter for the current thread. +void UnitTestImpl::SetTestPartResultReporterForCurrentThread( + TestPartResultReporterInterface* reporter) { + per_thread_test_part_result_reporter_.set(reporter); +} + +// Gets the number of successful test cases. +int UnitTestImpl::successful_test_case_count() const { + return CountIf(test_cases_, TestCasePassed); +} + +// Gets the number of failed test cases. 
+int UnitTestImpl::failed_test_case_count() const { + return CountIf(test_cases_, TestCaseFailed); +} + +// Gets the number of all test cases. +int UnitTestImpl::total_test_case_count() const { + return static_cast<int>(test_cases_.size()); +} + +// Gets the number of all test cases that contain at least one test +// that should run. +int UnitTestImpl::test_case_to_run_count() const { + return CountIf(test_cases_, ShouldRunTestCase); +} + +// Gets the number of successful tests. +int UnitTestImpl::successful_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::successful_test_count); +} + +// Gets the number of failed tests. +int UnitTestImpl::failed_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::failed_test_count); +} + +// Gets the number of disabled tests that will be reported in the XML report. +int UnitTestImpl::reportable_disabled_test_count() const { + return SumOverTestCaseList(test_cases_, + &TestCase::reportable_disabled_test_count); +} + +// Gets the number of disabled tests. +int UnitTestImpl::disabled_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::disabled_test_count); +} + +// Gets the number of tests to be printed in the XML report. +int UnitTestImpl::reportable_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::reportable_test_count); +} + +// Gets the number of all tests. +int UnitTestImpl::total_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::total_test_count); +} + +// Gets the number of tests that should run. +int UnitTestImpl::test_to_run_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::test_to_run_count); +} + +// Returns the current OS stack trace as an std::string. +// +// The maximum number of stack frames to be included is specified by +// the gtest_stack_trace_depth flag. 
The skip_count parameter +// specifies the number of top frames to be skipped, which doesn't +// count against the number of frames to be included. +// +// For example, if Foo() calls Bar(), which in turn calls +// CurrentOsStackTraceExceptTop(1), Foo() will be included in the +// trace but Bar() and CurrentOsStackTraceExceptTop() won't. +std::string UnitTestImpl::CurrentOsStackTraceExceptTop(int skip_count) { + (void)skip_count; + return ""; +} + +// Returns the current time in milliseconds. +TimeInMillis GetTimeInMillis() { +#if GTEST_OS_WINDOWS_MOBILE || defined(__BORLANDC__) + // Difference between 1970-01-01 and 1601-01-01 in milliseconds. + // http://analogous.blogspot.com/2005/04/epoch.html + const TimeInMillis kJavaEpochToWinFileTimeDelta = + static_cast<TimeInMillis>(116444736UL) * 100000UL; + const DWORD kTenthMicrosInMilliSecond = 10000; + + SYSTEMTIME now_systime; + FILETIME now_filetime; + ULARGE_INTEGER now_int64; + // TODO(kenton@google.com): Shouldn't this just use + // GetSystemTimeAsFileTime()? + GetSystemTime(&now_systime); + if (SystemTimeToFileTime(&now_systime, &now_filetime)) { + now_int64.LowPart = now_filetime.dwLowDateTime; + now_int64.HighPart = now_filetime.dwHighDateTime; + now_int64.QuadPart = (now_int64.QuadPart / kTenthMicrosInMilliSecond) - + kJavaEpochToWinFileTimeDelta; + return now_int64.QuadPart; + } + return 0; +#elif GTEST_OS_WINDOWS && !GTEST_HAS_GETTIMEOFDAY_ + __timeb64 now; + +# ifdef _MSC_VER + + // MSVC 8 deprecates _ftime64(), so we want to suppress warning 4996 + // (deprecated function) there. + // TODO(kenton@google.com): Use GetTickCount()? Or use + // SystemTimeToFileTime() +# pragma warning(push) // Saves the current warning state. +# pragma warning(disable:4996) // Temporarily disables warning 4996. + _ftime64(&now); +# pragma warning(pop) // Restores the warning state. 
+# else + + _ftime64(&now); + +# endif // _MSC_VER + + return static_cast<TimeInMillis>(now.time) * 1000 + now.millitm; +#elif GTEST_HAS_GETTIMEOFDAY_ + struct timeval now; + gettimeofday(&now, NULL); + return static_cast<TimeInMillis>(now.tv_sec) * 1000 + now.tv_usec / 1000; +#else +# error "Don't know how to get the current time on your system." +#endif +} + +// Utilities + +// class String. + +#if GTEST_OS_WINDOWS_MOBILE +// Creates a UTF-16 wide string from the given ANSI string, allocating +// memory using new. The caller is responsible for deleting the return +// value using delete[]. Returns the wide string, or NULL if the +// input is NULL. +LPCWSTR String::AnsiToUtf16(const char* ansi) { + if (!ansi) return NULL; + const int length = strlen(ansi); + const int unicode_length = + MultiByteToWideChar(CP_ACP, 0, ansi, length, + NULL, 0); + WCHAR* unicode = new WCHAR[unicode_length + 1]; + MultiByteToWideChar(CP_ACP, 0, ansi, length, + unicode, unicode_length); + unicode[unicode_length] = 0; + return unicode; +} + +// Creates an ANSI string from the given wide string, allocating +// memory using new. The caller is responsible for deleting the return +// value using delete[]. Returns the ANSI string, or NULL if the +// input is NULL. +const char* String::Utf16ToAnsi(LPCWSTR utf16_str) { + if (!utf16_str) return NULL; + const int ansi_length = + WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, + NULL, 0, NULL, NULL); + char* ansi = new char[ansi_length + 1]; + WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, + ansi, ansi_length, NULL, NULL); + ansi[ansi_length] = 0; + return ansi; +} + +#endif // GTEST_OS_WINDOWS_MOBILE + +// Compares two C strings. Returns true iff they have the same content. +// +// Unlike strcmp(), this function can handle NULL argument(s). A NULL +// C string is considered different to any non-NULL C string, +// including the empty string. 
+bool String::CStringEquals(const char * lhs, const char * rhs) { + if ( lhs == NULL ) return rhs == NULL; + + if ( rhs == NULL ) return false; + + return strcmp(lhs, rhs) == 0; +} + +#if GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING + +// Converts an array of wide chars to a narrow string using the UTF-8 +// encoding, and streams the result to the given Message object. +static void StreamWideCharsToMessage(const wchar_t* wstr, size_t length, + Message* msg) { + for (size_t i = 0; i != length; ) { // NOLINT + if (wstr[i] != L'\0') { + *msg << WideStringToUtf8(wstr + i, static_cast<int>(length - i)); + while (i != length && wstr[i] != L'\0') + i++; + } else { + *msg << '\0'; + i++; + } + } +} + +#endif // GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING + +} // namespace internal + +// Constructs an empty Message. +// We allocate the stringstream separately because otherwise each use of +// ASSERT/EXPECT in a procedure adds over 200 bytes to the procedure's +// stack frame leading to huge stack frames in some cases; gcc does not reuse +// the stack space. +Message::Message() : ss_(new ::std::stringstream) { + // By default, we want there to be enough precision when printing + // a double to a Message. + *ss_ << std::setprecision(std::numeric_limits<double>::digits10 + 2); +} + +// These two overloads allow streaming a wide C string to a Message +// using the UTF-8 encoding. +Message& Message::operator <<(const wchar_t* wide_c_str) { + return *this << internal::String::ShowWideCString(wide_c_str); +} +Message& Message::operator <<(wchar_t* wide_c_str) { + return *this << internal::String::ShowWideCString(wide_c_str); +} + +#if GTEST_HAS_STD_WSTRING +// Converts the given wide string to a narrow string using the UTF-8 +// encoding, and streams the result to this Message object. 
+Message& Message::operator <<(const ::std::wstring& wstr) { + internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this); + return *this; +} +#endif // GTEST_HAS_STD_WSTRING + +#if GTEST_HAS_GLOBAL_WSTRING +// Converts the given wide string to a narrow string using the UTF-8 +// encoding, and streams the result to this Message object. +Message& Message::operator <<(const ::wstring& wstr) { + internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this); + return *this; +} +#endif // GTEST_HAS_GLOBAL_WSTRING + +// Gets the text streamed to this object so far as an std::string. +// Each '\0' character in the buffer is replaced with "\\0". +std::string Message::GetString() const { + return internal::StringStreamToString(ss_.get()); +} + +// AssertionResult constructors. +// Used in EXPECT_TRUE/FALSE(assertion_result). +AssertionResult::AssertionResult(const AssertionResult& other) + : success_(other.success_), + message_(other.message_.get() != NULL ? + new ::std::string(*other.message_) : + static_cast< ::std::string*>(NULL)) { +} + +// Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE. +AssertionResult AssertionResult::operator!() const { + AssertionResult negation(!success_); + if (message_.get() != NULL) + negation << *message_; + return negation; +} + +// Makes a successful assertion result. +AssertionResult AssertionSuccess() { + return AssertionResult(true); +} + +// Makes a failed assertion result. +AssertionResult AssertionFailure() { + return AssertionResult(false); +} + +// Makes a failed assertion result with the given failure message. +// Deprecated; use AssertionFailure() << message. +AssertionResult AssertionFailure(const Message& message) { + return AssertionFailure() << message; +} + +namespace internal { + +// Constructs and returns the message for an equality assertion +// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure. 
+// +// The first four parameters are the expressions used in the assertion +// and their values, as strings. For example, for ASSERT_EQ(foo, bar) +// where foo is 5 and bar is 6, we have: +// +// expected_expression: "foo" +// actual_expression: "bar" +// expected_value: "5" +// actual_value: "6" +// +// The ignoring_case parameter is true iff the assertion is a +// *_STRCASEEQ*. When it's true, the string " (ignoring case)" will +// be inserted into the message. +AssertionResult EqFailure(const char* expected_expression, + const char* actual_expression, + const std::string& expected_value, + const std::string& actual_value, + bool ignoring_case) { + Message msg; + msg << "Value of: " << actual_expression; + if (actual_value != actual_expression) { + msg << "\n Actual: " << actual_value; + } + + msg << "\nExpected: " << expected_expression; + if (ignoring_case) { + msg << " (ignoring case)"; + } + if (expected_value != expected_expression) { + msg << "\nWhich is: " << expected_value; + } + + return AssertionFailure() << msg; +} + +// Constructs a failure message for Boolean assertions such as EXPECT_TRUE. +std::string GetBoolAssertionFailureMessage( + const AssertionResult& assertion_result, + const char* expression_text, + const char* actual_predicate_value, + const char* expected_predicate_value) { + const char* actual_message = assertion_result.message(); + Message msg; + msg << "Value of: " << expression_text + << "\n Actual: " << actual_predicate_value; + if (actual_message[0] != '\0') + msg << " (" << actual_message << ")"; + msg << "\nExpected: " << expected_predicate_value; + return msg.GetString(); +} + +// Helper function for implementing ASSERT_NEAR. 
+AssertionResult DoubleNearPredFormat(const char* expr1, + const char* expr2, + const char* abs_error_expr, + double val1, + double val2, + double abs_error) { + const double diff = fabs(val1 - val2); + if (diff <= abs_error) return AssertionSuccess(); + + // TODO(wan): do not print the value of an expression if it's + // already a literal. + return AssertionFailure() + << "The difference between " << expr1 << " and " << expr2 + << " is " << diff << ", which exceeds " << abs_error_expr << ", where\n" + << expr1 << " evaluates to " << val1 << ",\n" + << expr2 << " evaluates to " << val2 << ", and\n" + << abs_error_expr << " evaluates to " << abs_error << "."; +} + + +// Helper template for implementing FloatLE() and DoubleLE(). +template <typename RawType> +AssertionResult FloatingPointLE(const char* expr1, + const char* expr2, + RawType val1, + RawType val2) { + // Returns success if val1 is less than val2, + if (val1 < val2) { + return AssertionSuccess(); + } + + // or if val1 is almost equal to val2. + const FloatingPoint<RawType> lhs(val1), rhs(val2); + if (lhs.AlmostEquals(rhs)) { + return AssertionSuccess(); + } + + // Note that the above two checks will both fail if either val1 or + // val2 is NaN, as the IEEE floating-point standard requires that + // any predicate involving a NaN must return false. + + ::std::stringstream val1_ss; + val1_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2) + << val1; + + ::std::stringstream val2_ss; + val2_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2) + << val2; + + return AssertionFailure() + << "Expected: (" << expr1 << ") <= (" << expr2 << ")\n" + << " Actual: " << StringStreamToString(&val1_ss) << " vs " + << StringStreamToString(&val2_ss); +} + +} // namespace internal + +// Asserts that val1 is less than, or almost equal to, val2. Fails +// otherwise. In particular, it fails if either val1 or val2 is NaN. 
+AssertionResult FloatLE(const char* expr1, const char* expr2, + float val1, float val2) { + return internal::FloatingPointLE<float>(expr1, expr2, val1, val2); +} + +// Asserts that val1 is less than, or almost equal to, val2. Fails +// otherwise. In particular, it fails if either val1 or val2 is NaN. +AssertionResult DoubleLE(const char* expr1, const char* expr2, + double val1, double val2) { + return internal::FloatingPointLE<double>(expr1, expr2, val1, val2); +} + +namespace internal { + +// The helper function for {ASSERT|EXPECT}_EQ with int or enum +// arguments. +AssertionResult CmpHelperEQ(const char* expected_expression, + const char* actual_expression, + BiggestInt expected, + BiggestInt actual) { + if (expected == actual) { + return AssertionSuccess(); + } + + return EqFailure(expected_expression, + actual_expression, + FormatForComparisonFailureMessage(expected, actual), + FormatForComparisonFailureMessage(actual, expected), + false); +} + +// A macro for implementing the helper functions needed to implement +// ASSERT_?? and EXPECT_?? with integer or enum arguments. It is here +// just to avoid copy-and-paste of similar code. +#define GTEST_IMPL_CMP_HELPER_(op_name, op)\ +AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \ + BiggestInt val1, BiggestInt val2) {\ + if (val1 op val2) {\ + return AssertionSuccess();\ + } else {\ + return AssertionFailure() \ + << "Expected: (" << expr1 << ") " #op " (" << expr2\ + << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\ + << " vs " << FormatForComparisonFailureMessage(val2, val1);\ + }\ +} + +// Implements the helper function for {ASSERT|EXPECT}_NE with int or +// enum arguments. +GTEST_IMPL_CMP_HELPER_(NE, !=) +// Implements the helper function for {ASSERT|EXPECT}_LE with int or +// enum arguments. +GTEST_IMPL_CMP_HELPER_(LE, <=) +// Implements the helper function for {ASSERT|EXPECT}_LT with int or +// enum arguments. 
+GTEST_IMPL_CMP_HELPER_(LT, < ) +// Implements the helper function for {ASSERT|EXPECT}_GE with int or +// enum arguments. +GTEST_IMPL_CMP_HELPER_(GE, >=) +// Implements the helper function for {ASSERT|EXPECT}_GT with int or +// enum arguments. +GTEST_IMPL_CMP_HELPER_(GT, > ) + +#undef GTEST_IMPL_CMP_HELPER_ + +// The helper function for {ASSERT|EXPECT}_STREQ. +AssertionResult CmpHelperSTREQ(const char* expected_expression, + const char* actual_expression, + const char* expected, + const char* actual) { + if (String::CStringEquals(expected, actual)) { + return AssertionSuccess(); + } + + return EqFailure(expected_expression, + actual_expression, + PrintToString(expected), + PrintToString(actual), + false); +} + +// The helper function for {ASSERT|EXPECT}_STRCASEEQ. +AssertionResult CmpHelperSTRCASEEQ(const char* expected_expression, + const char* actual_expression, + const char* expected, + const char* actual) { + if (String::CaseInsensitiveCStringEquals(expected, actual)) { + return AssertionSuccess(); + } + + return EqFailure(expected_expression, + actual_expression, + PrintToString(expected), + PrintToString(actual), + true); +} + +// The helper function for {ASSERT|EXPECT}_STRNE. +AssertionResult CmpHelperSTRNE(const char* s1_expression, + const char* s2_expression, + const char* s1, + const char* s2) { + if (!String::CStringEquals(s1, s2)) { + return AssertionSuccess(); + } else { + return AssertionFailure() << "Expected: (" << s1_expression << ") != (" + << s2_expression << "), actual: \"" + << s1 << "\" vs \"" << s2 << "\""; + } +} + +// The helper function for {ASSERT|EXPECT}_STRCASENE. 
AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
                                   const char* s2_expression,
                                   const char* s1,
                                   const char* s2) {
  // Success means the strings differ when case is ignored.
  if (!String::CaseInsensitiveCStringEquals(s1, s2)) {
    return AssertionSuccess();
  } else {
    return AssertionFailure()
        << "Expected: (" << s1_expression << ") != ("
        << s2_expression << ") (ignoring case), actual: \""
        << s1 << "\" vs \"" << s2 << "\"";
  }
}

}  // namespace internal

namespace {

// Helper functions for implementing IsSubstring() and IsNotSubstring().

// This group of overloaded functions return true iff needle is a
// substring of haystack.  NULL is considered a substring of itself
// only.

// Narrow C-string overload; uses strstr() once both pointers are
// known to be non-NULL.
bool IsSubstringPred(const char* needle, const char* haystack) {
  if (needle == NULL || haystack == NULL)
    return needle == haystack;

  return strstr(haystack, needle) != NULL;
}

// Wide C-string overload; uses wcsstr() once both pointers are known
// to be non-NULL.
bool IsSubstringPred(const wchar_t* needle, const wchar_t* haystack) {
  if (needle == NULL || haystack == NULL)
    return needle == haystack;

  return wcsstr(haystack, needle) != NULL;
}

// StringType here can be either ::std::string or ::std::wstring.
template <typename StringType>
bool IsSubstringPred(const StringType& needle,
                     const StringType& haystack) {
  return haystack.find(needle) != StringType::npos;
}

// This function implements either IsSubstring() or IsNotSubstring(),
// depending on the value of the expected_to_be_substring parameter.
// StringType here can be const char*, const wchar_t*, ::std::string,
// or ::std::wstring.
//
// needle_expr and haystack_expr are the source texts of the two
// arguments and only appear in the failure message.
template <typename StringType>
AssertionResult IsSubstringImpl(
    bool expected_to_be_substring,
    const char* needle_expr, const char* haystack_expr,
    const StringType& needle, const StringType& haystack) {
  if (IsSubstringPred(needle, haystack) == expected_to_be_substring)
    return AssertionSuccess();

  // sizeof is unevaluated, so needle[0] is never actually read here;
  // it only tells narrow from wide element types so that wide strings
  // are printed with an L"" prefix.
  const bool is_wide_string = sizeof(needle[0]) > 1;
  const char* const begin_string_quote = is_wide_string ? "L\"" : "\"";
  return AssertionFailure()
      << "Value of: " << needle_expr << "\n"
      << " Actual: " << begin_string_quote << needle << "\"\n"
      << "Expected: " << (expected_to_be_substring ? "" : "not ")
      << "a substring of " << haystack_expr << "\n"
      << "Which is: " << begin_string_quote << haystack << "\"";
}

}  // namespace

// IsSubstring() and IsNotSubstring() check whether needle is a
// substring of haystack (NULL is considered a substring of itself
// only), and return an appropriate error message when they fail.
//
// Each overload below simply forwards to IsSubstringImpl() with
// expected_to_be_substring set to true (IsSubstring) or false
// (IsNotSubstring).

AssertionResult IsSubstring(
    const char* needle_expr, const char* haystack_expr,
    const char* needle, const char* haystack) {
  return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
}

AssertionResult IsSubstring(
    const char* needle_expr, const char* haystack_expr,
    const wchar_t* needle, const wchar_t* haystack) {
  return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
}

AssertionResult IsNotSubstring(
    const char* needle_expr, const char* haystack_expr,
    const char* needle, const char* haystack) {
  return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
}

AssertionResult IsNotSubstring(
    const char* needle_expr, const char* haystack_expr,
    const wchar_t* needle, const wchar_t* haystack) {
  return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
}

AssertionResult IsSubstring(
    const char* needle_expr, const char* haystack_expr,
    const ::std::string& needle, const ::std::string& haystack) {
  return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
}

AssertionResult IsNotSubstring(
    const char* needle_expr, const char* haystack_expr,
    const ::std::string& needle, const ::std::string& haystack) {
  return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
}

#if GTEST_HAS_STD_WSTRING
AssertionResult IsSubstring(
    const char* needle_expr, const char* haystack_expr,
    const ::std::wstring& needle, const ::std::wstring& haystack) {
  return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
}

AssertionResult IsNotSubstring(
    const char* needle_expr, const char* haystack_expr,
    const ::std::wstring& needle, const ::std::wstring& haystack) {
  return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
}
#endif  // GTEST_HAS_STD_WSTRING

namespace internal {

#if GTEST_OS_WINDOWS

namespace {

// Helper function for the IsHRESULT{Success,Failure} predicates.
// Builds the failure result: `expr` is the source text of the HRESULT
// expression, `expected` is the verb describing what was expected
// ("succeeds" or "fails"), and `hr` is the actual HRESULT value, which
// is printed in hex followed by the system's message text, if any.
AssertionResult HRESULTFailureHelper(const char* expr,
                                     const char* expected,
                                     long hr) {  // NOLINT
# if GTEST_OS_WINDOWS_MOBILE

  // Windows CE doesn't support FormatMessage.
  const char error_text[] = "";

# else

  // Looks up the human-readable system message for the HRESULT code
  // and since we're not passing any params to FormatMessage, we don't
  // want inserts expanded.
  const DWORD kFlags = FORMAT_MESSAGE_FROM_SYSTEM |
                       FORMAT_MESSAGE_IGNORE_INSERTS;
  const DWORD kBufSize = 4096;
  // Gets the system's human readable message string for this HRESULT.
  char error_text[kBufSize] = { '\0' };
  DWORD message_length = ::FormatMessageA(kFlags,
                                          0,  // no source, we're asking system
                                          hr,  // the error
                                          0,  // language id 0: default lookup order
                                          error_text,  // output buffer
                                          kBufSize,  // buf size
                                          NULL);  // no arguments for inserts
  // Trims trailing white space (FormatMessage leaves a trailing CR-LF)
  for (; message_length && IsSpace(error_text[message_length - 1]);
          --message_length) {
    error_text[message_length - 1] = '\0';
  }

# endif  // GTEST_OS_WINDOWS_MOBILE

  const std::string error_hex("0x" + String::FormatHexInt(hr));
  return ::testing::AssertionFailure()
      << "Expected: " << expr << " " << expected << ".\n"
      << " Actual: " << error_hex << " " << error_text << "\n";
}

}  // namespace

// Succeeds iff SUCCEEDED(hr); otherwise reports the HRESULT via
// HRESULTFailureHelper().
AssertionResult IsHRESULTSuccess(const char* expr, long hr) {  // NOLINT
  if (SUCCEEDED(hr)) {
    return AssertionSuccess();
  }
  return HRESULTFailureHelper(expr, "succeeds", hr);
}

// Succeeds iff FAILED(hr); otherwise reports the HRESULT via
// HRESULTFailureHelper().
AssertionResult IsHRESULTFailure(const char* expr, long hr) {  // NOLINT
  if (FAILED(hr)) {
    return AssertionSuccess();
  }
  return HRESULTFailureHelper(expr, "fails", hr);
}

#endif  // GTEST_OS_WINDOWS

// Utility functions for encoding Unicode text (wide strings) in
// UTF-8.

// A Unicode code-point can have up to 21 bits, and is encoded in UTF-8
// like this:
//
// Code-point length   Encoding
//   0 -  7 bits       0xxxxxxx
//   8 - 11 bits       110xxxxx 10xxxxxx
//  12 - 16 bits       1110xxxx 10xxxxxx 10xxxxxx
//  17 - 21 bits       11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

// The maximum code-point a one-byte UTF-8 sequence can represent.
const UInt32 kMaxCodePoint1 = (static_cast<UInt32>(1) <<  7) - 1;

// The maximum code-point a two-byte UTF-8 sequence can represent.
const UInt32 kMaxCodePoint2 = (static_cast<UInt32>(1) << (5 + 6)) - 1;

// The maximum code-point a three-byte UTF-8 sequence can represent.
+const UInt32 kMaxCodePoint3 = (static_cast<UInt32>(1) << (4 + 2*6)) - 1; + +// The maximum code-point a four-byte UTF-8 sequence can represent. +const UInt32 kMaxCodePoint4 = (static_cast<UInt32>(1) << (3 + 3*6)) - 1; + +// Chops off the n lowest bits from a bit pattern. Returns the n +// lowest bits. As a side effect, the original bit pattern will be +// shifted to the right by n bits. +inline UInt32 ChopLowBits(UInt32* bits, int n) { + const UInt32 low_bits = *bits & ((static_cast<UInt32>(1) << n) - 1); + *bits >>= n; + return low_bits; +} + +// Converts a Unicode code point to a narrow string in UTF-8 encoding. +// code_point parameter is of type UInt32 because wchar_t may not be +// wide enough to contain a code point. +// If the code_point is not a valid Unicode code point +// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted +// to "(Invalid Unicode 0xXXXXXXXX)". +std::string CodePointToUtf8(UInt32 code_point) { + if (code_point > kMaxCodePoint4) { + return "(Invalid Unicode 0x" + String::FormatHexInt(code_point) + ")"; + } + + char str[5]; // Big enough for the largest valid code point. 
+ if (code_point <= kMaxCodePoint1) { + str[1] = '\0'; + str[0] = static_cast<char>(code_point); // 0xxxxxxx + } else if (code_point <= kMaxCodePoint2) { + str[2] = '\0'; + str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx + str[0] = static_cast<char>(0xC0 | code_point); // 110xxxxx + } else if (code_point <= kMaxCodePoint3) { + str[3] = '\0'; + str[2] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx + str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx + str[0] = static_cast<char>(0xE0 | code_point); // 1110xxxx + } else { // code_point <= kMaxCodePoint4 + str[4] = '\0'; + str[3] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx + str[2] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx + str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx + str[0] = static_cast<char>(0xF0 | code_point); // 11110xxx + } + return str; +} + +// The following two functions only make sense if the the system +// uses UTF-16 for wide string encoding. All supported systems +// with 16 bit wchar_t (Windows, Cygwin, Symbian OS) do use UTF-16. + +// Determines if the arguments constitute UTF-16 surrogate pair +// and thus should be combined into a single Unicode code point +// using CreateCodePointFromUtf16SurrogatePair. +inline bool IsUtf16SurrogatePair(wchar_t first, wchar_t second) { + return sizeof(wchar_t) == 2 && + (first & 0xFC00) == 0xD800 && (second & 0xFC00) == 0xDC00; +} + +// Creates a Unicode code point from UTF16 surrogate pair. +inline UInt32 CreateCodePointFromUtf16SurrogatePair(wchar_t first, + wchar_t second) { + const UInt32 mask = (1 << 10) - 1; + return (sizeof(wchar_t) == 2) ? + (((first & mask) << 10) | (second & mask)) + 0x10000 : + // This function should not be called when the condition is + // false, but we provide a sensible default in case it is. 
+ static_cast<UInt32>(first); +} + +// Converts a wide string to a narrow string in UTF-8 encoding. +// The wide string is assumed to have the following encoding: +// UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS) +// UTF-32 if sizeof(wchar_t) == 4 (on Linux) +// Parameter str points to a null-terminated wide string. +// Parameter num_chars may additionally limit the number +// of wchar_t characters processed. -1 is used when the entire string +// should be processed. +// If the string contains code points that are not valid Unicode code points +// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output +// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding +// and contains invalid UTF-16 surrogate pairs, values in those pairs +// will be encoded as individual Unicode characters from Basic Normal Plane. +std::string WideStringToUtf8(const wchar_t* str, int num_chars) { + if (num_chars == -1) + num_chars = static_cast<int>(wcslen(str)); + + ::std::stringstream stream; + for (int i = 0; i < num_chars; ++i) { + UInt32 unicode_code_point; + + if (str[i] == L'\0') { + break; + } else if (i + 1 < num_chars && IsUtf16SurrogatePair(str[i], str[i + 1])) { + unicode_code_point = CreateCodePointFromUtf16SurrogatePair(str[i], + str[i + 1]); + i++; + } else { + unicode_code_point = static_cast<UInt32>(str[i]); + } + + stream << CodePointToUtf8(unicode_code_point); + } + return StringStreamToString(&stream); +} + +// Converts a wide C string to an std::string using the UTF-8 encoding. +// NULL will be converted to "(null)". +std::string String::ShowWideCString(const wchar_t * wide_c_str) { + if (wide_c_str == NULL) return "(null)"; + + return internal::WideStringToUtf8(wide_c_str, -1); +} + +// Compares two wide C strings. Returns true iff they have the same +// content. +// +// Unlike wcscmp(), this function can handle NULL argument(s). 
A NULL +// C string is considered different to any non-NULL C string, +// including the empty string. +bool String::WideCStringEquals(const wchar_t * lhs, const wchar_t * rhs) { + if (lhs == NULL) return rhs == NULL; + + if (rhs == NULL) return false; + + return wcscmp(lhs, rhs) == 0; +} + +// Helper function for *_STREQ on wide strings. +AssertionResult CmpHelperSTREQ(const char* expected_expression, + const char* actual_expression, + const wchar_t* expected, + const wchar_t* actual) { + if (String::WideCStringEquals(expected, actual)) { + return AssertionSuccess(); + } + + return EqFailure(expected_expression, + actual_expression, + PrintToString(expected), + PrintToString(actual), + false); +} + +// Helper function for *_STRNE on wide strings. +AssertionResult CmpHelperSTRNE(const char* s1_expression, + const char* s2_expression, + const wchar_t* s1, + const wchar_t* s2) { + if (!String::WideCStringEquals(s1, s2)) { + return AssertionSuccess(); + } + + return AssertionFailure() << "Expected: (" << s1_expression << ") != (" + << s2_expression << "), actual: " + << PrintToString(s1) + << " vs " << PrintToString(s2); +} + +// Compares two C strings, ignoring case. Returns true iff they have +// the same content. +// +// Unlike strcasecmp(), this function can handle NULL argument(s). A +// NULL C string is considered different to any non-NULL C string, +// including the empty string. +bool String::CaseInsensitiveCStringEquals(const char * lhs, const char * rhs) { + if (lhs == NULL) + return rhs == NULL; + if (rhs == NULL) + return false; + return posix::StrCaseCmp(lhs, rhs) == 0; +} + + // Compares two wide C strings, ignoring case. Returns true iff they + // have the same content. + // + // Unlike wcscasecmp(), this function can handle NULL argument(s). + // A NULL C string is considered different to any non-NULL wide C string, + // including the empty string. + // NB: The implementations on different platforms slightly differ. 
+ // On windows, this method uses _wcsicmp which compares according to LC_CTYPE + // environment variable. On GNU platform this method uses wcscasecmp + // which compares according to LC_CTYPE category of the current locale. + // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the + // current locale. +bool String::CaseInsensitiveWideCStringEquals(const wchar_t* lhs, + const wchar_t* rhs) { + if (lhs == NULL) return rhs == NULL; + + if (rhs == NULL) return false; + +#if GTEST_OS_WINDOWS + return _wcsicmp(lhs, rhs) == 0; +#elif GTEST_OS_LINUX && !GTEST_OS_LINUX_ANDROID + return wcscasecmp(lhs, rhs) == 0; +#else + // Android, Mac OS X and Cygwin don't define wcscasecmp. + // Other unknown OSes may not define it either. + wint_t left, right; + do { + left = towlower(*lhs++); + right = towlower(*rhs++); + } while (left && left == right); + return left == right; +#endif // OS selector +} + +// Returns true iff str ends with the given suffix, ignoring case. +// Any string is considered to end with an empty suffix. +bool String::EndsWithCaseInsensitive( + const std::string& str, const std::string& suffix) { + const size_t str_len = str.length(); + const size_t suffix_len = suffix.length(); + return (str_len >= suffix_len) && + CaseInsensitiveCStringEquals(str.c_str() + str_len - suffix_len, + suffix.c_str()); +} + +// Formats an int value as "%02d". +std::string String::FormatIntWidth2(int value) { + std::stringstream ss; + ss << std::setfill('0') << std::setw(2) << value; + return ss.str(); +} + +// Formats an int value as "%X". +std::string String::FormatHexInt(int value) { + std::stringstream ss; + ss << std::hex << std::uppercase << value; + return ss.str(); +} + +// Formats a byte as "%02X". 
+std::string String::FormatByte(unsigned char value) { + std::stringstream ss; + ss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase + << static_cast<unsigned int>(value); + return ss.str(); +} + +// Converts the buffer in a stringstream to an std::string, converting NUL +// bytes to "\\0" along the way. +std::string StringStreamToString(::std::stringstream* ss) { + const ::std::string& str = ss->str(); + const char* const start = str.c_str(); + const char* const end = start + str.length(); + + std::string result; + result.reserve(2 * (end - start)); + for (const char* ch = start; ch != end; ++ch) { + if (*ch == '\0') { + result += "\\0"; // Replaces NUL with "\\0"; + } else { + result += *ch; + } + } + + return result; +} + +// Appends the user-supplied message to the Google-Test-generated message. +std::string AppendUserMessage(const std::string& gtest_msg, + const Message& user_msg) { + // Appends the user message if it's non-empty. + const std::string user_msg_string = user_msg.GetString(); + if (user_msg_string.empty()) { + return gtest_msg; + } + + return gtest_msg + "\n" + user_msg_string; +} + +} // namespace internal + +// class TestResult + +// Creates an empty TestResult. +TestResult::TestResult() + : death_test_count_(0), + elapsed_time_(0) { +} + +// D'tor. +TestResult::~TestResult() { +} + +// Returns the i-th test part result among all the results. i can +// range from 0 to total_part_count() - 1. If i is not in that range, +// aborts the program. +const TestPartResult& TestResult::GetTestPartResult(int i) const { + if (i < 0 || i >= total_part_count()) + internal::posix::Abort(); + return test_part_results_.at(i); +} + +// Returns the i-th test property. i can range from 0 to +// test_property_count() - 1. If i is not in that range, aborts the +// program. 
+const TestProperty& TestResult::GetTestProperty(int i) const { + if (i < 0 || i >= test_property_count()) + internal::posix::Abort(); + return test_properties_.at(i); +} + +// Clears the test part results. +void TestResult::ClearTestPartResults() { + test_part_results_.clear(); +} + +// Adds a test part result to the list. +void TestResult::AddTestPartResult(const TestPartResult& test_part_result) { + test_part_results_.push_back(test_part_result); +} + +// Adds a test property to the list. If a property with the same key as the +// supplied property is already represented, the value of this test_property +// replaces the old value for that key. +void TestResult::RecordProperty(const std::string& xml_element, + const TestProperty& test_property) { + if (!ValidateTestProperty(xml_element, test_property)) { + return; + } + internal::MutexLock lock(&test_properites_mutex_); + const std::vector<TestProperty>::iterator property_with_matching_key = + std::find_if(test_properties_.begin(), test_properties_.end(), + internal::TestPropertyKeyIs(test_property.key())); + if (property_with_matching_key == test_properties_.end()) { + test_properties_.push_back(test_property); + return; + } + property_with_matching_key->SetValue(test_property.value()); +} + +// The list of reserved attributes used in the <testsuites> element of XML +// output. +static const char* const kReservedTestSuitesAttributes[] = { + "disabled", + "errors", + "failures", + "name", + "random_seed", + "tests", + "time", + "timestamp" +}; + +// The list of reserved attributes used in the <testsuite> element of XML +// output. +static const char* const kReservedTestSuiteAttributes[] = { + "disabled", + "errors", + "failures", + "name", + "tests", + "time" +}; + +// The list of reserved attributes used in the <testcase> element of XML output. 
+static const char* const kReservedTestCaseAttributes[] = { + "classname", + "name", + "status", + "time", + "type_param", + "value_param" +}; + +template <int kSize> +std::vector<std::string> ArrayAsVector(const char* const (&array)[kSize]) { + return std::vector<std::string>(array, array + kSize); +} + +static std::vector<std::string> GetReservedAttributesForElement( + const std::string& xml_element) { + if (xml_element == "testsuites") { + return ArrayAsVector(kReservedTestSuitesAttributes); + } else if (xml_element == "testsuite") { + return ArrayAsVector(kReservedTestSuiteAttributes); + } else if (xml_element == "testcase") { + return ArrayAsVector(kReservedTestCaseAttributes); + } else { + GTEST_CHECK_(false) << "Unrecognized xml_element provided: " << xml_element; + } + // This code is unreachable but some compilers may not realizes that. + return std::vector<std::string>(); +} + +static std::string FormatWordList(const std::vector<std::string>& words) { + Message word_list; + for (size_t i = 0; i < words.size(); ++i) { + if (i > 0 && words.size() > 2) { + word_list << ", "; + } + if (i == words.size() - 1) { + word_list << "and "; + } + word_list << "'" << words[i] << "'"; + } + return word_list.GetString(); +} + +bool ValidateTestPropertyName(const std::string& property_name, + const std::vector<std::string>& reserved_names) { + if (std::find(reserved_names.begin(), reserved_names.end(), property_name) != + reserved_names.end()) { + ADD_FAILURE() << "Reserved key used in RecordProperty(): " << property_name + << " (" << FormatWordList(reserved_names) + << " are reserved by " << GTEST_NAME_ << ")"; + return false; + } + return true; +} + +// Adds a failure if the key is a reserved attribute of the element named +// xml_element. Returns true if the property is valid. 
+bool TestResult::ValidateTestProperty(const std::string& xml_element, + const TestProperty& test_property) { + return ValidateTestPropertyName(test_property.key(), + GetReservedAttributesForElement(xml_element)); +} + +// Clears the object. +void TestResult::Clear() { + test_part_results_.clear(); + test_properties_.clear(); + death_test_count_ = 0; + elapsed_time_ = 0; +} + +// Returns true iff the test failed. +bool TestResult::Failed() const { + for (int i = 0; i < total_part_count(); ++i) { + if (GetTestPartResult(i).failed()) + return true; + } + return false; +} + +// Returns true iff the test part fatally failed. +static bool TestPartFatallyFailed(const TestPartResult& result) { + return result.fatally_failed(); +} + +// Returns true iff the test fatally failed. +bool TestResult::HasFatalFailure() const { + return CountIf(test_part_results_, TestPartFatallyFailed) > 0; +} + +// Returns true iff the test part non-fatally failed. +static bool TestPartNonfatallyFailed(const TestPartResult& result) { + return result.nonfatally_failed(); +} + +// Returns true iff the test has a non-fatal failure. +bool TestResult::HasNonfatalFailure() const { + return CountIf(test_part_results_, TestPartNonfatallyFailed) > 0; +} + +// Gets the number of all test parts. This is the sum of the number +// of successful test parts and the number of failed test parts. +int TestResult::total_part_count() const { + return static_cast<int>(test_part_results_.size()); +} + +// Returns the number of the test properties. +int TestResult::test_property_count() const { + return static_cast<int>(test_properties_.size()); +} + +// class Test + +// Creates a Test object. + +// The c'tor saves the values of all Google Test flags. +Test::Test() + : gtest_flag_saver_(new internal::GTestFlagSaver) { +} + +// The d'tor restores the values of all Google Test flags. +Test::~Test() { + delete gtest_flag_saver_; +} + +// Sets up the test fixture. +// +// A sub-class may override this. 
+void Test::SetUp() { +} + +// Tears down the test fixture. +// +// A sub-class may override this. +void Test::TearDown() { +} + +// Allows user supplied key value pairs to be recorded for later output. +void Test::RecordProperty(const std::string& key, const std::string& value) { + UnitTest::GetInstance()->RecordProperty(key, value); +} + +// Allows user supplied key value pairs to be recorded for later output. +void Test::RecordProperty(const std::string& key, int value) { + Message value_message; + value_message << value; + RecordProperty(key, value_message.GetString().c_str()); +} + +namespace internal { + +void ReportFailureInUnknownLocation(TestPartResult::Type result_type, + const std::string& message) { + // This function is a friend of UnitTest and as such has access to + // AddTestPartResult. + UnitTest::GetInstance()->AddTestPartResult( + result_type, + NULL, // No info about the source file where the exception occurred. + -1, // We have no info on which line caused the exception. + message, + ""); // No stack trace, either. +} + +} // namespace internal + +// Google Test requires all tests in the same test case to use the same test +// fixture class. This function checks if the current test has the +// same fixture class as the first test in the current test case. If +// yes, it returns true; otherwise it generates a Google Test failure and +// returns false. +bool Test::HasSameFixtureClass() { + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + const TestCase* const test_case = impl->current_test_case(); + + // Info about the first test in the current test case. + const TestInfo* const first_test_info = test_case->test_info_list()[0]; + const internal::TypeId first_fixture_id = first_test_info->fixture_class_id_; + const char* const first_test_name = first_test_info->name(); + + // Info about the current test. 
+ const TestInfo* const this_test_info = impl->current_test_info(); + const internal::TypeId this_fixture_id = this_test_info->fixture_class_id_; + const char* const this_test_name = this_test_info->name(); + + if (this_fixture_id != first_fixture_id) { + // Is the first test defined using TEST? + const bool first_is_TEST = first_fixture_id == internal::GetTestTypeId(); + // Is this test defined using TEST? + const bool this_is_TEST = this_fixture_id == internal::GetTestTypeId(); + + if (first_is_TEST || this_is_TEST) { + // The user mixed TEST and TEST_F in this test case - we'll tell + // him/her how to fix it. + + // Gets the name of the TEST and the name of the TEST_F. Note + // that first_is_TEST and this_is_TEST cannot both be true, as + // the fixture IDs are different for the two tests. + const char* const TEST_name = + first_is_TEST ? first_test_name : this_test_name; + const char* const TEST_F_name = + first_is_TEST ? this_test_name : first_test_name; + + ADD_FAILURE() + << "All tests in the same test case must use the same test fixture\n" + << "class, so mixing TEST_F and TEST in the same test case is\n" + << "illegal. In test case " << this_test_info->test_case_name() + << ",\n" + << "test " << TEST_F_name << " is defined using TEST_F but\n" + << "test " << TEST_name << " is defined using TEST. You probably\n" + << "want to change the TEST to TEST_F or move it to another test\n" + << "case."; + } else { + // The user defined two fixture classes with the same name in + // two namespaces - we'll tell him/her how to fix it. + ADD_FAILURE() + << "All tests in the same test case must use the same test fixture\n" + << "class. However, in test case " + << this_test_info->test_case_name() << ",\n" + << "you defined test " << first_test_name + << " and test " << this_test_name << "\n" + << "using two different test fixture classes. This can happen if\n" + << "the two classes are from different namespaces or translation\n" + << "units and have the same name. 
You should probably rename one\n" + << "of the classes to put the tests into different test cases."; + } + return false; + } + + return true; +} + +#if GTEST_HAS_SEH + +// Adds an "exception thrown" fatal failure to the current test. This +// function returns its result via an output parameter pointer because VC++ +// prohibits creation of objects with destructors on stack in functions +// using __try (see error C2712). +static std::string* FormatSehExceptionMessage(DWORD exception_code, + const char* location) { + Message message; + message << "SEH exception with code 0x" << std::setbase(16) << + exception_code << std::setbase(10) << " thrown in " << location << "."; + + return new std::string(message.GetString()); +} + +#endif // GTEST_HAS_SEH + +namespace internal { + +#if GTEST_HAS_EXCEPTIONS + +// Adds an "exception thrown" fatal failure to the current test. +static std::string FormatCxxExceptionMessage(const char* description, + const char* location) { + Message message; + if (description != NULL) { + message << "C++ exception with description \"" << description << "\""; + } else { + message << "Unknown C++ exception"; + } + message << " thrown in " << location << "."; + + return message.GetString(); +} + +static std::string PrintTestPartResultToString( + const TestPartResult& test_part_result); + +GoogleTestFailureException::GoogleTestFailureException( + const TestPartResult& failure) + : ::std::runtime_error(PrintTestPartResultToString(failure).c_str()) {} + +#endif // GTEST_HAS_EXCEPTIONS + +// We put these helper functions in the internal namespace as IBM's xlC +// compiler rejects the code if they were declared static. + +// Runs the given method and handles SEH exceptions it throws, when +// SEH is supported; returns the 0-value for type Result in case of an +// SEH exception. (Microsoft compilers cannot handle SEH and C++ +// exceptions in the same function. Therefore, we provide a separate +// wrapper function for handling SEH exceptions.) 
// Template parameters:
//   T:      class that declares the member function to invoke.
//   Result: return type of that member function.
//
// Arguments:
//   object:   instance on which the method is invoked.
//   method:   pointer to a no-argument member function of T.
//   location: description of the code being run; it is only used to build
//             the failure message when an SEH exception is reported.
template <class T, typename Result>
Result HandleSehExceptionsInMethodIfSupported(
    T* object, Result (T::*method)(), const char* location) {
#if GTEST_HAS_SEH
  __try {
    return (object->*method)();
  } __except (internal::UnitTestOptions::GTestShouldProcessSEH(  // NOLINT
      GetExceptionCode())) {
    // We create the exception message on the heap because VC++ prohibits
    // creation of objects with destructors on stack in functions using __try
    // (see error C2712).
    std::string* exception_message = FormatSehExceptionMessage(
        GetExceptionCode(), location);
    internal::ReportFailureInUnknownLocation(TestPartResult::kFatalFailure,
                                             *exception_message);
    // The message was allocated with new by FormatSehExceptionMessage(), so
    // it has to be released here.
    delete exception_message;
    return static_cast<Result>(0);
  }
#else
  // location is only needed for the SEH failure message; the cast keeps the
  // parameter from being reported as unused.
  (void)location;
  return (object->*method)();
#endif  // GTEST_HAS_SEH
}

// Runs the given method and catches and reports C++ and/or SEH-style
// exceptions, if they are supported.  When exceptions are being caught and
// the method exits via a C++ exception other than
// GoogleTestFailureException, a fatal failure is reported and the 0-value
// for type Result is returned; an SEH exception is handled the same way by
// HandleSehExceptionsInMethodIfSupported().
template <class T, typename Result>
Result HandleExceptionsInMethodIfSupported(
    T* object, Result (T::*method)(), const char* location) {
  // NOTE: The user code can affect the way in which Google Test handles
  // exceptions by setting GTEST_FLAG(catch_exceptions), but only before
  // RUN_ALL_TESTS() starts. It is technically possible to check the flag
  // after the exception is caught and either report or re-throw the
  // exception based on the flag's value:
  //
  // try {
  //   // Perform the test method.
  // } catch (...) {
  //   if (GTEST_FLAG(catch_exceptions))
  //     // Report the exception as failure.
  //   else
  //     throw;  // Re-throws the original exception.
  // }
  //
  // However, the purpose of this flag is to allow the program to drop into
  // the debugger when the exception is thrown. On most platforms, once the
  // control enters the catch block, the exception origin information is
  // lost and the debugger will stop the program at the point of the
  // re-throw in this function -- instead of at the point of the original
  // throw statement in the code under test.  For this reason, we perform
  // the check early, sacrificing the ability to affect Google Test's
  // exception handling in the method where the exception is thrown.
  if (internal::GetUnitTestImpl()->catch_exceptions()) {
#if GTEST_HAS_EXCEPTIONS
    try {
      return HandleSehExceptionsInMethodIfSupported(object, method, location);
    } catch (const internal::GoogleTestFailureException&) {  // NOLINT
      // This exception type can only be thrown by a failed Google
      // Test assertion with the intention of letting another testing
      // framework catch it.  Therefore we just re-throw it.
      throw;
    } catch (const std::exception& e) {  // NOLINT
      // A standard exception carries a description; include it in the
      // reported failure.
      internal::ReportFailureInUnknownLocation(
          TestPartResult::kFatalFailure,
          FormatCxxExceptionMessage(e.what(), location));
    } catch (...) {  // NOLINT
      // Nothing is known about the exception, so pass NULL as description.
      internal::ReportFailureInUnknownLocation(
          TestPartResult::kFatalFailure,
          FormatCxxExceptionMessage(NULL, location));
    }
    // Reached only after one of the reporting handlers above has run.
    return static_cast<Result>(0);
#else
    return HandleSehExceptionsInMethodIfSupported(object, method, location);
#endif  // GTEST_HAS_EXCEPTIONS
  } else {
    // Exceptions are not to be caught: call the method directly so that an
    // exception propagates from its original throw point.
    return (object->*method)();
  }
}

}  // namespace internal

// Runs the test and updates the test result.
//
// SetUp(), the test body and TearDown() are each invoked through
// HandleExceptionsInMethodIfSupported(), and UponLeavingGTest() is called on
// the stack trace getter right before each of them.
void Test::Run() {
  if (!HasSameFixtureClass()) return;

  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
  impl->os_stack_trace_getter()->UponLeavingGTest();
  internal::HandleExceptionsInMethodIfSupported(this, &Test::SetUp, "SetUp()");
  // We will run the test only if SetUp() was successful.
  if (!HasFatalFailure()) {
    impl->os_stack_trace_getter()->UponLeavingGTest();
    internal::HandleExceptionsInMethodIfSupported(
        this, &Test::TestBody, "the test body");
  }

  // However, we want to clean up as much as possible.  Hence we will
  // always call TearDown(), even if SetUp() or the test body has
  // failed.
  impl->os_stack_trace_getter()->UponLeavingGTest();
  internal::HandleExceptionsInMethodIfSupported(
      this, &Test::TearDown, "TearDown()");
}

// Returns true iff the current test has a fatal failure.
bool Test::HasFatalFailure() {
  return internal::GetUnitTestImpl()->current_test_result()->HasFatalFailure();
}

// Returns true iff the current test has a non-fatal failure.
bool Test::HasNonfatalFailure() {
  return internal::GetUnitTestImpl()->current_test_result()->
      HasNonfatalFailure();
}

// class TestInfo

// Constructs a TestInfo object. It assumes ownership of the test factory
// object.
//
// a_type_param and a_value_param may be NULL; a non-NULL value is copied
// into a newly allocated std::string.  The run/disabled/filter flags all
// start out as false.
TestInfo::TestInfo(const std::string& a_test_case_name,
                   const std::string& a_name,
                   const char* a_type_param,
                   const char* a_value_param,
                   internal::TypeId fixture_class_id,
                   internal::TestFactoryBase* factory)
    : test_case_name_(a_test_case_name),
      name_(a_name),
      type_param_(a_type_param ? new std::string(a_type_param) : NULL),
      value_param_(a_value_param ? new std::string(a_value_param) : NULL),
      fixture_class_id_(fixture_class_id),
      should_run_(false),
      is_disabled_(false),
      matches_filter_(false),
      factory_(factory),
      result_() {}

// Destructs a TestInfo object.  Deletes the factory received by the
// constructor.
TestInfo::~TestInfo() { delete factory_; }

namespace internal {

// Creates a new TestInfo object and registers it with Google Test;
// returns the created object.
//
// Arguments:
//
//   test_case_name:   name of the test case
//   name:             name of the test
//   type_param:       the name of the test's type parameter, or NULL if
//                     this is not a typed or a type-parameterized test.
+// value_param: text representation of the test's value parameter, +// or NULL if this is not a value-parameterized test. +// fixture_class_id: ID of the test fixture class +// set_up_tc: pointer to the function that sets up the test case +// tear_down_tc: pointer to the function that tears down the test case +// factory: pointer to the factory that creates a test object. +// The newly created TestInfo instance will assume +// ownership of the factory object. +TestInfo* MakeAndRegisterTestInfo( + const char* test_case_name, + const char* name, + const char* type_param, + const char* value_param, + TypeId fixture_class_id, + SetUpTestCaseFunc set_up_tc, + TearDownTestCaseFunc tear_down_tc, + TestFactoryBase* factory) { + TestInfo* const test_info = + new TestInfo(test_case_name, name, type_param, value_param, + fixture_class_id, factory); + GetUnitTestImpl()->AddTestInfo(set_up_tc, tear_down_tc, test_info); + return test_info; +} + +#if GTEST_HAS_PARAM_TEST +void ReportInvalidTestCaseType(const char* test_case_name, + const char* file, int line) { + Message errors; + errors + << "Attempted redefinition of test case " << test_case_name << ".\n" + << "All tests in the same test case must use the same test fixture\n" + << "class. However, in test case " << test_case_name << ", you tried\n" + << "to define a test using a fixture class different from the one\n" + << "used earlier. This can happen if the two fixture classes are\n" + << "from different namespaces and have the same name. You should\n" + << "probably rename one of the classes to put the tests into different\n" + << "test cases."; + + fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(), + errors.GetString().c_str()); +} +#endif // GTEST_HAS_PARAM_TEST + +} // namespace internal + +namespace { + +// A predicate that checks the test name of a TestInfo against a known +// value. +// +// This is used for implementation of the TestCase class only. 
We put +// it in the anonymous namespace to prevent polluting the outer +// namespace. +// +// TestNameIs is copyable. + +//Commenting out this class since its not used and wherefor produces warnings +// class TestNameIs { +// public: +// // Constructor. +// // +// // TestNameIs has NO default constructor. +// explicit TestNameIs(const char* name) +// : name_(name) {} +// +// // Returns true iff the test name of test_info matches name_. +// bool operator()(const TestInfo * test_info) const { +// return test_info && test_info->name() == name_; +// } +// +// private: +// std::string name_; +//}; + +} // namespace + +namespace internal { + +// This method expands all parameterized tests registered with macros TEST_P +// and INSTANTIATE_TEST_CASE_P into regular tests and registers those. +// This will be done just once during the program runtime. +void UnitTestImpl::RegisterParameterizedTests() { +#if GTEST_HAS_PARAM_TEST + if (!parameterized_tests_registered_) { + parameterized_test_registry_.RegisterTests(); + parameterized_tests_registered_ = true; + } +#endif +} + +} // namespace internal + +// Creates the test object, runs it, records its result, and then +// deletes it. +void TestInfo::Run() { + if (!should_run_) return; + + // Tells UnitTest where to store test result. + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + impl->set_current_test_info(this); + + TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater(); + + // Notifies the unit test event listeners that a test is about to start. + repeater->OnTestStart(*this); + + const TimeInMillis start = internal::GetTimeInMillis(); + + impl->os_stack_trace_getter()->UponLeavingGTest(); + + // Creates the test object. 
+ Test* const test = internal::HandleExceptionsInMethodIfSupported( + factory_, &internal::TestFactoryBase::CreateTest, + "the test fixture's constructor"); + + // Runs the test only if the test object was created and its + // constructor didn't generate a fatal failure. + if ((test != NULL) && !Test::HasFatalFailure()) { + // This doesn't throw as all user code that can throw are wrapped into + // exception handling code. + test->Run(); + } + + // Deletes the test object. + impl->os_stack_trace_getter()->UponLeavingGTest(); + internal::HandleExceptionsInMethodIfSupported( + test, &Test::DeleteSelf_, "the test fixture's destructor"); + + result_.set_elapsed_time(internal::GetTimeInMillis() - start); + + // Notifies the unit test event listener that a test has just finished. + repeater->OnTestEnd(*this); + + // Tells UnitTest to stop associating assertion results to this + // test. + impl->set_current_test_info(NULL); +} + +// class TestCase + +// Gets the number of successful tests in this test case. +int TestCase::successful_test_count() const { + return CountIf(test_info_list_, TestPassed); +} + +// Gets the number of failed tests in this test case. +int TestCase::failed_test_count() const { + return CountIf(test_info_list_, TestFailed); +} + +// Gets the number of disabled tests that will be reported in the XML report. +int TestCase::reportable_disabled_test_count() const { + return CountIf(test_info_list_, TestReportableDisabled); +} + +// Gets the number of disabled tests in this test case. +int TestCase::disabled_test_count() const { + return CountIf(test_info_list_, TestDisabled); +} + +// Gets the number of tests to be printed in the XML report. +int TestCase::reportable_test_count() const { + return CountIf(test_info_list_, TestReportable); +} + +// Get the number of tests in this test case that should run. +int TestCase::test_to_run_count() const { + return CountIf(test_info_list_, ShouldRunTest); +} + +// Gets the number of all tests. 
+int TestCase::total_test_count() const { + return static_cast<int>(test_info_list_.size()); +} + +// Creates a TestCase with the given name. +// +// Arguments: +// +// name: name of the test case +// a_type_param: the name of the test case's type parameter, or NULL if +// this is not a typed or a type-parameterized test case. +// set_up_tc: pointer to the function that sets up the test case +// tear_down_tc: pointer to the function that tears down the test case +TestCase::TestCase(const char* a_name, const char* a_type_param, + Test::SetUpTestCaseFunc set_up_tc, + Test::TearDownTestCaseFunc tear_down_tc) + : name_(a_name), + type_param_(a_type_param ? new std::string(a_type_param) : NULL), + set_up_tc_(set_up_tc), + tear_down_tc_(tear_down_tc), + should_run_(false), + elapsed_time_(0) { +} + +// Destructor of TestCase. +TestCase::~TestCase() { + // Deletes every Test in the collection. + ForEach(test_info_list_, internal::Delete<TestInfo>); +} + +// Returns the i-th test among all the tests. i can range from 0 to +// total_test_count() - 1. If i is not in that range, returns NULL. +const TestInfo* TestCase::GetTestInfo(int i) const { + const int index = GetElementOr(test_indices_, i, -1); + return index < 0 ? NULL : test_info_list_[index]; +} + +// Returns the i-th test among all the tests. i can range from 0 to +// total_test_count() - 1. If i is not in that range, returns NULL. +TestInfo* TestCase::GetMutableTestInfo(int i) { + const int index = GetElementOr(test_indices_, i, -1); + return index < 0 ? NULL : test_info_list_[index]; +} + +// Adds a test to this test case. Will delete the test upon +// destruction of the TestCase object. +void TestCase::AddTestInfo(TestInfo * test_info) { + test_info_list_.push_back(test_info); + test_indices_.push_back(static_cast<int>(test_indices_.size())); +} + +// Runs every test in this TestCase. 
+void TestCase::Run() { + if (!should_run_) return; + + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + impl->set_current_test_case(this); + + TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater(); + + repeater->OnTestCaseStart(*this); + impl->os_stack_trace_getter()->UponLeavingGTest(); + internal::HandleExceptionsInMethodIfSupported( + this, &TestCase::RunSetUpTestCase, "SetUpTestCase()"); + + const internal::TimeInMillis start = internal::GetTimeInMillis(); + for (int i = 0; i < total_test_count(); i++) { + GetMutableTestInfo(i)->Run(); + } + elapsed_time_ = internal::GetTimeInMillis() - start; + + impl->os_stack_trace_getter()->UponLeavingGTest(); + internal::HandleExceptionsInMethodIfSupported( + this, &TestCase::RunTearDownTestCase, "TearDownTestCase()"); + + repeater->OnTestCaseEnd(*this); + impl->set_current_test_case(NULL); +} + +// Clears the results of all tests in this test case. +void TestCase::ClearResult() { + ad_hoc_test_result_.Clear(); + ForEach(test_info_list_, TestInfo::ClearTestResult); +} + +// Shuffles the tests in this test case. +void TestCase::ShuffleTests(internal::Random* random) { + Shuffle(random, &test_indices_); +} + +// Restores the test order to before the first shuffle. +void TestCase::UnshuffleTests() { + for (size_t i = 0; i < test_indices_.size(); i++) { + test_indices_[i] = static_cast<int>(i); + } +} + +// Formats a countable noun. Depending on its quantity, either the +// singular form or the plural form is used. e.g. +// +// FormatCountableNoun(1, "formula", "formuli") returns "1 formula". +// FormatCountableNoun(5, "book", "books") returns "5 books". +static std::string FormatCountableNoun(int count, + const char * singular_form, + const char * plural_form) { + return internal::StreamableToString(count) + " " + + (count == 1 ? singular_form : plural_form); +} + +// Formats the count of tests. 
static std::string FormatTestCount(int test_count) {
  return FormatCountableNoun(test_count, "test", "tests");
}

// Formats the count of test cases.
static std::string FormatTestCaseCount(int test_case_count) {
  return FormatCountableNoun(test_case_count, "test case", "test cases");
}

// Converts a TestPartResult::Type enum to human-friendly string
// representation.  Both kNonFatalFailure and kFatalFailure are translated
// to the same failure text, as the user usually doesn't care about the
// difference between the two when viewing the test result.  The failure
// text is "error: " when compiled with MSVC and "Failure\n" otherwise.
static const char * TestPartResultTypeToString(TestPartResult::Type type) {
  switch (type) {
    case TestPartResult::kSuccess:
      return "Success";

    case TestPartResult::kNonFatalFailure:
    case TestPartResult::kFatalFailure:
#ifdef _MSC_VER
      return "error: ";
#else
      return "Failure\n";
#endif
    default:
      return "Unknown result type";
  }
}

namespace internal {

// Prints a TestPartResult to an std::string.  The result consists of the
// formatted file location, a space, the result type text and the message.
static std::string PrintTestPartResultToString(
    const TestPartResult& test_part_result) {
  return (Message()
          << internal::FormatFileLocation(test_part_result.file_name(),
                                          test_part_result.line_number())
          << " " << TestPartResultTypeToString(test_part_result.type())
          << test_part_result.message()).GetString();
}

// Prints a TestPartResult to stdout, followed by a newline, and flushes
// stdout.
static void PrintTestPartResult(const TestPartResult& test_part_result) {
  const std::string& result =
      PrintTestPartResultToString(test_part_result);
  printf("%s\n", result.c_str());
  fflush(stdout);
  // If the test program runs in Visual Studio or a debugger, the
  // following statements add the test part result message to the Output
  // window such that the user can double-click on it to jump to the
  // corresponding source code location; otherwise they do nothing.
#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
  // We don't call OutputDebugString*() on Windows Mobile, as printing
  // to stdout is done by OutputDebugString() there already - we don't
  // want the same message printed twice.
  ::OutputDebugStringA(result.c_str());
  ::OutputDebugStringA("\n");
#endif
}

// class PrettyUnitTestResultPrinter

// Colors understood by ColoredPrintf().  COLOR_DEFAULT requests uncolored
// output.
enum GTestColor {
  COLOR_DEFAULT,
  COLOR_RED,
  COLOR_GREEN,
  COLOR_YELLOW
};

#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE

// Returns the character attribute for the given color.  Yellow is built
// from the red and green attributes; any other value maps to 0.
WORD GetColorAttribute(GTestColor color) {
  switch (color) {
    case COLOR_RED:    return FOREGROUND_RED;
    case COLOR_GREEN:  return FOREGROUND_GREEN;
    case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN;
    default:           return 0;
  }
}

#else

// Returns the ANSI color code for the given color.  COLOR_DEFAULT is
// an invalid input, for which NULL is returned.
const char* GetAnsiColorCode(GTestColor color) {
  switch (color) {
    case COLOR_RED:     return "1";
    case COLOR_GREEN:   return "2";
    case COLOR_YELLOW:  return "3";
    default:            return NULL;
  };
}

#endif  // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE

// Returns true iff Google Test should use colors in the output.
//
// stdout_is_tty is consulted only when the color flag is "auto".
bool ShouldUseColor(bool stdout_is_tty) {
  const char* const gtest_color = GTEST_FLAG(color).c_str();

  if (String::CaseInsensitiveCStringEquals(gtest_color, "auto")) {
#if GTEST_OS_WINDOWS
    // On Windows the TERM variable is usually not set, but the
    // console there does support colors.
    return stdout_is_tty;
#else
    // On non-Windows platforms, we rely on the TERM variable.  Only the
    // terminal names listed below are treated as supporting color.
    const char* const term = posix::GetEnv("TERM");
    const bool term_supports_color =
        String::CStringEquals(term, "xterm") ||
        String::CStringEquals(term, "xterm-color") ||
        String::CStringEquals(term, "xterm-256color") ||
        String::CStringEquals(term, "screen") ||
        String::CStringEquals(term, "screen-256color") ||
        String::CStringEquals(term, "linux") ||
        String::CStringEquals(term, "cygwin");
    return stdout_is_tty && term_supports_color;
#endif  // GTEST_OS_WINDOWS
  }

  return String::CaseInsensitiveCStringEquals(gtest_color, "yes") ||
      String::CaseInsensitiveCStringEquals(gtest_color, "true") ||
      String::CaseInsensitiveCStringEquals(gtest_color, "t") ||
      String::CStringEquals(gtest_color, "1");
  // We take "yes", "true", "t", and "1" as meaning "yes".  If the
  // value is neither one of these nor "auto", we treat it as "no" to
  // be conservative.
}

// Helpers for printing colored strings to stdout. Note that on Windows, we
// cannot simply emit special characters and have the terminal change colors.
// This routine must actually emit the characters rather than return a string
// that would be colored when printed, as can be done on Linux.
//
// fmt and the variadic arguments are handed to vprintf().
void ColoredPrintf(GTestColor color, const char* fmt, ...) {
  va_list args;
  va_start(args, fmt);

#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS || GTEST_OS_IOS
  const bool use_color = false;
#else
  // The color decision is made on the first call and then reused.
  static const bool in_color_mode =
      ShouldUseColor(posix::IsATTY(posix::FileNo(stdout)) != 0);
  const bool use_color = in_color_mode && (color != COLOR_DEFAULT);
#endif  // GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS
  // The '!= 0' comparison is necessary to satisfy MSVC 7.1.

  // Plain output: print the text and leave before any color handling.
  if (!use_color) {
    vprintf(fmt, args);
    va_end(args);
    return;
  }

#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
  const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);

  // Gets the current text color.
  CONSOLE_SCREEN_BUFFER_INFO buffer_info;
  GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
  const WORD old_color_attrs = buffer_info.wAttributes;

  // We need to flush the stream buffers into the console before each
  // SetConsoleTextAttribute call lest it affect the text that is already
  // printed but has not yet reached the console.
  fflush(stdout);
  SetConsoleTextAttribute(stdout_handle,
                          GetColorAttribute(color) | FOREGROUND_INTENSITY);
  vprintf(fmt, args);

  fflush(stdout);
  // Restores the text color.
  SetConsoleTextAttribute(stdout_handle, old_color_attrs);
#else
  // Wraps the text in an ANSI escape sequence that selects the color.
  printf("\033[0;3%sm", GetAnsiColorCode(color));
  vprintf(fmt, args);
  printf("\033[m");  // Resets the terminal to default.
#endif  // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
  va_end(args);
}

// Text printed in Google Test's text output and --gtest_list_tests
// output to label the type parameter and value parameter for a test.
static const char kTypeParamLabel[] = "TypeParam";
static const char kValueParamLabel[] = "GetParam()";

// Prints ", where TypeParam = ... and GetParam() = ..." for the parameters
// that test_info has.  Prints nothing when it has neither a type parameter
// nor a value parameter.
void PrintFullTestCommentIfPresent(const TestInfo& test_info) {
  const char* const type_param = test_info.type_param();
  const char* const value_param = test_info.value_param();

  if (type_param != NULL || value_param != NULL) {
    printf(", where ");
    if (type_param != NULL) {
      printf("%s = %s", kTypeParamLabel, type_param);
      // The connective is needed only when both parameters are present.
      if (value_param != NULL)
        printf(" and ");
    }
    if (value_param != NULL) {
      printf("%s = %s", kValueParamLabel, value_param);
    }
  }
}

// This class implements the TestEventListener interface.
// It prints the test progress and results to stdout.
//
// Class PrettyUnitTestResultPrinter is copyable.
class PrettyUnitTestResultPrinter : public TestEventListener {
 public:
  PrettyUnitTestResultPrinter() {}
  // Prints "<test_case>.<test>" without a trailing newline.
  static void PrintTestName(const char * test_case, const char * test) {
    printf("%s.%s", test_case, test);
  }

  // The following methods override what's in the TestEventListener class.
  // The ones with an empty body print nothing for that event.
  virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {}
  virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration);
  virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test);
  virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {}
  virtual void OnTestCaseStart(const TestCase& test_case);
  virtual void OnTestStart(const TestInfo& test_info);
  virtual void OnTestPartResult(const TestPartResult& result);
  virtual void OnTestEnd(const TestInfo& test_info);
  virtual void OnTestCaseEnd(const TestCase& test_case);
  virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test);
  virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {}
  virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
  virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {}

 private:
  // Prints one line for every test that was run and did not pass.
  static void PrintFailedTests(const UnitTest& unit_test);
};

  // Fired before each iteration of tests starts.
void PrettyUnitTestResultPrinter::OnTestIterationStart(
    const UnitTest& unit_test, int iteration) {
  // iteration is zero-based, hence the "+ 1" for display.
  if (GTEST_FLAG(repeat) != 1)
    printf("\nRepeating all tests (iteration %d) . . .\n\n", iteration + 1);

  const char* const filter = GTEST_FLAG(filter).c_str();

  // Prints the filter if it's not *.  This reminds the user that some
  // tests may be skipped.
  if (!String::CStringEquals(filter, kUniversalFilter)) {
    ColoredPrintf(COLOR_YELLOW,
                  "Note: %s filter = %s\n", GTEST_NAME_, filter);
  }

  // Announces the shard when test sharding is in effect.  The shard index
  // is shown one-based.
  if (internal::ShouldShard(kTestTotalShards, kTestShardIndex, false)) {
    const Int32 shard_index = Int32FromEnvOrDie(kTestShardIndex, -1);
    ColoredPrintf(COLOR_YELLOW,
                  "Note: This is test shard %d of %s.\n",
                  static_cast<int>(shard_index) + 1,
                  internal::posix::GetEnv(kTestTotalShards));
  }

  if (GTEST_FLAG(shuffle)) {
    ColoredPrintf(COLOR_YELLOW,
                  "Note: Randomizing tests' orders with a seed of %d .\n",
                  unit_test.random_seed());
  }

  ColoredPrintf(COLOR_GREEN, "[==========] ");
  printf("Running %s from %s.\n",
         FormatTestCount(unit_test.test_to_run_count()).c_str(),
         FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str());
  fflush(stdout);
}

// Announces the start of the global test environment set-up.
void PrettyUnitTestResultPrinter::OnEnvironmentsSetUpStart(
    const UnitTest& /*unit_test*/) {
  ColoredPrintf(COLOR_GREEN, "[----------] ");
  printf("Global test environment set-up.\n");
  fflush(stdout);
}

// Prints how many tests of the test case will run, plus the type parameter
// when the test case has one.
void PrettyUnitTestResultPrinter::OnTestCaseStart(const TestCase& test_case) {
  const std::string counts =
      FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
  ColoredPrintf(COLOR_GREEN, "[----------] ");
  printf("%s from %s", counts.c_str(), test_case.name());
  if (test_case.type_param() == NULL) {
    printf("\n");
  } else {
    printf(", where %s = %s\n", kTypeParamLabel, test_case.type_param());
  }
  fflush(stdout);
}

// Prints the "RUN" banner followed by the full test name.
void PrettyUnitTestResultPrinter::OnTestStart(const TestInfo& test_info) {
  ColoredPrintf(COLOR_GREEN, "[ RUN ] ");
  PrintTestName(test_info.test_case_name(), test_info.name());
  printf("\n");
  fflush(stdout);
}

// Called after an assertion failure.
void PrettyUnitTestResultPrinter::OnTestPartResult(
    const TestPartResult& result) {
  // If the test part succeeded, we don't need to do anything.
  if (result.type() == TestPartResult::kSuccess)
    return;

  // Print failure message from the assertion (e.g. expected this and got that).
  PrintTestPartResult(result);
  fflush(stdout);
}

// Prints the "OK" or "FAILED" banner with the test name.  The parameter
// comment is added for failed tests, and the elapsed time when the
// print_time flag is set.
void PrettyUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) {
  if (test_info.result()->Passed()) {
    ColoredPrintf(COLOR_GREEN, "[ OK ] ");
  } else {
    ColoredPrintf(COLOR_RED, "[ FAILED ] ");
  }
  PrintTestName(test_info.test_case_name(), test_info.name());
  if (test_info.result()->Failed())
    PrintFullTestCommentIfPresent(test_info);

  if (GTEST_FLAG(print_time)) {
    printf(" (%s ms)\n", internal::StreamableToString(
           test_info.result()->elapsed_time()).c_str());
  } else {
    printf("\n");
  }
  fflush(stdout);
}

// Prints the per-test-case summary line with the total elapsed time.
// Nothing is printed unless the print_time flag is set.
void PrettyUnitTestResultPrinter::OnTestCaseEnd(const TestCase& test_case) {
  if (!GTEST_FLAG(print_time)) return;

  const std::string counts =
      FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
  ColoredPrintf(COLOR_GREEN, "[----------] ");
  printf("%s from %s (%s ms total)\n\n",
         counts.c_str(), test_case.name(),
         internal::StreamableToString(test_case.elapsed_time()).c_str());
  fflush(stdout);
}

// Announces the start of the global test environment tear-down.
void PrettyUnitTestResultPrinter::OnEnvironmentsTearDownStart(
    const UnitTest& /*unit_test*/) {
  ColoredPrintf(COLOR_GREEN, "[----------] ");
  printf("Global test environment tear-down\n");
  fflush(stdout);
}

// Internal helper for printing the list of failed tests.
+void PrettyUnitTestResultPrinter::PrintFailedTests(const UnitTest& unit_test) { + const int failed_test_count = unit_test.failed_test_count(); + if (failed_test_count == 0) { + return; + } + + for (int i = 0; i < unit_test.total_test_case_count(); ++i) { + const TestCase& test_case = *unit_test.GetTestCase(i); + if (!test_case.should_run() || (test_case.failed_test_count() == 0)) { + continue; + } + for (int j = 0; j < test_case.total_test_count(); ++j) { + const TestInfo& test_info = *test_case.GetTestInfo(j); + if (!test_info.should_run() || test_info.result()->Passed()) { + continue; + } + ColoredPrintf(COLOR_RED, "[ FAILED ] "); + printf("%s.%s", test_case.name(), test_info.name()); + PrintFullTestCommentIfPresent(test_info); + printf("\n"); + } + } +} + +void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test, + int /*iteration*/) { + ColoredPrintf(COLOR_GREEN, "[==========] "); + printf("%s from %s ran.", + FormatTestCount(unit_test.test_to_run_count()).c_str(), + FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str()); + if (GTEST_FLAG(print_time)) { + printf(" (%s ms total)", + internal::StreamableToString(unit_test.elapsed_time()).c_str()); + } + printf("\n"); + ColoredPrintf(COLOR_GREEN, "[ PASSED ] "); + printf("%s.\n", FormatTestCount(unit_test.successful_test_count()).c_str()); + + int num_failures = unit_test.failed_test_count(); + if (!unit_test.Passed()) { + const int failed_test_count = unit_test.failed_test_count(); + ColoredPrintf(COLOR_RED, "[ FAILED ] "); + printf("%s, listed below:\n", FormatTestCount(failed_test_count).c_str()); + PrintFailedTests(unit_test); + printf("\n%2d FAILED %s\n", num_failures, + num_failures == 1 ? "TEST" : "TESTS"); + } + + int num_disabled = unit_test.reportable_disabled_test_count(); + if (num_disabled && !GTEST_FLAG(also_run_disabled_tests)) { + if (!num_failures) { + printf("\n"); // Add a spacer if no FAILURE banner is displayed. 
+ } + ColoredPrintf(COLOR_YELLOW, + " YOU HAVE %d DISABLED %s\n\n", + num_disabled, + num_disabled == 1 ? "TEST" : "TESTS"); + } + // Ensure that Google Test output is printed before, e.g., heapchecker output. + fflush(stdout); +} + +// End PrettyUnitTestResultPrinter + +// class TestEventRepeater +// +// This class forwards events to other event listeners. +class TestEventRepeater : public TestEventListener { + public: + TestEventRepeater() : forwarding_enabled_(true) {} + virtual ~TestEventRepeater(); + void Append(TestEventListener *listener); + TestEventListener* Release(TestEventListener* listener); + + // Controls whether events will be forwarded to listeners_. Set to false + // in death test child processes. + bool forwarding_enabled() const { return forwarding_enabled_; } + void set_forwarding_enabled(bool enable) { forwarding_enabled_ = enable; } + + virtual void OnTestProgramStart(const UnitTest& unit_test); + virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration); + virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test); + virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test); + virtual void OnTestCaseStart(const TestCase& test_case); + virtual void OnTestStart(const TestInfo& test_info); + virtual void OnTestPartResult(const TestPartResult& result); + virtual void OnTestEnd(const TestInfo& test_info); + virtual void OnTestCaseEnd(const TestCase& test_case); + virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test); + virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test); + virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration); + virtual void OnTestProgramEnd(const UnitTest& unit_test); + + private: + // Controls whether events will be forwarded to listeners_. Set to false + // in death test child processes. + bool forwarding_enabled_; + // The list of listeners that receive events. 
+ std::vector<TestEventListener*> listeners_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventRepeater); +}; + +TestEventRepeater::~TestEventRepeater() { + ForEach(listeners_, Delete<TestEventListener>); +} + +void TestEventRepeater::Append(TestEventListener *listener) { + listeners_.push_back(listener); +} + +// TODO(vladl@google.com): Factor the search functionality into Vector::Find. +TestEventListener* TestEventRepeater::Release(TestEventListener *listener) { + for (size_t i = 0; i < listeners_.size(); ++i) { + if (listeners_[i] == listener) { + listeners_.erase(listeners_.begin() + i); + return listener; + } + } + + return NULL; +} + +// Since most methods are very similar, use macros to reduce boilerplate. +// This defines a member that forwards the call to all listeners. +#define GTEST_REPEATER_METHOD_(Name, Type) \ +void TestEventRepeater::Name(const Type& parameter) { \ + if (forwarding_enabled_) { \ + for (size_t i = 0; i < listeners_.size(); i++) { \ + listeners_[i]->Name(parameter); \ + } \ + } \ +} +// This defines a member that forwards the call to all listeners in reverse +// order. 
+#define GTEST_REVERSE_REPEATER_METHOD_(Name, Type) \ +void TestEventRepeater::Name(const Type& parameter) { \ + if (forwarding_enabled_) { \ + for (int i = static_cast<int>(listeners_.size()) - 1; i >= 0; i--) { \ + listeners_[i]->Name(parameter); \ + } \ + } \ +} + +GTEST_REPEATER_METHOD_(OnTestProgramStart, UnitTest) +GTEST_REPEATER_METHOD_(OnEnvironmentsSetUpStart, UnitTest) +GTEST_REPEATER_METHOD_(OnTestCaseStart, TestCase) +GTEST_REPEATER_METHOD_(OnTestStart, TestInfo) +GTEST_REPEATER_METHOD_(OnTestPartResult, TestPartResult) +GTEST_REPEATER_METHOD_(OnEnvironmentsTearDownStart, UnitTest) +GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsSetUpEnd, UnitTest) +GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsTearDownEnd, UnitTest) +GTEST_REVERSE_REPEATER_METHOD_(OnTestEnd, TestInfo) +GTEST_REVERSE_REPEATER_METHOD_(OnTestCaseEnd, TestCase) +GTEST_REVERSE_REPEATER_METHOD_(OnTestProgramEnd, UnitTest) + +#undef GTEST_REPEATER_METHOD_ +#undef GTEST_REVERSE_REPEATER_METHOD_ + +void TestEventRepeater::OnTestIterationStart(const UnitTest& unit_test, + int iteration) { + if (forwarding_enabled_) { + for (size_t i = 0; i < listeners_.size(); i++) { + listeners_[i]->OnTestIterationStart(unit_test, iteration); + } + } +} + +void TestEventRepeater::OnTestIterationEnd(const UnitTest& unit_test, + int iteration) { + if (forwarding_enabled_) { + for (int i = static_cast<int>(listeners_.size()) - 1; i >= 0; i--) { + listeners_[i]->OnTestIterationEnd(unit_test, iteration); + } + } +} + +// End TestEventRepeater + +// This class generates an XML output file. +class XmlUnitTestResultPrinter : public EmptyTestEventListener { + public: + explicit XmlUnitTestResultPrinter(const char* output_file); + + virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration); + + private: + // Is c a whitespace character that is normalized to a space character + // when it appears in an XML attribute value? 
+ static bool IsNormalizableWhitespace(char c) { + return c == 0x9 || c == 0xA || c == 0xD; + } + + // May c appear in a well-formed XML document? + static bool IsValidXmlCharacter(char c) { + return IsNormalizableWhitespace(c) || c >= 0x20; + } + + // Returns an XML-escaped copy of the input string str. If + // is_attribute is true, the text is meant to appear as an attribute + // value, and normalizable whitespace is preserved by replacing it + // with character references. + static std::string EscapeXml(const std::string& str, bool is_attribute); + + // Returns the given string with all characters invalid in XML removed. + static std::string RemoveInvalidXmlCharacters(const std::string& str); + + // Convenience wrapper around EscapeXml when str is an attribute value. + static std::string EscapeXmlAttribute(const std::string& str) { + return EscapeXml(str, true); + } + + // Convenience wrapper around EscapeXml when str is not an attribute value. + static std::string EscapeXmlText(const char* str) { + return EscapeXml(str, false); + } + + // Verifies that the given attribute belongs to the given element and + // streams the attribute as XML. + static void OutputXmlAttribute(std::ostream* stream, + const std::string& element_name, + const std::string& name, + const std::string& value); + + // Streams an XML CDATA section, escaping invalid CDATA sequences as needed. + static void OutputXmlCDataSection(::std::ostream* stream, const char* data); + + // Streams an XML representation of a TestInfo object. + static void OutputXmlTestInfo(::std::ostream* stream, + const char* test_case_name, + const TestInfo& test_info); + + // Prints an XML representation of a TestCase object + static void PrintXmlTestCase(::std::ostream* stream, + const TestCase& test_case); + + // Prints an XML summary of unit_test to output stream out. 
+ static void PrintXmlUnitTest(::std::ostream* stream, + const UnitTest& unit_test); + + // Produces a string representing the test properties in a result as space + // delimited XML attributes based on the property key="value" pairs. + // When the std::string is not empty, it includes a space at the beginning, + // to delimit this attribute from prior attributes. + static std::string TestPropertiesAsXmlAttributes(const TestResult& result); + + // The output file. + const std::string output_file_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(XmlUnitTestResultPrinter); +}; + +// Creates a new XmlUnitTestResultPrinter. +XmlUnitTestResultPrinter::XmlUnitTestResultPrinter(const char* output_file) + : output_file_(output_file) { + if (output_file_.c_str() == NULL || output_file_.empty()) { + fprintf(stderr, "XML output file may not be null\n"); + fflush(stderr); + exit(EXIT_FAILURE); + } +} + +// Called after the unit test ends. +void XmlUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test, + int /*iteration*/) { + FILE* xmlout = NULL; + FilePath output_file(output_file_); + FilePath output_dir(output_file.RemoveFileName()); + + if (output_dir.CreateDirectoriesRecursively()) { + xmlout = posix::FOpen(output_file_.c_str(), "w"); + } + if (xmlout == NULL) { + // TODO(wan): report the reason of the failure. + // + // We don't do it for now as: + // + // 1. There is no urgent need for it. + // 2. It's a bit involved to make the errno variable thread-safe on + // all three operating systems (Linux, Windows, and Mac OS). + // 3. To interpret the meaning of errno in a thread-safe way, + // we need the strerror_r() function, which is not available on + // Windows. 
+ fprintf(stderr, + "Unable to open file \"%s\"\n", + output_file_.c_str()); + fflush(stderr); + exit(EXIT_FAILURE); + } + std::stringstream stream; + PrintXmlUnitTest(&stream, unit_test); + fprintf(xmlout, "%s", StringStreamToString(&stream).c_str()); + fclose(xmlout); +} + +// Returns an XML-escaped copy of the input string str. If is_attribute +// is true, the text is meant to appear as an attribute value, and +// normalizable whitespace is preserved by replacing it with character +// references. +// +// Invalid XML characters in str, if any, are stripped from the output. +// It is expected that most, if not all, of the text processed by this +// module will consist of ordinary English text. +// If this module is ever modified to produce version 1.1 XML output, +// most invalid characters can be retained using character references. +// TODO(wan): It might be nice to have a minimally invasive, human-readable +// escaping scheme for invalid characters, rather than dropping them. +std::string XmlUnitTestResultPrinter::EscapeXml( + const std::string& str, bool is_attribute) { + Message m; + + for (size_t i = 0; i < str.size(); ++i) { + const char ch = str[i]; + switch (ch) { + case '<': + m << "<"; + break; + case '>': + m << ">"; + break; + case '&': + m << "&"; + break; + case '\'': + if (is_attribute) + m << "'"; + else + m << '\''; + break; + case '"': + if (is_attribute) + m << """; + else + m << '"'; + break; + default: + if (IsValidXmlCharacter(ch)) { + if (is_attribute && IsNormalizableWhitespace(ch)) + m << "&#x" << String::FormatByte(static_cast<unsigned char>(ch)) + << ";"; + else + m << ch; + } + break; + } + } + + return m.GetString(); +} + +// Returns the given string with all characters invalid in XML removed. +// Currently invalid characters are dropped from the string. An +// alternative is to replace them with certain characters such as . or ?. 
+std::string XmlUnitTestResultPrinter::RemoveInvalidXmlCharacters( + const std::string& str) { + std::string output; + output.reserve(str.size()); + for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) + if (IsValidXmlCharacter(*it)) + output.push_back(*it); + + return output; +} + +// The following routines generate an XML representation of a UnitTest +// object. +// +// This is how Google Test concepts map to the DTD: +// +// <testsuites name="AllTests"> <-- corresponds to a UnitTest object +// <testsuite name="testcase-name"> <-- corresponds to a TestCase object +// <testcase name="test-name"> <-- corresponds to a TestInfo object +// <failure message="...">...</failure> +// <failure message="...">...</failure> +// <failure message="...">...</failure> +// <-- individual assertion failures +// </testcase> +// </testsuite> +// </testsuites> + +// Formats the given time in milliseconds as seconds. +std::string FormatTimeInMillisAsSeconds(TimeInMillis ms) { + ::std::stringstream ss; + ss << ms/1000.0; + return ss.str(); +} + +// Converts the given epoch time in milliseconds to a date string in the ISO +// 8601 format, without the timezone information. +std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms) { + // Using non-reentrant version as localtime_r is not portable. + time_t seconds = static_cast<time_t>(ms / 1000); +#ifdef _MSC_VER +# pragma warning(push) // Saves the current warning state. +# pragma warning(disable:4996) // Temporarily disables warning 4996 + // (function or variable may be unsafe). + const struct tm* const time_struct = localtime(&seconds); // NOLINT +# pragma warning(pop) // Restores the warning state again. 
+#else + const struct tm* const time_struct = localtime(&seconds); // NOLINT +#endif + if (time_struct == NULL) + return ""; // Invalid ms value + + // YYYY-MM-DDThh:mm:ss + return StreamableToString(time_struct->tm_year + 1900) + "-" + + String::FormatIntWidth2(time_struct->tm_mon + 1) + "-" + + String::FormatIntWidth2(time_struct->tm_mday) + "T" + + String::FormatIntWidth2(time_struct->tm_hour) + ":" + + String::FormatIntWidth2(time_struct->tm_min) + ":" + + String::FormatIntWidth2(time_struct->tm_sec); +} + +// Streams an XML CDATA section, escaping invalid CDATA sequences as needed. +void XmlUnitTestResultPrinter::OutputXmlCDataSection(::std::ostream* stream, + const char* data) { + const char* segment = data; + *stream << "<![CDATA["; + for (;;) { + const char* const next_segment = strstr(segment, "]]>"); + if (next_segment != NULL) { + stream->write( + segment, static_cast<std::streamsize>(next_segment - segment)); + *stream << "]]>]]><![CDATA["; + segment = next_segment + strlen("]]>"); + } else { + *stream << segment; + break; + } + } + *stream << "]]>"; +} + +void XmlUnitTestResultPrinter::OutputXmlAttribute( + std::ostream* stream, + const std::string& element_name, + const std::string& name, + const std::string& value) { + const std::vector<std::string>& allowed_names = + GetReservedAttributesForElement(element_name); + + GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) != + allowed_names.end()) + << "Attribute " << name << " is not allowed for element <" << element_name + << ">."; + + *stream << " " << name << "=\"" << EscapeXmlAttribute(value) << "\""; +} + +// Prints an XML representation of a TestInfo object. +// TODO(wan): There is also value in printing properties with the plain printer. 
+void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream* stream, + const char* test_case_name, + const TestInfo& test_info) { + const TestResult& result = *test_info.result(); + const std::string kTestcase = "testcase"; + + *stream << " <testcase"; + OutputXmlAttribute(stream, kTestcase, "name", test_info.name()); + + if (test_info.value_param() != NULL) { + OutputXmlAttribute(stream, kTestcase, "value_param", + test_info.value_param()); + } + if (test_info.type_param() != NULL) { + OutputXmlAttribute(stream, kTestcase, "type_param", test_info.type_param()); + } + + OutputXmlAttribute(stream, kTestcase, "status", + test_info.should_run() ? "run" : "notrun"); + OutputXmlAttribute(stream, kTestcase, "time", + FormatTimeInMillisAsSeconds(result.elapsed_time())); + OutputXmlAttribute(stream, kTestcase, "classname", test_case_name); + *stream << TestPropertiesAsXmlAttributes(result); + + int failures = 0; + for (int i = 0; i < result.total_part_count(); ++i) { + const TestPartResult& part = result.GetTestPartResult(i); + if (part.failed()) { + if (++failures == 1) { + *stream << ">\n"; + } + const string location = internal::FormatCompilerIndependentFileLocation( + part.file_name(), part.line_number()); + const string summary = location + "\n" + part.summary(); + *stream << " <failure message=\"" + << EscapeXmlAttribute(summary.c_str()) + << "\" type=\"\">"; + const string detail = location + "\n" + part.message(); + OutputXmlCDataSection(stream, RemoveInvalidXmlCharacters(detail).c_str()); + *stream << "</failure>\n"; + } + } + + if (failures == 0) + *stream << " />\n"; + else + *stream << " </testcase>\n"; +} + +// Prints an XML representation of a TestCase object +void XmlUnitTestResultPrinter::PrintXmlTestCase(std::ostream* stream, + const TestCase& test_case) { + const std::string kTestsuite = "testsuite"; + *stream << " <" << kTestsuite; + OutputXmlAttribute(stream, kTestsuite, "name", test_case.name()); + OutputXmlAttribute(stream, kTestsuite, "tests", + 
StreamableToString(test_case.reportable_test_count())); + OutputXmlAttribute(stream, kTestsuite, "failures", + StreamableToString(test_case.failed_test_count())); + OutputXmlAttribute( + stream, kTestsuite, "disabled", + StreamableToString(test_case.reportable_disabled_test_count())); + OutputXmlAttribute(stream, kTestsuite, "errors", "0"); + OutputXmlAttribute(stream, kTestsuite, "time", + FormatTimeInMillisAsSeconds(test_case.elapsed_time())); + *stream << TestPropertiesAsXmlAttributes(test_case.ad_hoc_test_result()) + << ">\n"; + + for (int i = 0; i < test_case.total_test_count(); ++i) { + if (test_case.GetTestInfo(i)->is_reportable()) + OutputXmlTestInfo(stream, test_case.name(), *test_case.GetTestInfo(i)); + } + *stream << " </" << kTestsuite << ">\n"; +} + +// Prints an XML summary of unit_test to output stream out. +void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream* stream, + const UnitTest& unit_test) { + const std::string kTestsuites = "testsuites"; + + *stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; + *stream << "<" << kTestsuites; + + OutputXmlAttribute(stream, kTestsuites, "tests", + StreamableToString(unit_test.reportable_test_count())); + OutputXmlAttribute(stream, kTestsuites, "failures", + StreamableToString(unit_test.failed_test_count())); + OutputXmlAttribute( + stream, kTestsuites, "disabled", + StreamableToString(unit_test.reportable_disabled_test_count())); + OutputXmlAttribute(stream, kTestsuites, "errors", "0"); + OutputXmlAttribute( + stream, kTestsuites, "timestamp", + FormatEpochTimeInMillisAsIso8601(unit_test.start_timestamp())); + OutputXmlAttribute(stream, kTestsuites, "time", + FormatTimeInMillisAsSeconds(unit_test.elapsed_time())); + + if (GTEST_FLAG(shuffle)) { + OutputXmlAttribute(stream, kTestsuites, "random_seed", + StreamableToString(unit_test.random_seed())); + } + + *stream << TestPropertiesAsXmlAttributes(unit_test.ad_hoc_test_result()); + + OutputXmlAttribute(stream, kTestsuites, "name", "AllTests"); + 
*stream << ">\n"; + + for (int i = 0; i < unit_test.total_test_case_count(); ++i) { + if (unit_test.GetTestCase(i)->reportable_test_count() > 0) + PrintXmlTestCase(stream, *unit_test.GetTestCase(i)); + } + *stream << "</" << kTestsuites << ">\n"; +} + +// Produces a string representing the test properties in a result as space +// delimited XML attributes based on the property key="value" pairs. +std::string XmlUnitTestResultPrinter::TestPropertiesAsXmlAttributes( + const TestResult& result) { + Message attributes; + for (int i = 0; i < result.test_property_count(); ++i) { + const TestProperty& property = result.GetTestProperty(i); + attributes << " " << property.key() << "=" + << "\"" << EscapeXmlAttribute(property.value()) << "\""; + } + return attributes.GetString(); +} + +// End XmlUnitTestResultPrinter + +#if GTEST_CAN_STREAM_RESULTS_ + +// Checks if str contains '=', '&', '%' or '\n' characters. If yes, +// replaces them by "%xx" where xx is their hexadecimal value. For +// example, replaces "=" with "%3D". This algorithm is O(strlen(str)) +// in both time and space -- important as the input str may contain an +// arbitrarily long test failure message and stack trace. +string StreamingListener::UrlEncode(const char* str) { + string result; + result.reserve(strlen(str) + 1); + for (char ch = *str; ch != '\0'; ch = *++str) { + switch (ch) { + case '%': + case '=': + case '&': + case '\n': + result.append("%" + String::FormatByte(static_cast<unsigned char>(ch))); + break; + default: + result.push_back(ch); + break; + } + } + return result; +} + +void StreamingListener::SocketWriter::MakeConnection() { + GTEST_CHECK_(sockfd_ == -1) + << "MakeConnection() can't be called when there is already a connection."; + + addrinfo hints; + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; // To allow both IPv4 and IPv6 addresses. 
+ hints.ai_socktype = SOCK_STREAM; + addrinfo* servinfo = NULL; + + // Use the getaddrinfo() to get a linked list of IP addresses for + // the given host name. + const int error_num = getaddrinfo( + host_name_.c_str(), port_num_.c_str(), &hints, &servinfo); + if (error_num != 0) { + GTEST_LOG_(WARNING) << "stream_result_to: getaddrinfo() failed: " + << gai_strerror(error_num); + } + + // Loop through all the results and connect to the first we can. + for (addrinfo* cur_addr = servinfo; sockfd_ == -1 && cur_addr != NULL; + cur_addr = cur_addr->ai_next) { + sockfd_ = socket( + cur_addr->ai_family, cur_addr->ai_socktype, cur_addr->ai_protocol); + if (sockfd_ != -1) { + // Connect the client socket to the server socket. + if (connect(sockfd_, cur_addr->ai_addr, cur_addr->ai_addrlen) == -1) { + close(sockfd_); + sockfd_ = -1; + } + } + } + + freeaddrinfo(servinfo); // all done with this structure + + if (sockfd_ == -1) { + GTEST_LOG_(WARNING) << "stream_result_to: failed to connect to " + << host_name_ << ":" << port_num_; + } +} + +// End of class Streaming Listener +#endif // GTEST_CAN_STREAM_RESULTS__ + +// Class ScopedTrace + +// Pushes the given source file location and message onto a per-thread +// trace stack maintained by Google Test. +ScopedTrace::ScopedTrace(const char* file, int line, const Message& message) + GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) { + TraceInfo trace; + trace.file = file; + trace.line = line; + trace.message = message.GetString(); + + UnitTest::GetInstance()->PushGTestTrace(trace); +} + +// Pops the info pushed by the c'tor. +ScopedTrace::~ScopedTrace() + GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) { + UnitTest::GetInstance()->PopGTestTrace(); +} + + +// class OsStackTraceGetter + +// Returns the current OS stack trace as an std::string. Parameters: +// +// max_depth - the maximum number of stack frames to be included +// in the trace. +// skip_count - the number of top frames to be skipped; doesn't count +// against max_depth. 
+// +string OsStackTraceGetter::CurrentStackTrace(int /* max_depth */, + int /* skip_count */) + GTEST_LOCK_EXCLUDED_(mutex_) { + return ""; +} + +void OsStackTraceGetter::UponLeavingGTest() + GTEST_LOCK_EXCLUDED_(mutex_) { +} + +const char* const +OsStackTraceGetter::kElidedFramesMarker = + "... " GTEST_NAME_ " internal frames ..."; + +// A helper class that creates the premature-exit file in its +// constructor and deletes the file in its destructor. +class ScopedPrematureExitFile { + public: + explicit ScopedPrematureExitFile(const char* premature_exit_filepath) + : premature_exit_filepath_(premature_exit_filepath) { + // If a path to the premature-exit file is specified... + if (premature_exit_filepath != NULL && *premature_exit_filepath != '\0') { + // create the file with a single "0" character in it. I/O + // errors are ignored as there's nothing better we can do and we + // don't want to fail the test because of this. + FILE* pfile = posix::FOpen(premature_exit_filepath, "w"); + fwrite("0", 1, 1, pfile); + fclose(pfile); + } + } + + ~ScopedPrematureExitFile() { + if (premature_exit_filepath_ != NULL && *premature_exit_filepath_ != '\0') { + remove(premature_exit_filepath_); + } + } + + private: + const char* const premature_exit_filepath_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedPrematureExitFile); +}; + +} // namespace internal + +// class TestEventListeners + +TestEventListeners::TestEventListeners() + : repeater_(new internal::TestEventRepeater()), + default_result_printer_(NULL), + default_xml_generator_(NULL) { +} + +TestEventListeners::~TestEventListeners() { delete repeater_; } + +// Returns the standard listener responsible for the default console +// output. Can be removed from the listeners list to shut down default +// console output. Note that removing this object from the listener list +// with Release transfers its ownership to the user. 
+void TestEventListeners::Append(TestEventListener* listener) { + repeater_->Append(listener); +} + +// Removes the given event listener from the list and returns it. It then +// becomes the caller's responsibility to delete the listener. Returns +// NULL if the listener is not found in the list. +TestEventListener* TestEventListeners::Release(TestEventListener* listener) { + if (listener == default_result_printer_) + default_result_printer_ = NULL; + else if (listener == default_xml_generator_) + default_xml_generator_ = NULL; + return repeater_->Release(listener); +} + +// Returns repeater that broadcasts the TestEventListener events to all +// subscribers. +TestEventListener* TestEventListeners::repeater() { return repeater_; } + +// Sets the default_result_printer attribute to the provided listener. +// The listener is also added to the listener list and previous +// default_result_printer is removed from it and deleted. The listener can +// also be NULL in which case it will not be added to the list. Does +// nothing if the previous and the current listener objects are the same. +void TestEventListeners::SetDefaultResultPrinter(TestEventListener* listener) { + if (default_result_printer_ != listener) { + // It is an error to pass this method a listener that is already in the + // list. + delete Release(default_result_printer_); + default_result_printer_ = listener; + if (listener != NULL) + Append(listener); + } +} + +// Sets the default_xml_generator attribute to the provided listener. The +// listener is also added to the listener list and previous +// default_xml_generator is removed from it and deleted. The listener can +// also be NULL in which case it will not be added to the list. Does +// nothing if the previous and the current listener objects are the same. 
+void TestEventListeners::SetDefaultXmlGenerator(TestEventListener* listener) { + if (default_xml_generator_ != listener) { + // It is an error to pass this method a listener that is already in the + // list. + delete Release(default_xml_generator_); + default_xml_generator_ = listener; + if (listener != NULL) + Append(listener); + } +} + +// Controls whether events will be forwarded by the repeater to the +// listeners in the list. +bool TestEventListeners::EventForwardingEnabled() const { + return repeater_->forwarding_enabled(); +} + +void TestEventListeners::SuppressEventForwarding() { + repeater_->set_forwarding_enabled(false); +} + +// class UnitTest + +// Gets the singleton UnitTest object. The first time this method is +// called, a UnitTest object is constructed and returned. Consecutive +// calls will return the same object. +// +// We don't protect this under mutex_ as a user is not supposed to +// call this before main() starts, from which point on the return +// value will never change. +UnitTest* UnitTest::GetInstance() { + // When compiled with MSVC 7.1 in optimized mode, destroying the + // UnitTest object upon exiting the program messes up the exit code, + // causing successful tests to appear failed. We have to use a + // different implementation in this case to bypass the compiler bug. + // This implementation makes the compiler happy, at the cost of + // leaking the UnitTest object. + + // CodeGear C++Builder insists on a public destructor for the + // default implementation. Use this implementation to keep good OO + // design with private destructor. + +#if (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__) + static UnitTest* const instance = new UnitTest; + return instance; +#else + static UnitTest instance; + return &instance; +#endif // (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__) +} + +// Gets the number of successful test cases. 
// (This and the accessors below simply forward to the UnitTestImpl object
// returned by impl().)
int UnitTest::successful_test_case_count() const {
  return impl()->successful_test_case_count();
}

// Gets the number of failed test cases.
int UnitTest::failed_test_case_count() const {
  return impl()->failed_test_case_count();
}

// Gets the number of all test cases.
int UnitTest::total_test_case_count() const {
  return impl()->total_test_case_count();
}

// Gets the number of all test cases that contain at least one test
// that should run.
int UnitTest::test_case_to_run_count() const {
  return impl()->test_case_to_run_count();
}

// Gets the number of successful tests.
int UnitTest::successful_test_count() const {
  return impl()->successful_test_count();
}

// Gets the number of failed tests.
int UnitTest::failed_test_count() const { return impl()->failed_test_count(); }

// Gets the number of disabled tests that will be reported in the XML report.
int UnitTest::reportable_disabled_test_count() const {
  return impl()->reportable_disabled_test_count();
}

// Gets the number of disabled tests.
int UnitTest::disabled_test_count() const {
  return impl()->disabled_test_count();
}

// Gets the number of tests to be printed in the XML report.
int UnitTest::reportable_test_count() const {
  return impl()->reportable_test_count();
}

// Gets the number of all tests.
int UnitTest::total_test_count() const { return impl()->total_test_count(); }

// Gets the number of tests that should run.
int UnitTest::test_to_run_count() const { return impl()->test_to_run_count(); }

// Gets the time of the test program start, in ms from the start of the
// UNIX epoch.
internal::TimeInMillis UnitTest::start_timestamp() const {
    return impl()->start_timestamp();
}

// Gets the elapsed time, in milliseconds.
internal::TimeInMillis UnitTest::elapsed_time() const {
  return impl()->elapsed_time();
}

// Returns true iff the unit test passed (i.e. all test cases passed).
bool UnitTest::Passed() const { return impl()->Passed(); }

// Returns true iff the unit test failed (i.e. some test case failed
// or something outside of all tests failed).
bool UnitTest::Failed() const { return impl()->Failed(); }

// Gets the i-th test case among all the test cases. i can range from 0 to
// total_test_case_count() - 1. If i is not in that range, returns NULL.
const TestCase* UnitTest::GetTestCase(int i) const {
  return impl()->GetTestCase(i);
}

// Returns the TestResult containing information on test failures and
// properties logged outside of individual test cases.
const TestResult& UnitTest::ad_hoc_test_result() const {
  return *impl()->ad_hoc_test_result();
}

// Gets the i-th test case among all the test cases. i can range from 0 to
// total_test_case_count() - 1. If i is not in that range, returns NULL.
// Unlike GetTestCase(), the returned TestCase is not const.
TestCase* UnitTest::GetMutableTestCase(int i) {
  return impl()->GetMutableTestCase(i);
}

// Returns the list of event listeners that can be used to track events
// inside Google Test.
TestEventListeners& UnitTest::listeners() {
  return *impl()->listeners();
}

// Registers and returns a global test environment.  When a test
// program is run, all global test environments will be set-up in the
// order they were registered.  After all tests in the program have
// finished, all global test environments will be torn-down in the
// *reverse* order they were registered.
//
// The UnitTest object takes ownership of the given environment.
//
// A NULL env is not registered; NULL is returned in that case.
//
// We don't protect this under mutex_, as we only support calling it
// from the main thread.
Environment* UnitTest::AddEnvironment(Environment* env) {
  if (env == NULL) {
    return NULL;
  }

  impl_->environments().push_back(env);
  return env;
}

// Adds a TestPartResult to the current TestResult object.  All Google Test
// assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) eventually call
// this to report their results.  The user code should use the
// assertion macros instead of calling this directly.
//
// The reported message is `message`, followed by the active trace stack
// (innermost entry first) if there is one, followed by the OS stack trace
// if one was supplied.
void UnitTest::AddTestPartResult(
    TestPartResult::Type result_type,
    const char* file_name,
    int line_number,
    const std::string& message,
    const std::string& os_stack_trace) GTEST_LOCK_EXCLUDED_(mutex_) {
  Message msg;
  msg << message;

  // mutex_ is held from here until the function returns (or throws).
  internal::MutexLock lock(&mutex_);
  if (impl_->gtest_trace_stack().size() > 0) {
    msg << "\n" << GTEST_NAME_ << " trace:";

    // Walk the trace stack from the most recently pushed entry down to
    // the oldest one.
    for (int i = static_cast<int>(impl_->gtest_trace_stack().size());
         i > 0; --i) {
      const internal::TraceInfo& trace = impl_->gtest_trace_stack()[i - 1];
      msg << "\n" << internal::FormatFileLocation(trace.file, trace.line)
          << " " << trace.message;
    }
  }

  if (os_stack_trace.c_str() != NULL && !os_stack_trace.empty()) {
    msg << internal::kStackTraceMarker << os_stack_trace;
  }

  const TestPartResult result =
    TestPartResult(result_type, file_name, line_number,
                   msg.GetString().c_str());
  impl_->GetTestPartResultReporterForCurrentThread()->
      ReportTestPartResult(result);

  if (result_type != TestPartResult::kSuccess) {
    // gtest_break_on_failure takes precedence over
    // gtest_throw_on_failure.  This allows a user to set the latter
    // in the code (perhaps in order to use Google Test assertions
    // with another testing framework) and specify the former on the
    // command line for debugging.
    if (GTEST_FLAG(break_on_failure)) {
#if GTEST_OS_WINDOWS
      // Using DebugBreak on Windows allows gtest to still break into a debugger
      // when a failure happens and both the --gtest_break_on_failure and
      // the --gtest_catch_exceptions flags are specified.
      DebugBreak();
#else
      // Dereference NULL through a volatile pointer to prevent the compiler
      // from removing. We use this rather than abort() or __builtin_trap() for
      // portability: Symbian doesn't implement abort() well, and some debuggers
      // don't correctly trap abort().
      *static_cast<volatile int*>(NULL) = 1;
#endif  // GTEST_OS_WINDOWS
    } else if (GTEST_FLAG(throw_on_failure)) {
#if GTEST_HAS_EXCEPTIONS
      throw internal::GoogleTestFailureException(result);
#else
      // We cannot call abort() as it generates a pop-up in debug mode
      // that cannot be suppressed in VC 7.1 or below.
      exit(1);
#endif
    }
  }
}

// Adds a TestProperty to the current TestResult object when invoked from
// inside a test, to current TestCase's ad_hoc_test_result_ when invoked
// from SetUpTestCase or TearDownTestCase, or to the global property set
// when invoked elsewhere.  If the result already contains a property with
// the same key, the value will be updated.
void UnitTest::RecordProperty(const std::string& key,
                              const std::string& value) {
  impl_->RecordProperty(TestProperty(key, value));
}

// Runs all tests in this UnitTest object and prints the result.
// Returns 0 if successful, or 1 otherwise.
//
// We don't protect this under mutex_, as we only support calling it
// from the main thread.
int UnitTest::Run() {
  // A non-empty internal_run_death_test flag marks this process as the
  // child process of a death test.
  const bool in_death_test_child_process =
      internal::GTEST_FLAG(internal_run_death_test).length() > 0;

  // Google Test implements this protocol for catching that a test
  // program exits before returning control to Google Test:
  //
  //   1. Upon start, Google Test creates a file whose absolute path
  //      is specified by the environment variable
  //      TEST_PREMATURE_EXIT_FILE.
  //   2. When Google Test has finished its work, it deletes the file.
  //
  // This allows a test runner to set TEST_PREMATURE_EXIT_FILE before
  // running a Google-Test-based test program and check the existence
  // of the file at the end of the test execution to see if it has
  // exited prematurely.

  // If we are in the child process of a death test, don't
  // create/delete the premature exit file, as doing so is unnecessary
  // and will confuse the parent process.  Otherwise, create/delete
  // the file upon entering/leaving this function.  If the program
  // somehow exits before this function has a chance to return, the
  // premature-exit file will be left undeleted, causing a test runner
  // that understands the premature-exit-file protocol to report the
  // test as having failed.
  const internal::ScopedPrematureExitFile premature_exit_file(
      in_death_test_child_process ?
      NULL : internal::posix::GetEnv("TEST_PREMATURE_EXIT_FILE"));

  // Captures the value of GTEST_FLAG(catch_exceptions).  This value will be
  // used for the duration of the program.
  impl()->set_catch_exceptions(GTEST_FLAG(catch_exceptions));

#if GTEST_HAS_SEH
  // Either the user wants Google Test to catch exceptions thrown by the
  // tests or this is executing in the context of death test child
  // process. In either case the user does not want to see pop-up dialogs
  // about crashes - they are expected.
  if (impl()->catch_exceptions() || in_death_test_child_process) {
# if !GTEST_OS_WINDOWS_MOBILE
    // SetErrorMode doesn't exist on CE.
    SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOALIGNMENTFAULTEXCEPT |
                 SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX);
# endif  // !GTEST_OS_WINDOWS_MOBILE

# if (defined(_MSC_VER) || GTEST_OS_WINDOWS_MINGW) && !GTEST_OS_WINDOWS_MOBILE
    // Death test children can be terminated with _abort().  On Windows,
    // _abort() can show a dialog with a warning message.  This forces the
    // abort message to go to stderr instead.
    _set_error_mode(_OUT_TO_STDERR);
# endif

# if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE
    // In the debug version, Visual Studio pops up a separate dialog
    // offering a choice to debug the aborted program. We need to suppress
    // this dialog or it will pop up for every EXPECT/ASSERT_DEATH statement
    // executed. Google Test will notify the user of any unexpected
    // failure via stderr.
    //
    // VC++ doesn't define _set_abort_behavior() prior to the version 8.0.
    // Users of prior VC versions shall suffer the agony and pain of
    // clicking through the countless debug dialogs.
    // TODO(vladl@google.com): find a way to suppress the abort dialog() in the
    // debug mode when compiled with VC 7.1 or lower.
    if (!GTEST_FLAG(break_on_failure))
      _set_abort_behavior(
          0x0,                                    // Clear the following flags:
          _WRITE_ABORT_MSG | _CALL_REPORTFAULT);  // pop-up window, core dump.
# endif
  }
#endif  // GTEST_HAS_SEH

  // The result of UnitTestImpl::RunAllTests, as returned through the
  // exception-handling wrapper, is mapped to the exit code: true -> 0,
  // false -> 1.
  return internal::HandleExceptionsInMethodIfSupported(
      impl(),
      &internal::UnitTestImpl::RunAllTests,
      "auxiliary test code (environments or event listeners)") ? 0 : 1;
}

// Returns the working directory when the first TEST() or TEST_F() was
// executed.
const char* UnitTest::original_working_dir() const {
  return impl_->original_working_dir_.c_str();
}

// Returns the TestCase object for the test that's currently running,
// or NULL if no test is running.
const TestCase* UnitTest::current_test_case() const
    GTEST_LOCK_EXCLUDED_(mutex_) {
  internal::MutexLock lock(&mutex_);
  return impl_->current_test_case();
}

// Returns the TestInfo object for the test that's currently running,
// or NULL if no test is running.
const TestInfo* UnitTest::current_test_info() const
    GTEST_LOCK_EXCLUDED_(mutex_) {
  internal::MutexLock lock(&mutex_);
  return impl_->current_test_info();
}

// Returns the random seed used at the start of the current test run.
int UnitTest::random_seed() const { return impl_->random_seed(); }

#if GTEST_HAS_PARAM_TEST
// Returns ParameterizedTestCaseRegistry object used to keep track of
// value-parameterized tests and instantiate and register them.
internal::ParameterizedTestCaseRegistry&
    UnitTest::parameterized_test_registry()
        GTEST_LOCK_EXCLUDED_(mutex_) {
  return impl_->parameterized_test_registry();
}
#endif  // GTEST_HAS_PARAM_TEST

// Creates an empty UnitTest.
+UnitTest::UnitTest() {
+  // The implementation object receives a back-pointer to this UnitTest.
+  impl_ = new internal::UnitTestImpl(this);
+}
+
+// Destructor of UnitTest.  Releases the implementation object that the
+// constructor allocated.
+UnitTest::~UnitTest() {
+  delete impl_;
+}
+
+// Pushes a trace defined by SCOPED_TRACE() on to the per-thread
+// Google Test trace stack.  The update is made while a MutexLock on
+// mutex_ is in scope.
+void UnitTest::PushGTestTrace(const internal::TraceInfo& trace)
+    GTEST_LOCK_EXCLUDED_(mutex_) {
+  internal::MutexLock lock(&mutex_);
+  impl_->gtest_trace_stack().push_back(trace);
+}
+
+// Pops a trace from the per-thread Google Test trace stack.  The update
+// is made while a MutexLock on mutex_ is in scope.
+void UnitTest::PopGTestTrace()
+    GTEST_LOCK_EXCLUDED_(mutex_) {
+  internal::MutexLock lock(&mutex_);
+  impl_->gtest_trace_stack().pop_back();
+}
+
+namespace internal {
+
+// Constructs the implementation object for the given parent UnitTest.
+// Both test-part-result reporter pointers initially refer to the default
+// reporters embedded in this object, and a PrettyUnitTestResultPrinter is
+// installed as the default result printer.
+UnitTestImpl::UnitTestImpl(UnitTest* parent)
+    : parent_(parent),
+#ifdef _MSC_VER
+# pragma warning(push)                    // Saves the current warning state.
+# pragma warning(disable:4355)            // Temporarily disables warning 4355
+                                         // (using this in initializer).
+      default_global_test_part_result_reporter_(this),
+      default_per_thread_test_part_result_reporter_(this),
+# pragma warning(pop)                     // Restores the warning state again.
+#else
+      default_global_test_part_result_reporter_(this),
+      default_per_thread_test_part_result_reporter_(this),
+#endif  // _MSC_VER
+      global_test_part_result_repoter_(
+          &default_global_test_part_result_reporter_),
+      per_thread_test_part_result_reporter_(
+          &default_per_thread_test_part_result_reporter_),
+#if GTEST_HAS_PARAM_TEST
+      parameterized_test_registry_(),
+      parameterized_tests_registered_(false),
+#endif  // GTEST_HAS_PARAM_TEST
+      // -1 means that no death test case has been registered yet; see
+      // how GetTestCase() pre-increments this index before inserting.
+      last_death_test_case_(-1),
+      current_test_case_(NULL),
+      current_test_info_(NULL),
+      ad_hoc_test_result_(),
+      // Created lazily by os_stack_trace_getter().
+      os_stack_trace_getter_(NULL),
+      post_flag_parse_init_performed_(false),
+      random_seed_(0),  // Will be overridden by the flag before first use.
+      random_(0),  // Will be reseeded before first use.
+      start_timestamp_(0),
+      elapsed_time_(0),
+#if GTEST_HAS_DEATH_TEST
+      death_test_factory_(new DefaultDeathTestFactory),
+#endif
+      // Will be overridden by the flag before first use.
+      catch_exceptions_(false) {
+  listeners()->SetDefaultResultPrinter(new PrettyUnitTestResultPrinter);
+}
+
+// Releases the objects this class deletes explicitly: every registered
+// TestCase, every registered Environment, and the OS stack trace getter.
+UnitTestImpl::~UnitTestImpl() {
+  // Deletes every TestCase.
+  ForEach(test_cases_, internal::Delete<TestCase>);
+
+  // Deletes every Environment.
+  ForEach(environments_, internal::Delete<Environment>);
+
+  delete os_stack_trace_getter_;
+}
+
+// Adds a TestProperty to the current TestResult object when invoked in a
+// context of a test, to current test case's ad_hoc_test_result when invoked
+// from SetUpTestCase/TearDownTestCase, or to the global property set
+// otherwise.  If the result already contains a property with the same key,
+// the value will be updated.
+void UnitTestImpl::RecordProperty(const TestProperty& test_property) {
+  // Name of the XML element that corresponds to the chosen TestResult; it
+  // is handed to TestResult::RecordProperty() together with the property.
+  std::string xml_element;
+  TestResult* test_result;  // TestResult appropriate for property recording.
+
+  if (current_test_info_ != NULL) {
+    // A test is running.
+    xml_element = "testcase";
+    test_result = &(current_test_info_->result_);
+  } else if (current_test_case_ != NULL) {
+    // No test is running, but a test case is current.
+    xml_element = "testsuite";
+    test_result = &(current_test_case_->ad_hoc_test_result_);
+  } else {
+    // Neither a test nor a test case is current.
+    xml_element = "testsuites";
+    test_result = &ad_hoc_test_result_;
+  }
+  test_result->RecordProperty(xml_element, test_property);
+}
+
+#if GTEST_HAS_DEATH_TEST
+// Disables event forwarding if the control is currently in a death test
+// subprocess.  Must not be called before InitGoogleTest.
+void UnitTestImpl::SuppressTestEventsIfInSubprocess() {
+  // A non-NULL internal_run_death_test_flag_ is what identifies the
+  // subprocess here.
+  if (internal_run_death_test_flag_.get() != NULL)
+    listeners()->SuppressEventForwarding();
+}
+#endif  // GTEST_HAS_DEATH_TEST
+
+// Initializes event listeners performing XML output as specified by
+// UnitTestOptions. Must not be called before InitGoogleTest.
+void UnitTestImpl::ConfigureXmlOutput() {
+  const std::string& output_format = UnitTestOptions::GetOutputFormat();
+  if (output_format == "xml") {
+    listeners()->SetDefaultXmlGenerator(new XmlUnitTestResultPrinter(
+        UnitTestOptions::GetAbsolutePathToOutputFile().c_str()));
+  } else if (output_format != "") {
+    // Any non-empty format other than "xml" is reported and ignored.
+    printf("WARNING: unrecognized output format \"%s\" ignored.\n",
+           output_format.c_str());
+    fflush(stdout);
+  }
+}
+
+#if GTEST_CAN_STREAM_RESULTS_
+// Initializes event listeners for streaming test results in string form.
+// Must not be called before InitGoogleTest.
+void UnitTestImpl::ConfigureStreamingOutput() {
+  const std::string& target = GTEST_FLAG(stream_result_to);
+  if (!target.empty()) {
+    // The target is split at its first ':'; the text before it and the
+    // text after it are passed to StreamingListener as two arguments.
+    const size_t pos = target.find(':');
+    if (pos != std::string::npos) {
+      listeners()->Append(new StreamingListener(target.substr(0, pos),
+                                                target.substr(pos+1)));
+    } else {
+      printf("WARNING: unrecognized streaming target \"%s\" ignored.\n",
+             target.c_str());
+      fflush(stdout);
+    }
+  }
+}
+#endif  // GTEST_CAN_STREAM_RESULTS_
+
+// Performs initialization dependent upon flag values obtained in
+// ParseGoogleTestFlagsOnly. Is called from InitGoogleTest after the call to
+// ParseGoogleTestFlagsOnly. In case a user neglects to call InitGoogleTest
+// this function is also called from RunAllTests. Since this function can be
+// called more than once, it has to be idempotent.
+void UnitTestImpl::PostFlagParsingInit() {
+  // Ensures that this function does not execute more than once.
+  if (!post_flag_parse_init_performed_) {
+    post_flag_parse_init_performed_ = true;
+
+#if GTEST_HAS_DEATH_TEST
+    InitDeathTestSubprocessControlInfo();
+    SuppressTestEventsIfInSubprocess();
+#endif  // GTEST_HAS_DEATH_TEST
+
+    // Registers parameterized tests. This makes parameterized tests
+    // available to the UnitTest reflection API without running
+    // RUN_ALL_TESTS.
+    RegisterParameterizedTests();
+
+    // Configures listeners for XML output. This makes it possible for users
+    // to shut down the default XML output before invoking RUN_ALL_TESTS.
+    ConfigureXmlOutput();
+
+#if GTEST_CAN_STREAM_RESULTS_
+    // Configures listeners for streaming test results to the specified server.
+    ConfigureStreamingOutput();
+#endif  // GTEST_CAN_STREAM_RESULTS_
+  }
+}
+
+// A predicate that checks the name of a TestCase against a known
+// value.
+//
+// This is used for implementation of the UnitTest class only.
+// NOTE(review): the class is declared directly in the enclosing
+// namespace at this point, not inside an anonymous namespace.
+//
+// TestCaseNameIs is copyable.
+class TestCaseNameIs {
+ public:
+  // Constructor.  Stores a copy of the name to compare against.
+  explicit TestCaseNameIs(const std::string& name)
+      : name_(name) {}
+
+  // Returns true iff test_case is not NULL and its name equals name_.
+  bool operator()(const TestCase* test_case) const {
+    return test_case != NULL && strcmp(test_case->name(), name_.c_str()) == 0;
+  }
+
+ private:
+  std::string name_;
+};
+
+// Finds and returns a TestCase with the given name. If one doesn't
+// exist, creates one and returns it. It's the CALLER'S
+// RESPONSIBILITY to ensure that this function is only called WHEN THE
+// TESTS ARE NOT SHUFFLED.
+//
+// Arguments:
+//
+//   test_case_name: name of the test case
+//   type_param:     the name of the test case's type parameter, or NULL if
+//                   this is not a typed or a type-parameterized test case.
+//   set_up_tc:      pointer to the function that sets up the test case
+//   tear_down_tc:   pointer to the function that tears down the test case
+TestCase* UnitTestImpl::GetTestCase(const char* test_case_name,
+                                    const char* type_param,
+                                    Test::SetUpTestCaseFunc set_up_tc,
+                                    Test::TearDownTestCaseFunc tear_down_tc) {
+  // Can we find a TestCase with the given name?
+  const std::vector<TestCase*>::const_iterator test_case =
+      std::find_if(test_cases_.begin(), test_cases_.end(),
+                   TestCaseNameIs(test_case_name));
+
+  if (test_case != test_cases_.end())
+    return *test_case;
+
+  // No. Let's create one.
+  TestCase* const new_test_case =
+      new TestCase(test_case_name, type_param, set_up_tc, tear_down_tc);
+
+  // Is this a death test case?
+  if (internal::UnitTestOptions::MatchesFilter(test_case_name,
+                                               kDeathTestCaseFilter)) {
+    // Yes. Inserts the test case after the last death test case
+    // defined so far. This only works when the test cases haven't
+    // been shuffled. Otherwise we may end up running a death test
+    // after a non-death test.
+    ++last_death_test_case_;
+    test_cases_.insert(test_cases_.begin() + last_death_test_case_,
+                       new_test_case);
+  } else {
+    // No. Appends to the end of the list.
+    test_cases_.push_back(new_test_case);
+  }
+
+  // The new index entry equals its own position, i.e. the index list
+  // grows as 0, 1, 2, ... regardless of where the case was inserted.
+  test_case_indices_.push_back(static_cast<int>(test_case_indices_.size()));
+  return new_test_case;
+}
+
+// Helpers for setting up / tearing down the given environment. They
+// are for use in the ForEach() function.
+static void SetUpEnvironment(Environment* env) { env->SetUp(); }
+static void TearDownEnvironment(Environment* env) { env->TearDown(); }
+
+// Runs all tests in this UnitTest object, prints the result, and
+// returns true if all tests are successful. If any exception is
+// thrown during a test, the test is considered to be failed, but the
+// rest of the tests will still be run.
+//
+// When parameterized tests are enabled, it expands and registers
+// parameterized tests first in RegisterParameterizedTests().
+// All other functions called from RunAllTests() may safely assume that
+// parameterized tests are ready to be counted and run.
+//
+// Also returns true, without running anything, when the help flag or
+// the list_tests flag is set; returns false when Google Test has not
+// been initialized.
+bool UnitTestImpl::RunAllTests() {
+  // Makes sure InitGoogleTest() was called.
+  if (!GTestIsInitialized()) {
+    printf("%s",
+           "\nThis test program did NOT call ::testing::InitGoogleTest "
+           "before calling RUN_ALL_TESTS(). Please fix it.\n");
+    return false;
+  }
+
+  // Do not run any test if the --help flag was specified.
+  if (g_help_flag)
+    return true;
+
+  // Repeats the call to the post-flag parsing initialization in case the
+  // user didn't call InitGoogleTest.
+  PostFlagParsingInit();
+
+  // Even if sharding is not on, test runners may want to use the
+  // GTEST_SHARD_STATUS_FILE to query whether the test supports the sharding
+  // protocol.
+  internal::WriteToShardStatusFileIfNeeded();
+
+  // True iff we are in a subprocess for running a thread-safe-style
+  // death test.
+  bool in_subprocess_for_death_test = false;
+
+#if GTEST_HAS_DEATH_TEST
+  in_subprocess_for_death_test = (internal_run_death_test_flag_.get() != NULL);
+#endif  // GTEST_HAS_DEATH_TEST
+
+  const bool should_shard = ShouldShard(kTestTotalShards, kTestShardIndex,
+                                        in_subprocess_for_death_test);
+
+  // Compares the full test names with the filter to decide which
+  // tests to run.
+  const bool has_tests_to_run = FilterTests(should_shard
+                                              ? HONOR_SHARDING_PROTOCOL
+                                              : IGNORE_SHARDING_PROTOCOL) > 0;
+
+  // Lists the tests and exits if the --gtest_list_tests flag was specified.
+  if (GTEST_FLAG(list_tests)) {
+    // This must be called *after* FilterTests() has been called.
+    ListTestsMatchingFilter();
+    return true;
+  }
+
+  // The seed is only derived from the flag when shuffling is requested.
+  random_seed_ = GTEST_FLAG(shuffle) ?
+      GetRandomSeedFromFlag(GTEST_FLAG(random_seed)) : 0;
+
+  // True iff at least one test has failed.
+  bool failed = false;
+
+  TestEventListener* repeater = listeners()->repeater();
+
+  start_timestamp_ = GetTimeInMillis();
+  repeater->OnTestProgramStart(*parent_);
+
+  // How many times to repeat the tests? We don't want to repeat them
+  // when we are inside the subprocess of a death test.
+  const int repeat = in_subprocess_for_death_test ? 1 : GTEST_FLAG(repeat);
+  // Repeats forever if the repeat count is negative.
+  const bool forever = repeat < 0;
+  for (int i = 0; forever || i != repeat; i++) {
+    // We want to preserve failures generated by ad-hoc test
+    // assertions executed before RUN_ALL_TESTS().
+    ClearNonAdHocTestResult();
+
+    const TimeInMillis start = GetTimeInMillis();
+
+    // Shuffles test cases and tests if requested.
+    if (has_tests_to_run && GTEST_FLAG(shuffle)) {
+      random()->Reseed(random_seed_);
+      // This should be done before calling OnTestIterationStart(),
+      // such that a test event listener can see the actual test order
+      // in the event.
+      ShuffleTests();
+    }
+
+    // Tells the unit test event listeners that the tests are about to start.
+    repeater->OnTestIterationStart(*parent_, i);
+
+    // Runs each test case if there is at least one test to run.
+    if (has_tests_to_run) {
+      // Sets up all environments beforehand.
+      repeater->OnEnvironmentsSetUpStart(*parent_);
+      ForEach(environments_, SetUpEnvironment);
+      repeater->OnEnvironmentsSetUpEnd(*parent_);
+
+      // Runs the tests only if there was no fatal failure during global
+      // set-up.
+      if (!Test::HasFatalFailure()) {
+        for (int test_index = 0; test_index < total_test_case_count();
+             test_index++) {
+          GetMutableTestCase(test_index)->Run();
+        }
+      }
+
+      // Tears down all environments in reverse order afterwards.
+      repeater->OnEnvironmentsTearDownStart(*parent_);
+      std::for_each(environments_.rbegin(), environments_.rend(),
+                    TearDownEnvironment);
+      repeater->OnEnvironmentsTearDownEnd(*parent_);
+    }
+
+    elapsed_time_ = GetTimeInMillis() - start;
+
+    // Tells the unit test event listener that the tests have just finished.
+    repeater->OnTestIterationEnd(*parent_, i);
+
+    // Remembers that at least one iteration did not pass.
+    if (!Passed()) {
+      failed = true;
+    }
+
+    // Restores the original test order after the iteration. This
+    // allows the user to quickly repro a failure that happens in the
+    // N-th iteration without repeating the first (N - 1) iterations.
+    // This is not enclosed in "if (GTEST_FLAG(shuffle)) { ... }", in
+    // case the user somehow changes the value of the flag somewhere
+    // (it's always safe to unshuffle the tests).
+    UnshuffleTests();
+
+    if (GTEST_FLAG(shuffle)) {
+      // Picks a new random seed for each iteration.
+      random_seed_ = GetNextRandomSeed(random_seed_);
+    }
+  }
+
+  repeater->OnTestProgramEnd(*parent_);
+
+  return !failed;
+}
+
+// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file
+// if the variable is present. If a file already exists at this location, this
+// function will write over it. If the variable is present, but the file cannot
+// be created, prints an error and exits.
+void WriteToShardStatusFileIfNeeded() {
+  const char* const test_shard_file = posix::GetEnv(kTestShardStatusFile);
+  if (test_shard_file != NULL) {
+    // Opening in "w" mode is all that is needed; nothing is written to
+    // the file before it is closed again.
+    FILE* const file = posix::FOpen(test_shard_file, "w");
+    if (file == NULL) {
+      ColoredPrintf(COLOR_RED,
+                    "Could not write to the test shard status file \"%s\" "
+                    "specified by the %s environment variable.\n",
+                    test_shard_file, kTestShardStatusFile);
+      fflush(stdout);
+      exit(EXIT_FAILURE);
+    }
+    fclose(file);
+  }
+}
+
+// Checks whether sharding is enabled by examining the relevant
+// environment variable values. If the variables are present,
+// but inconsistent (i.e., shard_index >= total_shards), prints
+// an error and exits. If in_subprocess_for_death_test, sharding is
+// disabled because it must only be applied to the original test
+// process. Otherwise, we could filter out death tests we intended to execute.
+bool ShouldShard(const char* total_shards_env,
+                 const char* shard_index_env,
+                 bool in_subprocess_for_death_test) {
+  // Sharding is never applied inside a death test subprocess.
+  if (in_subprocess_for_death_test) {
+    return false;
+  }
+
+  // -1 is the default handed to Int32FromEnvOrDie(), so it stands for
+  // "variable not set" in the checks below.
+  const Int32 total_shards = Int32FromEnvOrDie(total_shards_env, -1);
+  const Int32 shard_index = Int32FromEnvOrDie(shard_index_env, -1);
+
+  // Each error message below is printed through an explicit "%s" format
+  // instead of being used as the format string itself, so that no
+  // character of the assembled text can be interpreted as a printf
+  // conversion specification.
+  if (total_shards == -1 && shard_index == -1) {
+    // Neither variable is set: sharding is off.
+    return false;
+  } else if (total_shards == -1 && shard_index != -1) {
+    // Only the shard index is set.
+    const Message msg = Message()
+      << "Invalid environment variables: you have "
+      << kTestShardIndex << " = " << shard_index
+      << ", but have left " << kTestTotalShards << " unset.\n";
+    ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
+    fflush(stdout);
+    exit(EXIT_FAILURE);
+  } else if (total_shards != -1 && shard_index == -1) {
+    // Only the total number of shards is set.
+    const Message msg = Message()
+      << "Invalid environment variables: you have "
+      << kTestTotalShards << " = " << total_shards
+      << ", but have left " << kTestShardIndex << " unset.\n";
+    ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
+    fflush(stdout);
+    exit(EXIT_FAILURE);
+  } else if (shard_index < 0 || shard_index >= total_shards) {
+    // Both are set, but the index is outside [0, total_shards).
+    const Message msg = Message()
+      << "Invalid environment variables: we require 0 <= "
+      << kTestShardIndex << " < " << kTestTotalShards
+      << ", but you have " << kTestShardIndex << "=" << shard_index
+      << ", " << kTestTotalShards << "=" << total_shards << ".\n";
+    ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
+    fflush(stdout);
+    exit(EXIT_FAILURE);
+  }
+
+  // A single shard is equivalent to not sharding at all.
+  return total_shards > 1;
+}
+
+// Parses the environment variable var as an Int32. If it is unset,
+// returns default_val. If it is not an Int32, prints an error
+// and aborts.
+Int32 Int32FromEnvOrDie(const char* var, Int32 default_val) {
+  // An unset variable yields the caller's default.
+  const char* const env_text = posix::GetEnv(var);
+  if (env_text == NULL)
+    return default_val;
+
+  // A value that ParseInt32() rejects terminates the program.
+  Int32 parsed;
+  const bool parse_ok =
+      ParseInt32(Message() << "The value of environment variable " << var,
+                 env_text, &parsed);
+  if (!parse_ok)
+    exit(EXIT_FAILURE);
+
+  return parsed;
+}
+
+// Decides whether the test identified by test_id belongs to the shard
+// shard_index out of total_shards shards: tests are dealt out to shards
+// by the remainder of their id.  The test id is some arbitrary but
+// unique non-negative integer assigned to each test method.  Assumes
+// that 0 <= shard_index < total_shards.
+bool ShouldRunTestOnShard(int total_shards, int shard_index, int test_id) {
+  const int owning_shard = test_id % total_shards;
+  return owning_shard == shard_index;
+}
+
+// Matches every test against the user-specified filter and stores the
+// outcome (disabled / matches filter / should run) in each TestInfo, and
+// whether anything should run in each TestCase.
+// If shard_tests is HONOR_SHARDING_PROTOCOL, the tests are additionally
+// filtered by the sharding variables in the environment - see
+// http://code.google.com/p/googletest/wiki/GoogleTestAdvancedGuide.
+// Returns the number of tests that should run.
+int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) {
+  // The shard variables are read only when the sharding protocol is
+  // honored; otherwise both stay at -1.
+  Int32 total_shards = -1;
+  Int32 shard_index = -1;
+  if (shard_tests == HONOR_SHARDING_PROTOCOL) {
+    total_shards = Int32FromEnvOrDie(kTestTotalShards, -1);
+    shard_index = Int32FromEnvOrDie(kTestShardIndex, -1);
+  }
+
+  // runnable_count: tests that match the filter and are not disabled,
+  //                 i.e. that run across all shards.
+  // selected_count: tests that run on this shard.
+  int runnable_count = 0;
+  int selected_count = 0;
+
+  for (size_t case_idx = 0; case_idx < test_cases_.size(); ++case_idx) {
+    TestCase* const current_case = test_cases_[case_idx];
+    const std::string case_name(current_case->name());
+    current_case->set_should_run(false);
+
+    for (size_t test_idx = 0;
+         test_idx < current_case->test_info_list().size();
+         ++test_idx) {
+      TestInfo* const info = current_case->test_info_list()[test_idx];
+      const std::string test_name(info->name());
+
+      // Disabled means: the test case name or the test name matches
+      // kDisableTestFilter.
+      const bool disabled =
+          internal::UnitTestOptions::MatchesFilter(case_name,
+                                                   kDisableTestFilter) ||
+          internal::UnitTestOptions::MatchesFilter(test_name,
+                                                   kDisableTestFilter);
+      info->is_disabled_ = disabled;
+
+      const bool in_filter =
+          internal::UnitTestOptions::FilterMatchesTest(case_name, test_name);
+      info->matches_filter_ = in_filter;
+
+      const bool runnable =
+          in_filter && (GTEST_FLAG(also_run_disabled_tests) || !disabled);
+
+      // The shard owner is computed from the number of runnable tests
+      // seen so far, i.e. before this test is counted.
+      bool selected = false;
+      if (runnable) {
+        selected = (shard_tests == IGNORE_SHARDING_PROTOCOL) ||
+                   ShouldRunTestOnShard(total_shards, shard_index,
+                                        runnable_count);
+        ++runnable_count;
+      }
+      if (selected)
+        ++selected_count;
+
+      info->should_run_ = selected;
+      current_case->set_should_run(current_case->should_run() || selected);
+    }
+  }
+
+  return selected_count;
+}
+
+// Writes the C-string str to stdout on one line: every '\n' is printed
+// as the two characters "\\n".  Once max_length characters have been
+// produced, "..." is printed and the rest of the string is dropped.
+// A NULL str prints nothing.
+static void PrintOnOneLine(const char* str, int max_length) {
+  if (str == NULL)
+    return;
+
+  int printed = 0;
+  for (const char* cursor = str; *cursor != '\0'; ++cursor) {
+    if (printed >= max_length) {
+      printf("...");
+      return;
+    }
+
+    if (*cursor == '\n') {
+      // An escaped newline counts as two characters.
+      printf("\\n");
+      printed += 2;
+    } else {
+      printf("%c", *cursor);
+      printed += 1;
+    }
+  }
+}
+
+// Prints the names of the tests matching the user-specified filter flag.
+void UnitTestImpl::ListTestsMatchingFilter() {
+  // Upper bound on the characters printed for a type/value parameter.
+  const int kParamCharLimit = 250;
+
+  for (size_t case_idx = 0; case_idx < test_cases_.size(); ++case_idx) {
+    const TestCase* const current_case = test_cases_[case_idx];
+    // The test case line is emitted lazily, right before the first of
+    // its tests that matches the filter.
+    bool header_printed = false;
+
+    for (size_t test_idx = 0;
+         test_idx < current_case->test_info_list().size();
+         ++test_idx) {
+      const TestInfo* const info = current_case->test_info_list()[test_idx];
+      if (!info->matches_filter_)
+        continue;
+
+      if (!header_printed) {
+        header_printed = true;
+        printf("%s.", current_case->name());
+        if (current_case->type_param() != NULL) {
+          printf(" # %s = ", kTypeParamLabel);
+          // The type parameter is kept on a single line so that the
+          // output stays easy to parse by a program.
+          PrintOnOneLine(current_case->type_param(), kParamCharLimit);
+        }
+        printf("\n");
+      }
+
+      printf(" %s", info->name());
+      if (info->value_param() != NULL) {
+        printf(" # %s = ", kValueParamLabel);
+        // The value parameter is kept on a single line for the same
+        // reason.
+        PrintOnOneLine(info->value_param(), kParamCharLimit);
+      }
+      printf("\n");
+    }
+  }
+
+  fflush(stdout);
+}
+
+// Installs getter as the OS stack trace getter.
+//
+// Passing the getter that is already current is a no-op; any other
+// value causes the previous getter to be deleted before the new one
+// is stored.
+void UnitTestImpl::set_os_stack_trace_getter(
+    OsStackTraceGetterInterface* getter) {
+  if (os_stack_trace_getter_ == getter)
+    return;
+
+  delete os_stack_trace_getter_;
+  os_stack_trace_getter_ = getter;
+}
+
+// Returns the current OS stack trace getter if it is not NULL;
+// otherwise, creates an OsStackTraceGetter, makes it the current
+// getter, and returns it.
+OsStackTraceGetterInterface* UnitTestImpl::os_stack_trace_getter() {
+  // The default getter is created on first use.
+  if (os_stack_trace_getter_ != NULL)
+    return os_stack_trace_getter_;
+
+  os_stack_trace_getter_ = new OsStackTraceGetter;
+  return os_stack_trace_getter_;
+}
+
+// Returns the TestResult of the test that is currently running; when no
+// test is running, returns the TestResult of the ad hoc test instead.
+TestResult* UnitTestImpl::current_test_result() {
+  if (current_test_info_) {
+    return &(current_test_info_->result_);
+  }
+  return &ad_hoc_test_result_;
+}
+
+// Shuffles the test case order and the tests inside every test case.
+// Death test cases and non-death test cases are shuffled as two
+// separate ranges, so death tests are still run first.
+void UnitTestImpl::ShuffleTests() {
+  // Range [0, last_death_test_case_ + 1): the death test cases.
+  ShuffleRange(random(), 0, last_death_test_case_ + 1, &test_case_indices_);
+
+  // Range [last_death_test_case_ + 1, size): the non-death test cases.
+  ShuffleRange(random(), last_death_test_case_ + 1,
+               static_cast<int>(test_cases_.size()), &test_case_indices_);
+
+  // Finally, the tests inside each test case.
+  for (std::vector<TestCase*>::iterator it = test_cases_.begin();
+       it != test_cases_.end(); ++it) {
+    (*it)->ShuffleTests(random());
+  }
+}
+
+// Puts the test cases and their tests back into the order they had
+// before the first shuffle.
+void UnitTestImpl::UnshuffleTests() {
+  size_t position = 0;
+  while (position < test_cases_.size()) {
+    // Tests inside the test case go back to their original order ...
+    test_cases_[position]->UnshuffleTests();
+    // ... and the index entry points at its own position again.
+    test_case_indices_[position] = static_cast<int>(position);
+    ++position;
+  }
+}
+
+// Returns the current OS stack trace as an std::string.
+//
+// The maximum number of stack frames to be included is specified by
+// the gtest_stack_trace_depth flag. The skip_count parameter
+// specifies the number of top frames to be skipped, which doesn't
+// count against the number of frames to be included.
+//
+// For example, if Foo() calls Bar(), which in turn calls
+// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
+// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
+std::string GetCurrentOsStackTraceExceptTop(UnitTest* /*unit_test*/, + int skip_count) { + // We pass skip_count + 1 to skip this wrapper function in addition + // to what the user really wants to skip. + return GetUnitTestImpl()->CurrentOsStackTraceExceptTop(skip_count + 1); +} + +// Used by the GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_ macro to +// suppress unreachable code warnings. +namespace { +class ClassUniqueToAlwaysTrue {}; +} + +bool IsTrue(bool condition) { return condition; } + +bool AlwaysTrue() { +#if GTEST_HAS_EXCEPTIONS + // This condition is always false so AlwaysTrue() never actually throws, + // but it makes the compiler think that it may throw. + if (IsTrue(false)) + throw ClassUniqueToAlwaysTrue(); +#endif // GTEST_HAS_EXCEPTIONS + return true; +} + +// If *pstr starts with the given prefix, modifies *pstr to be right +// past the prefix and returns true; otherwise leaves *pstr unchanged +// and returns false. None of pstr, *pstr, and prefix can be NULL. +bool SkipPrefix(const char* prefix, const char** pstr) { + const size_t prefix_len = strlen(prefix); + if (strncmp(*pstr, prefix, prefix_len) == 0) { + *pstr += prefix_len; + return true; + } + return false; +} + +// Parses a string as a command line flag. The string should have +// the format "--flag=value". When def_optional is true, the "=value" +// part can be omitted. +// +// Returns the value of the flag, or NULL if the parsing failed. +const char* ParseFlagValue(const char* str, + const char* flag, + bool def_optional) { + // str and flag must not be NULL. + if (str == NULL || flag == NULL) return NULL; + + // The flag must start with "--" followed by GTEST_FLAG_PREFIX_. + const std::string flag_str = std::string("--") + GTEST_FLAG_PREFIX_ + flag; + const size_t flag_len = flag_str.length(); + if (strncmp(str, flag_str.c_str(), flag_len) != 0) return NULL; + + // Skips the flag name. 
+ const char* flag_end = str + flag_len; + + // When def_optional is true, it's OK to not have a "=value" part. + if (def_optional && (flag_end[0] == '\0')) { + return flag_end; + } + + // If def_optional is true and there are more characters after the + // flag name, or if def_optional is false, there must be a '=' after + // the flag name. + if (flag_end[0] != '=') return NULL; + + // Returns the string after "=". + return flag_end + 1; +} + +// Parses a string for a bool flag, in the form of either +// "--flag=value" or "--flag". +// +// In the former case, the value is taken as true as long as it does +// not start with '0', 'f', or 'F'. +// +// In the latter case, the value is taken as true. +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseBoolFlag(const char* str, const char* flag, bool* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, true); + + // Aborts if the parsing failed. + if (value_str == NULL) return false; + + // Converts the string value to a bool. + *value = !(*value_str == '0' || *value_str == 'f' || *value_str == 'F'); + return true; +} + +// Parses a string for an Int32 flag, in the form of +// "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseInt32Flag(const char* str, const char* flag, Int32* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, false); + + // Aborts if the parsing failed. + if (value_str == NULL) return false; + + // Sets *value to the value of the flag. + return ParseInt32(Message() << "The value of flag --" << flag, + value_str, value); +} + +// Parses a string for a string flag, in the form of +// "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. 
On failure, returns false without changing *value. +bool ParseStringFlag(const char* str, const char* flag, std::string* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, false); + + // Aborts if the parsing failed. + if (value_str == NULL) return false; + + // Sets *value to the value of the flag. + *value = value_str; + return true; +} + +// Determines whether a string has a prefix that Google Test uses for its +// flags, i.e., starts with GTEST_FLAG_PREFIX_ or GTEST_FLAG_PREFIX_DASH_. +// If Google Test detects that a command line flag has its prefix but is not +// recognized, it will print its help message. Flags starting with +// GTEST_INTERNAL_PREFIX_ followed by "internal_" are considered Google Test +// internal flags and do not trigger the help message. +static bool HasGoogleTestFlagPrefix(const char* str) { + return (SkipPrefix("--", &str) || + SkipPrefix("-", &str) || + SkipPrefix("/", &str)) && + !SkipPrefix(GTEST_FLAG_PREFIX_ "internal_", &str) && + (SkipPrefix(GTEST_FLAG_PREFIX_, &str) || + SkipPrefix(GTEST_FLAG_PREFIX_DASH_, &str)); +} + +// Prints a string containing code-encoded text. The following escape +// sequences can be used in the string to control the text color: +// +// @@ prints a single '@' character. +// @R changes the color to red. +// @G changes the color to green. +// @Y changes the color to yellow. +// @D changes to the default terminal text color. +// +// TODO(wan@google.com): Write tests for this once we add stdout +// capturing to Google Test. +static void PrintColorEncoded(const char* str) { + GTestColor color = COLOR_DEFAULT; // The current color. + + // Conceptually, we split the string into segments divided by escape + // sequences. Then we print one segment at a time. At the end of + // each iteration, the str pointer advances to the beginning of the + // next segment. 
+ for (;;) { + const char* p = strchr(str, '@'); + if (p == NULL) { + ColoredPrintf(color, "%s", str); + return; + } + + ColoredPrintf(color, "%s", std::string(str, p).c_str()); + + const char ch = p[1]; + str = p + 2; + if (ch == '@') { + ColoredPrintf(color, "@"); + } else if (ch == 'D') { + color = COLOR_DEFAULT; + } else if (ch == 'R') { + color = COLOR_RED; + } else if (ch == 'G') { + color = COLOR_GREEN; + } else if (ch == 'Y') { + color = COLOR_YELLOW; + } else { + --str; + } + } +} + +static const char kColorEncodedHelpMessage[] = +"This program contains tests written using " GTEST_NAME_ ". You can use the\n" +"following command line flags to control its behavior:\n" +"\n" +"Test Selection:\n" +" @G--" GTEST_FLAG_PREFIX_ "list_tests@D\n" +" List the names of all tests instead of running them. The name of\n" +" TEST(Foo, Bar) is \"Foo.Bar\".\n" +" @G--" GTEST_FLAG_PREFIX_ "filter=@YPOSTIVE_PATTERNS" + "[@G-@YNEGATIVE_PATTERNS]@D\n" +" Run only the tests whose name matches one of the positive patterns but\n" +" none of the negative patterns. '?' matches any single character; '*'\n" +" matches any substring; ':' separates two patterns.\n" +" @G--" GTEST_FLAG_PREFIX_ "also_run_disabled_tests@D\n" +" Run all disabled tests too.\n" +"\n" +"Test Execution:\n" +" @G--" GTEST_FLAG_PREFIX_ "repeat=@Y[COUNT]@D\n" +" Run the tests repeatedly; use a negative count to repeat forever.\n" +" @G--" GTEST_FLAG_PREFIX_ "shuffle@D\n" +" Randomize tests' orders on every iteration.\n" +" @G--" GTEST_FLAG_PREFIX_ "random_seed=@Y[NUMBER]@D\n" +" Random number seed to use for shuffling test orders (between 1 and\n" +" 99999, or 0 to use a seed based on the current time).\n" +"\n" +"Test Output:\n" +" @G--" GTEST_FLAG_PREFIX_ "color=@Y(@Gyes@Y|@Gno@Y|@Gauto@Y)@D\n" +" Enable/disable colored output. 
The default is @Gauto@D.\n" +" -@G-" GTEST_FLAG_PREFIX_ "print_time=0@D\n" +" Don't print the elapsed time of each test.\n" +" @G--" GTEST_FLAG_PREFIX_ "output=xml@Y[@G:@YDIRECTORY_PATH@G" + GTEST_PATH_SEP_ "@Y|@G:@YFILE_PATH]@D\n" +" Generate an XML report in the given directory or with the given file\n" +" name. @YFILE_PATH@D defaults to @Gtest_details.xml@D.\n" +#if GTEST_CAN_STREAM_RESULTS_ +" @G--" GTEST_FLAG_PREFIX_ "stream_result_to=@YHOST@G:@YPORT@D\n" +" Stream test results to the given server.\n" +#endif // GTEST_CAN_STREAM_RESULTS_ +"\n" +"Assertion Behavior:\n" +#if GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS +" @G--" GTEST_FLAG_PREFIX_ "death_test_style=@Y(@Gfast@Y|@Gthreadsafe@Y)@D\n" +" Set the default death test style.\n" +#endif // GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS +" @G--" GTEST_FLAG_PREFIX_ "break_on_failure@D\n" +" Turn assertion failures into debugger break-points.\n" +" @G--" GTEST_FLAG_PREFIX_ "throw_on_failure@D\n" +" Turn assertion failures into C++ exceptions.\n" +" @G--" GTEST_FLAG_PREFIX_ "catch_exceptions=0@D\n" +" Do not report exceptions as test failures. Instead, allow them\n" +" to crash the program or throw a pop-up (on Windows).\n" +"\n" +"Except for @G--" GTEST_FLAG_PREFIX_ "list_tests@D, you can alternatively set " + "the corresponding\n" +"environment variable of a flag (all letters in upper-case). For example, to\n" +"disable colored text output, you can either specify @G--" GTEST_FLAG_PREFIX_ + "color=no@D or set\n" +"the @G" GTEST_FLAG_PREFIX_UPPER_ "COLOR@D environment variable to @Gno@D.\n" +"\n" +"For more information, please read the " GTEST_NAME_ " documentation at\n" +"@G" GTEST_PROJECT_URL_ "@D. If you find a bug in " GTEST_NAME_ "\n" +"(not one in your own code or tests), please report it to\n" +"@G<" GTEST_DEV_EMAIL_ ">@D.\n"; + +// Parses the command line for Google Test flags, without initializing +// other parts of Google Test. The type parameter CharType can be +// instantiated to either char or wchar_t. 
+template <typename CharType> +void ParseGoogleTestFlagsOnlyImpl(int* argc, CharType** argv) { + for (int i = 1; i < *argc; i++) { + const std::string arg_string = StreamableToString(argv[i]); + const char* const arg = arg_string.c_str(); + + using internal::ParseBoolFlag; + using internal::ParseInt32Flag; + using internal::ParseStringFlag; + + // Do we see a Google Test flag? + if (ParseBoolFlag(arg, kAlsoRunDisabledTestsFlag, + >EST_FLAG(also_run_disabled_tests)) || + ParseBoolFlag(arg, kBreakOnFailureFlag, + >EST_FLAG(break_on_failure)) || + ParseBoolFlag(arg, kCatchExceptionsFlag, + >EST_FLAG(catch_exceptions)) || + ParseStringFlag(arg, kColorFlag, >EST_FLAG(color)) || + ParseStringFlag(arg, kDeathTestStyleFlag, + >EST_FLAG(death_test_style)) || + ParseBoolFlag(arg, kDeathTestUseFork, + >EST_FLAG(death_test_use_fork)) || + ParseStringFlag(arg, kFilterFlag, >EST_FLAG(filter)) || + ParseStringFlag(arg, kInternalRunDeathTestFlag, + >EST_FLAG(internal_run_death_test)) || + ParseBoolFlag(arg, kListTestsFlag, >EST_FLAG(list_tests)) || + ParseStringFlag(arg, kOutputFlag, >EST_FLAG(output)) || + ParseBoolFlag(arg, kPrintTimeFlag, >EST_FLAG(print_time)) || + ParseInt32Flag(arg, kRandomSeedFlag, >EST_FLAG(random_seed)) || + ParseInt32Flag(arg, kRepeatFlag, >EST_FLAG(repeat)) || + ParseBoolFlag(arg, kShuffleFlag, >EST_FLAG(shuffle)) || + ParseInt32Flag(arg, kStackTraceDepthFlag, + >EST_FLAG(stack_trace_depth)) || + ParseStringFlag(arg, kStreamResultToFlag, + >EST_FLAG(stream_result_to)) || + ParseBoolFlag(arg, kThrowOnFailureFlag, + >EST_FLAG(throw_on_failure)) + ) { + // Yes. Shift the remainder of the argv list left by one. Note + // that argv has (*argc + 1) elements, the last one always being + // NULL. The following loop moves the trailing NULL element as + // well. + for (int j = i; j != *argc; j++) { + argv[j] = argv[j + 1]; + } + + // Decrements the argument count. + (*argc)--; + + // We also need to decrement the iterator as we just removed + // an element. 
+ i--; + } else if (arg_string == "--help" || arg_string == "-h" || + arg_string == "-?" || arg_string == "/?" || + HasGoogleTestFlagPrefix(arg)) { + // Both help flag and unrecognized Google Test flags (excluding + // internal ones) trigger help display. + g_help_flag = true; + } + } + + if (g_help_flag) { + // We print the help here instead of in RUN_ALL_TESTS(), as the + // latter may not be called at all if the user is using Google + // Test with another testing framework. + PrintColorEncoded(kColorEncodedHelpMessage); + } +} + +// Parses the command line for Google Test flags, without initializing +// other parts of Google Test. +void ParseGoogleTestFlagsOnly(int* argc, char** argv) { + ParseGoogleTestFlagsOnlyImpl(argc, argv); +} +void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv) { + ParseGoogleTestFlagsOnlyImpl(argc, argv); +} + +// The internal implementation of InitGoogleTest(). +// +// The type parameter CharType can be instantiated to either char or +// wchar_t. +template <typename CharType> +void InitGoogleTestImpl(int* argc, CharType** argv) { + g_init_gtest_count++; + + // We don't want to run the initialization code twice. + if (g_init_gtest_count != 1) return; + + if (*argc <= 0) return; + + internal::g_executable_path = internal::StreamableToString(argv[0]); + +#if GTEST_HAS_DEATH_TEST + + g_argvs.clear(); + for (int i = 0; i != *argc; i++) { + g_argvs.push_back(StreamableToString(argv[i])); + } + +#endif // GTEST_HAS_DEATH_TEST + + ParseGoogleTestFlagsOnly(argc, argv); + GetUnitTestImpl()->PostFlagParsingInit(); +} + +} // namespace internal + +// Initializes Google Test. This must be called before calling +// RUN_ALL_TESTS(). In particular, it parses a command line for the +// flags that Google Test recognizes. Whenever a Google Test flag is +// seen, it is removed from argv, and *argc is decremented. +// +// No value is returned. Instead, the Google Test flag variables are +// updated. 
//
// Calling the function for the second time has no user-visible effect.
void InitGoogleTest(int* argc, char** argv) {
  // Forwards to the character-type-generic implementation.
  internal::InitGoogleTestImpl(argc, argv);
}

// This overloaded version can be used in Windows programs compiled in
// UNICODE mode.
void InitGoogleTest(int* argc, wchar_t** argv) {
  // Forwards to the character-type-generic implementation.
  internal::InitGoogleTestImpl(argc, argv);
}

}  // namespace testing
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan), vladl@google.com (Vlad Losev)
//
// This file implements death tests.


#if GTEST_HAS_DEATH_TEST

# if GTEST_OS_MAC
#  include <crt_externs.h>
# endif  // GTEST_OS_MAC

# include <errno.h>
# include <fcntl.h>
# include <limits.h>

# if GTEST_OS_LINUX
#  include <signal.h>
# endif  // GTEST_OS_LINUX

# include <stdarg.h>

# if GTEST_OS_WINDOWS
#  include <windows.h>
# else
#  include <sys/mman.h>
#  include <sys/wait.h>
# endif  // GTEST_OS_WINDOWS

# if GTEST_OS_QNX
#  include <spawn.h>
# endif  // GTEST_OS_QNX

#endif  // GTEST_HAS_DEATH_TEST


// Indicates that this translation unit is part of Google Test's
// implementation.  It must come before gtest-internal-inl.h is
// included, or there will be a compiler error.  This trick is to
// prevent a user from accidentally including gtest-internal-inl.h in
// their code.
// NOTE(review): here the #define is immediately followed by the #undef,
// with no #include in between -- presumably the include was removed when
// the sources were merged into a single file; confirm.
#define GTEST_IMPLEMENTATION_ 1
#undef GTEST_IMPLEMENTATION_

namespace testing {

// Constants.

// The default death test style.
static const char kDefaultDeathTestStyle[] = "fast";

// --gtest_death_test_style: defaults to the value taken from the
// environment, falling back to kDefaultDeathTestStyle.
GTEST_DEFINE_string_(
    death_test_style,
    internal::StringFromGTestEnv("death_test_style", kDefaultDeathTestStyle),
    "Indicates how to run a death test in a forked child process: "
    "\"threadsafe\" (child process re-executes the test binary "
    "from the beginning, running only the specific death test) or "
    "\"fast\" (child process runs the death test immediately "
    "after forking).");

// --gtest_death_test_use_fork: defaults to the value taken from the
// environment, falling back to false.
GTEST_DEFINE_bool_(
    death_test_use_fork,
    internal::BoolFromGTestEnv("death_test_use_fork", false),
    "Instructs to use fork()/_exit() instead of clone() in death tests. "
    "Ignored and always uses fork() on POSIX systems where clone() is not "
    "implemented. Useful when running under valgrind or similar tools if "
    "those do not support clone(). Valgrind 3.3.1 will just fail if "
    "it sees an unsupported combination of clone() flags. "
    "It is not recommended to use this flag w/o valgrind though it will "
    "work in 99% of the cases. Once valgrind is fixed, this flag will "
    "most likely be removed.");

namespace internal {
// --gtest_internal_run_death_test: empty unless this process is the child
// launched to run a single thread-safe death test.
GTEST_DEFINE_string_(
    internal_run_death_test, "",
    "Indicates the file, line number, temporal index of "
    "the single death test to run, and a file descriptor to "
    "which a success code may be sent, all separated by "
    "the '|' characters. This flag is specified if and only if the current "
    "process is a sub-process launched for running a thread-safe "
    "death test. FOR INTERNAL USE ONLY.");
}  // namespace internal

#if GTEST_HAS_DEATH_TEST

namespace internal {

// Valid only for fast death tests. Indicates the code is running in the
// child process of a fast style death test.
static bool g_in_fast_death_test_child = false;

// Returns a Boolean value indicating whether the caller is currently
// executing in the context of the death test child process.  Tools such as
// Valgrind heap checkers may need this to modify their behavior in death
// tests.  IMPORTANT: This is an internal utility.  Using it may break the
// implementation of death tests.  User code MUST NOT use it.
bool InDeathTestChild() {
# if GTEST_OS_WINDOWS

  // On Windows, death tests are thread-safe regardless of the value of the
  // death_test_style flag.
  return !GTEST_FLAG(internal_run_death_test).empty();

# else

  // A thread-safe child is identified by the internal flag it was launched
  // with; a fast child is identified by g_in_fast_death_test_child.
  if (GTEST_FLAG(death_test_style) == "threadsafe")
    return !GTEST_FLAG(internal_run_death_test).empty();
  else
    return g_in_fast_death_test_child;
#endif
}

}  // namespace internal

// ExitedWithCode constructor.
ExitedWithCode::ExitedWithCode(int exit_code) : exit_code_(exit_code) {
}

// ExitedWithCode function-call operator.
+bool ExitedWithCode::operator()(int exit_status) const { +# if GTEST_OS_WINDOWS + + return exit_status == exit_code_; + +# else + + return WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == exit_code_; + +# endif // GTEST_OS_WINDOWS +} + +# if !GTEST_OS_WINDOWS +// KilledBySignal constructor. +KilledBySignal::KilledBySignal(int signum) : signum_(signum) { +} + +// KilledBySignal function-call operator. +bool KilledBySignal::operator()(int exit_status) const { + return WIFSIGNALED(exit_status) && WTERMSIG(exit_status) == signum_; +} +# endif // !GTEST_OS_WINDOWS + +namespace internal { + +// Utilities needed for death tests. + +// Generates a textual description of a given exit code, in the format +// specified by wait(2). +static std::string ExitSummary(int exit_code) { + Message m; + +# if GTEST_OS_WINDOWS + + m << "Exited with exit status " << exit_code; + +# else + + if (WIFEXITED(exit_code)) { + m << "Exited with exit status " << WEXITSTATUS(exit_code); + } else if (WIFSIGNALED(exit_code)) { + m << "Terminated by signal " << WTERMSIG(exit_code); + } +# ifdef WCOREDUMP + if (WCOREDUMP(exit_code)) { + m << " (core dumped)"; + } +# endif +# endif // GTEST_OS_WINDOWS + + return m.GetString(); +} + +// Returns true if exit_status describes a process that was terminated +// by a signal, or exited normally with a nonzero exit code. +bool ExitedUnsuccessfully(int exit_status) { + return !ExitedWithCode(0)(exit_status); +} + +# if !GTEST_OS_WINDOWS +// Generates a textual failure message when a death test finds more than +// one thread running, or cannot determine the number of threads, prior +// to executing the given statement. It is the responsibility of the +// caller not to pass a thread_count of 1. +static std::string DeathTestThreadWarning(size_t thread_count) { + Message msg; + msg << "Death tests use fork(), which is unsafe particularly" + << " in a threaded context. 
For this test, " << GTEST_NAME_ << " "; + if (thread_count == 0) + msg << "couldn't detect the number of threads."; + else + msg << "detected " << thread_count << " threads."; + return msg.GetString(); +} +# endif // !GTEST_OS_WINDOWS + +// Flag characters for reporting a death test that did not die. +static const char kDeathTestLived = 'L'; +static const char kDeathTestReturned = 'R'; +static const char kDeathTestThrew = 'T'; +static const char kDeathTestInternalError = 'I'; + +// An enumeration describing all of the possible ways that a death test can +// conclude. DIED means that the process died while executing the test +// code; LIVED means that process lived beyond the end of the test code; +// RETURNED means that the test statement attempted to execute a return +// statement, which is not allowed; THREW means that the test statement +// returned control by throwing an exception. IN_PROGRESS means the test +// has not yet concluded. +// TODO(vladl@google.com): Unify names and possibly values for +// AbortReason, DeathTestOutcome, and flag characters above. +enum DeathTestOutcome { IN_PROGRESS, DIED, LIVED, RETURNED, THREW }; + +// Routine for aborting the program which is safe to call from an +// exec-style death test child process, in which case the error +// message is propagated back to the parent process. Otherwise, the +// message is simply printed to stderr. In either case, the program +// then exits with status 1. +void DeathTestAbort(const std::string& message) { + // On a POSIX system, this function may be called from a threadsafe-style + // death test child process, which operates on a very small stack. Use + // the heap for any additional non-minuscule memory requirements. 
+ const InternalRunDeathTestFlag* const flag = + GetUnitTestImpl()->internal_run_death_test_flag(); + if (flag != NULL) { + FILE* parent = posix::FDOpen(flag->write_fd(), "w"); + fputc(kDeathTestInternalError, parent); + fprintf(parent, "%s", message.c_str()); + fflush(parent); + _exit(1); + } else { + fprintf(stderr, "%s", message.c_str()); + fflush(stderr); + posix::Abort(); + } +} + +// A replacement for CHECK that calls DeathTestAbort if the assertion +// fails. +# define GTEST_DEATH_TEST_CHECK_(expression) \ + do { \ + if (!::testing::internal::IsTrue(expression)) { \ + DeathTestAbort( \ + ::std::string("CHECK failed: File ") + __FILE__ + ", line " \ + + ::testing::internal::StreamableToString(__LINE__) + ": " \ + + #expression); \ + } \ + } while (::testing::internal::AlwaysFalse()) + +// This macro is similar to GTEST_DEATH_TEST_CHECK_, but it is meant for +// evaluating any system call that fulfills two conditions: it must return +// -1 on failure, and set errno to EINTR when it is interrupted and +// should be tried again. The macro expands to a loop that repeatedly +// evaluates the expression as long as it evaluates to -1 and sets +// errno to EINTR. If the expression evaluates to -1 but errno is +// something other than EINTR, DeathTestAbort is called. +# define GTEST_DEATH_TEST_CHECK_SYSCALL_(expression) \ + do { \ + int gtest_retval; \ + do { \ + gtest_retval = (expression); \ + } while (gtest_retval == -1 && errno == EINTR); \ + if (gtest_retval == -1) { \ + DeathTestAbort( \ + ::std::string("CHECK failed: File ") + __FILE__ + ", line " \ + + ::testing::internal::StreamableToString(__LINE__) + ": " \ + + #expression + " != -1"); \ + } \ + } while (::testing::internal::AlwaysFalse()) + +// Returns the message describing the last system error in errno. +std::string GetLastErrnoDescription() { + return errno == 0 ? 
"" : posix::StrError(errno); +} + +// This is called from a death test parent process to read a failure +// message from the death test child process and log it with the FATAL +// severity. On Windows, the message is read from a pipe handle. On other +// platforms, it is read from a file descriptor. +static void FailFromInternalError(int fd) { + Message error; + char buffer[256]; + int num_read; + + do { + while ((num_read = posix::Read(fd, buffer, 255)) > 0) { + buffer[num_read] = '\0'; + error << buffer; + } + } while (num_read == -1 && errno == EINTR); + + if (num_read == 0) { + GTEST_LOG_(FATAL) << error.GetString(); + } else { + const int last_error = errno; + GTEST_LOG_(FATAL) << "Error while reading death test internal: " + << GetLastErrnoDescription() << " [" << last_error << "]"; + } +} + +// Death test constructor. Increments the running death test count +// for the current test. +DeathTest::DeathTest() { + TestInfo* const info = GetUnitTestImpl()->current_test_info(); + if (info == NULL) { + DeathTestAbort("Cannot run a death test outside of a TEST or " + "TEST_F construct"); + } +} + +// Creates and returns a death test by dispatching to the current +// death test factory. +bool DeathTest::Create(const char* statement, const RE* regex, + const char* file, int line, DeathTest** test) { + return GetUnitTestImpl()->death_test_factory()->Create( + statement, regex, file, line, test); +} + +const char* DeathTest::LastMessage() { + return last_death_test_message_.c_str(); +} + +void DeathTest::set_last_death_test_message(const std::string& message) { + last_death_test_message_ = message; +} + +std::string DeathTest::last_death_test_message_; + +// Provides cross platform implementation for some death functionality. 
+class DeathTestImpl : public DeathTest { + protected: + DeathTestImpl(const char* a_statement, const RE* a_regex) + : statement_(a_statement), + regex_(a_regex), + spawned_(false), + status_(-1), + outcome_(IN_PROGRESS), + read_fd_(-1), + write_fd_(-1) {} + + // read_fd_ is expected to be closed and cleared by a derived class. + ~DeathTestImpl() { GTEST_DEATH_TEST_CHECK_(read_fd_ == -1); } + + void Abort(AbortReason reason); + virtual bool Passed(bool status_ok); + + const char* statement() const { return statement_; } + const RE* regex() const { return regex_; } + bool spawned() const { return spawned_; } + void set_spawned(bool is_spawned) { spawned_ = is_spawned; } + int status() const { return status_; } + void set_status(int a_status) { status_ = a_status; } + DeathTestOutcome outcome() const { return outcome_; } + void set_outcome(DeathTestOutcome an_outcome) { outcome_ = an_outcome; } + int read_fd() const { return read_fd_; } + void set_read_fd(int fd) { read_fd_ = fd; } + int write_fd() const { return write_fd_; } + void set_write_fd(int fd) { write_fd_ = fd; } + + // Called in the parent process only. Reads the result code of the death + // test child process via a pipe, interprets it to set the outcome_ + // member, and closes read_fd_. Outputs diagnostics and terminates in + // case of unexpected codes. + void ReadAndInterpretStatusByte(); + + private: + // The textual content of the code this object is testing. This class + // doesn't own this string and should not attempt to delete it. + const char* const statement_; + // The regular expression which test output must match. DeathTestImpl + // doesn't own this object and should not attempt to delete it. + const RE* const regex_; + // True if the death test child process has been successfully spawned. + bool spawned_; + // The exit status of the child process. + int status_; + // How the death test concluded. + DeathTestOutcome outcome_; + // Descriptor to the read end of the pipe to the child process. 
It is + // always -1 in the child process. The child keeps its write end of the + // pipe in write_fd_. + int read_fd_; + // Descriptor to the child's write end of the pipe to the parent process. + // It is always -1 in the parent process. The parent keeps its end of the + // pipe in read_fd_. + int write_fd_; +}; + +// Called in the parent process only. Reads the result code of the death +// test child process via a pipe, interprets it to set the outcome_ +// member, and closes read_fd_. Outputs diagnostics and terminates in +// case of unexpected codes. +void DeathTestImpl::ReadAndInterpretStatusByte() { + char flag; + int bytes_read; + + // The read() here blocks until data is available (signifying the + // failure of the death test) or until the pipe is closed (signifying + // its success), so it's okay to call this in the parent before + // the child process has exited. + do { + bytes_read = posix::Read(read_fd(), &flag, 1); + } while (bytes_read == -1 && errno == EINTR); + + if (bytes_read == 0) { + set_outcome(DIED); + } else if (bytes_read == 1) { + switch (flag) { + case kDeathTestReturned: + set_outcome(RETURNED); + break; + case kDeathTestThrew: + set_outcome(THREW); + break; + case kDeathTestLived: + set_outcome(LIVED); + break; + case kDeathTestInternalError: + FailFromInternalError(read_fd()); // Does not return. + break; + default: + GTEST_LOG_(FATAL) << "Death test child process reported " + << "unexpected status byte (" + << static_cast<unsigned int>(flag) << ")"; + } + } else { + GTEST_LOG_(FATAL) << "Read from death test child process failed: " + << GetLastErrnoDescription(); + } + GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Close(read_fd())); + set_read_fd(-1); +} + +// Signals that the death test code which should have exited, didn't. +// Should be called only in a death test child process. +// Writes a status byte to the child's status file descriptor, then +// calls _exit(1). 
+void DeathTestImpl::Abort(AbortReason reason) { + // The parent process considers the death test to be a failure if + // it finds any data in our pipe. So, here we write a single flag byte + // to the pipe, then exit. + const char status_ch = + reason == TEST_DID_NOT_DIE ? kDeathTestLived : + reason == TEST_THREW_EXCEPTION ? kDeathTestThrew : kDeathTestReturned; + + GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Write(write_fd(), &status_ch, 1)); + // We are leaking the descriptor here because on some platforms (i.e., + // when built as Windows DLL), destructors of global objects will still + // run after calling _exit(). On such systems, write_fd_ will be + // indirectly closed from the destructor of UnitTestImpl, causing double + // close if it is also closed here. On debug configurations, double close + // may assert. As there are no in-process buffers to flush here, we are + // relying on the OS to close the descriptor after the process terminates + // when the destructors are not run. + _exit(1); // Exits w/o any normal exit hooks (we were supposed to crash) +} + +// Returns an indented copy of stderr output for a death test. +// This makes distinguishing death test output lines from regular log lines +// much easier. +static ::std::string FormatDeathTestOutput(const ::std::string& output) { + ::std::string ret; + for (size_t at = 0; ; ) { + const size_t line_end = output.find('\n', at); + ret += "[ DEATH ] "; + if (line_end == ::std::string::npos) { + ret += output.substr(at); + break; + } + ret += output.substr(at, line_end + 1 - at); + at = line_end + 1; + } + return ret; +} + +// Assesses the success or failure of a death test, using both private +// members which have previously been set, and one argument: +// +// Private data members: +// outcome: An enumeration describing how the death test +// concluded: DIED, LIVED, THREW, or RETURNED. The death test +// fails in the latter three cases. +// status: The exit status of the child process. 
On *nix, it is in the +// in the format specified by wait(2). On Windows, this is the +// value supplied to the ExitProcess() API or a numeric code +// of the exception that terminated the program. +// regex: A regular expression object to be applied to +// the test's captured standard error output; the death test +// fails if it does not match. +// +// Argument: +// status_ok: true if exit_status is acceptable in the context of +// this particular death test, which fails if it is false +// +// Returns true iff all of the above conditions are met. Otherwise, the +// first failing condition, in the order given above, is the one that is +// reported. Also sets the last death test message string. +bool DeathTestImpl::Passed(bool status_ok) { + if (!spawned()) + return false; + + const std::string error_message = GetCapturedStderr(); + + bool success = false; + Message buffer; + + buffer << "Death test: " << statement() << "\n"; + switch (outcome()) { + case LIVED: + buffer << " Result: failed to die.\n" + << " Error msg:\n" << FormatDeathTestOutput(error_message); + break; + case THREW: + buffer << " Result: threw an exception.\n" + << " Error msg:\n" << FormatDeathTestOutput(error_message); + break; + case RETURNED: + buffer << " Result: illegal return in test statement.\n" + << " Error msg:\n" << FormatDeathTestOutput(error_message); + break; + case DIED: + if (status_ok) { + const bool matched = RE::PartialMatch(error_message.c_str(), *regex()); + if (matched) { + success = true; + } else { + buffer << " Result: died but not with expected error.\n" + << " Expected: " << regex()->pattern() << "\n" + << "Actual msg:\n" << FormatDeathTestOutput(error_message); + } + } else { + buffer << " Result: died but not with expected exit code:\n" + << " " << ExitSummary(status()) << "\n" + << "Actual msg:\n" << FormatDeathTestOutput(error_message); + } + break; + case IN_PROGRESS: + default: + GTEST_LOG_(FATAL) + << "DeathTest::Passed somehow called before conclusion of test"; 
+ } + + DeathTest::set_last_death_test_message(buffer.GetString()); + return success; +} + +# if GTEST_OS_WINDOWS +// WindowsDeathTest implements death tests on Windows. Due to the +// specifics of starting new processes on Windows, death tests there are +// always threadsafe, and Google Test considers the +// --gtest_death_test_style=fast setting to be equivalent to +// --gtest_death_test_style=threadsafe there. +// +// A few implementation notes: Like the Linux version, the Windows +// implementation uses pipes for child-to-parent communication. But due to +// the specifics of pipes on Windows, some extra steps are required: +// +// 1. The parent creates a communication pipe and stores handles to both +// ends of it. +// 2. The parent starts the child and provides it with the information +// necessary to acquire the handle to the write end of the pipe. +// 3. The child acquires the write end of the pipe and signals the parent +// using a Windows event. +// 4. Now the parent can release the write end of the pipe on its side. If +// this is done before step 3, the object's reference count goes down to +// 0 and it is destroyed, preventing the child from acquiring it. The +// parent now has to release it, or read operations on the read end of +// the pipe will not return when the child terminates. +// 5. The parent reads child's output through the pipe (outcome code and +// any possible error messages) from the pipe, and its stderr and then +// determines whether to fail the test. +// +// Note: to distinguish Win32 API calls from the local method and function +// calls, the former are explicitly resolved in the global namespace. +// +class WindowsDeathTest : public DeathTestImpl { + public: + WindowsDeathTest(const char* a_statement, + const RE* a_regex, + const char* file, + int line) + : DeathTestImpl(a_statement, a_regex), file_(file), line_(line) {} + + // All of these virtual functions are inherited from DeathTest. 
+ virtual int Wait(); + virtual TestRole AssumeRole(); + + private: + // The name of the file in which the death test is located. + const char* const file_; + // The line number on which the death test is located. + const int line_; + // Handle to the write end of the pipe to the child process. + AutoHandle write_handle_; + // Child process handle. + AutoHandle child_handle_; + // Event the child process uses to signal the parent that it has + // acquired the handle to the write end of the pipe. After seeing this + // event the parent can release its own handles to make sure its + // ReadFile() calls return when the child terminates. + AutoHandle event_handle_; +}; + +// Waits for the child in a death test to exit, returning its exit +// status, or 0 if no child process exists. As a side effect, sets the +// outcome data member. +int WindowsDeathTest::Wait() { + if (!spawned()) + return 0; + + // Wait until the child either signals that it has acquired the write end + // of the pipe or it dies. + const HANDLE wait_handles[2] = { child_handle_.Get(), event_handle_.Get() }; + switch (::WaitForMultipleObjects(2, + wait_handles, + FALSE, // Waits for any of the handles. + INFINITE)) { + case WAIT_OBJECT_0: + case WAIT_OBJECT_0 + 1: + break; + default: + GTEST_DEATH_TEST_CHECK_(false); // Should not get here. + } + + // The child has acquired the write end of the pipe or exited. + // We release the handle on our side and continue. + write_handle_.Reset(); + event_handle_.Reset(); + + ReadAndInterpretStatusByte(); + + // Waits for the child process to exit if it haven't already. This + // returns immediately if the child has already exited, regardless of + // whether previous calls to WaitForMultipleObjects synchronized on this + // handle or not. 
+ GTEST_DEATH_TEST_CHECK_( + WAIT_OBJECT_0 == ::WaitForSingleObject(child_handle_.Get(), + INFINITE)); + DWORD status_code; + GTEST_DEATH_TEST_CHECK_( + ::GetExitCodeProcess(child_handle_.Get(), &status_code) != FALSE); + child_handle_.Reset(); + set_status(static_cast<int>(status_code)); + return status(); +} + +// The AssumeRole process for a Windows death test. It creates a child +// process with the same executable as the current process to run the +// death test. The child process is given the --gtest_filter and +// --gtest_internal_run_death_test flags such that it knows to run the +// current death test only. +DeathTest::TestRole WindowsDeathTest::AssumeRole() { + const UnitTestImpl* const impl = GetUnitTestImpl(); + const InternalRunDeathTestFlag* const flag = + impl->internal_run_death_test_flag(); + const TestInfo* const info = impl->current_test_info(); + const int death_test_index = info->result()->death_test_count(); + + if (flag != NULL) { + // ParseInternalRunDeathTestFlag() has performed all the necessary + // processing. + set_write_fd(flag->write_fd()); + return EXECUTE_TEST; + } + + // WindowsDeathTest uses an anonymous pipe to communicate results of + // a death test. + SECURITY_ATTRIBUTES handles_are_inheritable = { + sizeof(SECURITY_ATTRIBUTES), NULL, TRUE }; + HANDLE read_handle, write_handle; + GTEST_DEATH_TEST_CHECK_( + ::CreatePipe(&read_handle, &write_handle, &handles_are_inheritable, + 0) // Default buffer size. + != FALSE); + set_read_fd(::_open_osfhandle(reinterpret_cast<intptr_t>(read_handle), + O_RDONLY)); + write_handle_.Reset(write_handle); + event_handle_.Reset(::CreateEvent( + &handles_are_inheritable, + TRUE, // The event will automatically reset to non-signaled state. + FALSE, // The initial state is non-signalled. + NULL)); // The even is unnamed. 
+ GTEST_DEATH_TEST_CHECK_(event_handle_.Get() != NULL); + const std::string filter_flag = + std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "=" + + info->test_case_name() + "." + info->name(); + const std::string internal_flag = + std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag + + "=" + file_ + "|" + StreamableToString(line_) + "|" + + StreamableToString(death_test_index) + "|" + + StreamableToString(static_cast<unsigned int>(::GetCurrentProcessId())) + + // size_t has the same width as pointers on both 32-bit and 64-bit + // Windows platforms. + // See http://msdn.microsoft.com/en-us/library/tcxf1dw6.aspx. + "|" + StreamableToString(reinterpret_cast<size_t>(write_handle)) + + "|" + StreamableToString(reinterpret_cast<size_t>(event_handle_.Get())); + + char executable_path[_MAX_PATH + 1]; // NOLINT + GTEST_DEATH_TEST_CHECK_( + _MAX_PATH + 1 != ::GetModuleFileNameA(NULL, + executable_path, + _MAX_PATH)); + + std::string command_line = + std::string(::GetCommandLineA()) + " " + filter_flag + " \"" + + internal_flag + "\""; + + DeathTest::set_last_death_test_message(""); + + CaptureStderr(); + // Flush the log buffers since the log streams are shared with the child. + FlushInfoLog(); + + // The child process will share the standard handles with the parent. + STARTUPINFOA startup_info; + memset(&startup_info, 0, sizeof(STARTUPINFO)); + startup_info.dwFlags = STARTF_USESTDHANDLES; + startup_info.hStdInput = ::GetStdHandle(STD_INPUT_HANDLE); + startup_info.hStdOutput = ::GetStdHandle(STD_OUTPUT_HANDLE); + startup_info.hStdError = ::GetStdHandle(STD_ERROR_HANDLE); + + PROCESS_INFORMATION process_info; + GTEST_DEATH_TEST_CHECK_(::CreateProcessA( + executable_path, + const_cast<char*>(command_line.c_str()), + NULL, // Retuned process handle is not inheritable. + NULL, // Retuned thread handle is not inheritable. + TRUE, // Child inherits all inheritable handles (for write_handle_). + 0x0, // Default creation flags. 
+ NULL, // Inherit the parent's environment. + UnitTest::GetInstance()->original_working_dir(), + &startup_info, + &process_info) != FALSE); + child_handle_.Reset(process_info.hProcess); + ::CloseHandle(process_info.hThread); + set_spawned(true); + return OVERSEE_TEST; +} +# else // We are not on Windows. + +// ForkingDeathTest provides implementations for most of the abstract +// methods of the DeathTest interface. Only the AssumeRole method is +// left undefined. +class ForkingDeathTest : public DeathTestImpl { + public: + ForkingDeathTest(const char* statement, const RE* regex); + + // All of these virtual functions are inherited from DeathTest. + virtual int Wait(); + + protected: + void set_child_pid(pid_t child_pid) { child_pid_ = child_pid; } + + private: + // PID of child process during death test; 0 in the child process itself. + pid_t child_pid_; +}; + +// Constructs a ForkingDeathTest. +ForkingDeathTest::ForkingDeathTest(const char* a_statement, const RE* a_regex) + : DeathTestImpl(a_statement, a_regex), + child_pid_(-1) {} + +// Waits for the child in a death test to exit, returning its exit +// status, or 0 if no child process exists. As a side effect, sets the +// outcome data member. +int ForkingDeathTest::Wait() { + if (!spawned()) + return 0; + + ReadAndInterpretStatusByte(); + + int status_value; + GTEST_DEATH_TEST_CHECK_SYSCALL_(waitpid(child_pid_, &status_value, 0)); + set_status(status_value); + return status_value; +} + +// A concrete death test class that forks, then immediately runs the test +// in the child process. +class NoExecDeathTest : public ForkingDeathTest { + public: + NoExecDeathTest(const char* a_statement, const RE* a_regex) : + ForkingDeathTest(a_statement, a_regex) { } + virtual TestRole AssumeRole(); +}; + +// The AssumeRole process for a fork-and-run death test. It implements a +// straightforward fork, with a simple pipe to transmit the status byte. 
+DeathTest::TestRole NoExecDeathTest::AssumeRole() { + const size_t thread_count = GetThreadCount(); + if (thread_count != 1) { + GTEST_LOG_(WARNING) << DeathTestThreadWarning(thread_count); + } + + int pipe_fd[2]; + GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1); + + DeathTest::set_last_death_test_message(""); + CaptureStderr(); + // When we fork the process below, the log file buffers are copied, but the + // file descriptors are shared. We flush all log files here so that closing + // the file descriptors in the child process doesn't throw off the + // synchronization between descriptors and buffers in the parent process. + // This is as close to the fork as possible to avoid a race condition in case + // there are multiple threads running before the death test, and another + // thread writes to the log file. + FlushInfoLog(); + + const pid_t child_pid = fork(); + GTEST_DEATH_TEST_CHECK_(child_pid != -1); + set_child_pid(child_pid); + if (child_pid == 0) { + GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[0])); + set_write_fd(pipe_fd[1]); + // Redirects all logging to stderr in the child process to prevent + // concurrent writes to the log files. We capture stderr in the parent + // process and append the child process' output to a log. + LogToStderr(); + // Event forwarding to the listeners of event listener API mush be shut + // down in death test subprocesses. + GetUnitTestImpl()->listeners()->SuppressEventForwarding(); + g_in_fast_death_test_child = true; + return EXECUTE_TEST; + } else { + GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1])); + set_read_fd(pipe_fd[0]); + set_spawned(true); + return OVERSEE_TEST; + } +} + +// A concrete death test class that forks and re-executes the main +// program from the beginning, with command-line flags set that cause +// only this specific death test to be run. 
+class ExecDeathTest : public ForkingDeathTest { + public: + ExecDeathTest(const char* a_statement, const RE* a_regex, + const char* file, int line) : + ForkingDeathTest(a_statement, a_regex), file_(file), line_(line) { } + virtual TestRole AssumeRole(); + private: + static ::std::vector<testing::internal::string> + GetArgvsForDeathTestChildProcess() { + ::std::vector<testing::internal::string> args = GetInjectableArgvs(); + return args; + } + // The name of the file in which the death test is located. + const char* const file_; + // The line number on which the death test is located. + const int line_; +}; + +// Utility class for accumulating command-line arguments. +class Arguments { + public: + Arguments() { + args_.push_back(NULL); + } + + ~Arguments() { + for (std::vector<char*>::iterator i = args_.begin(); i != args_.end(); + ++i) { + free(*i); + } + } + void AddArgument(const char* argument) { + args_.insert(args_.end() - 1, posix::StrDup(argument)); + } + + template <typename Str> + void AddArguments(const ::std::vector<Str>& arguments) { + for (typename ::std::vector<Str>::const_iterator i = arguments.begin(); + i != arguments.end(); + ++i) { + args_.insert(args_.end() - 1, posix::StrDup(i->c_str())); + } + } + char* const* Argv() { + return &args_[0]; + } + + private: + std::vector<char*> args_; +}; + +// A struct that encompasses the arguments to the child process of a +// threadsafe-style death test process. +struct ExecDeathTestArgs { + char* const* argv; // Command-line arguments for the child's call to exec + int close_fd; // File descriptor to close; the read end of a pipe +}; + +# if GTEST_OS_MAC +inline char** GetEnviron() { + // When Google Test is built as a framework on MacOS X, the environ variable + // is unavailable. Apple's documentation (man environ) recommends using + // _NSGetEnviron() instead. + return *_NSGetEnviron(); +} +# else +// Some POSIX platforms expect you to declare environ. 
extern "C" makes +// it reside in the global namespace. +extern "C" char** environ; +inline char** GetEnviron() { return environ; } +# endif // GTEST_OS_MAC + +# if !GTEST_OS_QNX +// The main function for a threadsafe-style death test child process. +// This function is called in a clone()-ed process and thus must avoid +// any potentially unsafe operations like malloc or libc functions. +static int ExecDeathTestChildMain(void* child_arg) { + ExecDeathTestArgs* const args = static_cast<ExecDeathTestArgs*>(child_arg); + GTEST_DEATH_TEST_CHECK_SYSCALL_(close(args->close_fd)); + + // We need to execute the test program in the same environment where + // it was originally invoked. Therefore we change to the original + // working directory first. + const char* const original_dir = + UnitTest::GetInstance()->original_working_dir(); + // We can safely call chdir() as it's a direct system call. + if (chdir(original_dir) != 0) { + DeathTestAbort(std::string("chdir(\"") + original_dir + "\") failed: " + + GetLastErrnoDescription()); + return EXIT_FAILURE; + } + + // We can safely call execve() as it's a direct system call. We + // cannot use execvp() as it's a libc function and thus potentially + // unsafe. Since execve() doesn't search the PATH, the user must + // invoke the test program via a valid path that contains at least + // one path separator. + execve(args->argv[0], args->argv, GetEnviron()); + DeathTestAbort(std::string("execve(") + args->argv[0] + ", ...) in " + + original_dir + " failed: " + + GetLastErrnoDescription()); + return EXIT_FAILURE; +} +# endif // !GTEST_OS_QNX + +// Two utility routines that together determine the direction the stack +// grows. +// This could be accomplished more elegantly by a single recursive +// function, but we want to guard against the unlikely possibility of +// a smart compiler optimizing the recursion away. 
//
// GTEST_NO_INLINE_ is required to prevent GCC 4.6 from inlining
// StackLowerThanAddress into StackGrowsDown, which then doesn't give
// correct answer.
//
// Stores in *result whether the address of a local variable of this
// function is lower than ptr.
void StackLowerThanAddress(const void* ptr, bool* result) GTEST_NO_INLINE_;
void StackLowerThanAddress(const void* ptr, bool* result) {
  int dummy;
  *result = (&dummy < ptr);
}

// Returns true if a local of a called function lives at a lower address
// than a local of its caller, i.e. the stack grows towards lower addresses.
bool StackGrowsDown() {
  int dummy;
  bool result;
  StackLowerThanAddress(&dummy, &result);
  return result;
}

// Spawns a child process with the same executable as the current process in
// a thread-safe manner and instructs it to run the death test.  The
// implementation uses fork(2) + exec.  On systems where clone(2) is
// available, it is used instead, being slightly more thread-safe.  On QNX,
// fork supports only single-threaded environments, so this function uses
// spawn(2) there instead.  The function dies with an error message if
// anything goes wrong.
//
// argv is the NULL-terminated argument vector for the child; close_fd is
// a descriptor that must not stay open in the child (closed by
// ExecDeathTestChildMain, or marked close-on-exec on QNX).  Returns the
// child's process id.
static pid_t ExecDeathTestSpawnChild(char* const* argv, int close_fd) {
  ExecDeathTestArgs args = { argv, close_fd };
  pid_t child_pid = -1;

#  if GTEST_OS_QNX
  // Obtains the current directory and sets it to be closed in the child
  // process.
  const int cwd_fd = open(".", O_RDONLY);
  GTEST_DEATH_TEST_CHECK_(cwd_fd != -1);
  GTEST_DEATH_TEST_CHECK_SYSCALL_(fcntl(cwd_fd, F_SETFD, FD_CLOEXEC));
  // We need to execute the test program in the same environment where
  // it was originally invoked.  Therefore we change to the original
  // working directory first.
  const char* const original_dir =
      UnitTest::GetInstance()->original_working_dir();
  // We can safely call chdir() as it's a direct system call.
  if (chdir(original_dir) != 0) {
    DeathTestAbort(std::string("chdir(\"") + original_dir + "\") failed: " +
                   GetLastErrnoDescription());
    // NOTE(review): EXIT_FAILURE is returned from a function whose result is
    // a pid_t; presumably unreachable because DeathTestAbort does not
    // return -- confirm.
    return EXIT_FAILURE;
  }

  int fd_flags;
  // Set close_fd to be closed after spawn.
  GTEST_DEATH_TEST_CHECK_SYSCALL_(fd_flags = fcntl(close_fd, F_GETFD));
  GTEST_DEATH_TEST_CHECK_SYSCALL_(fcntl(close_fd, F_SETFD,
                                        fd_flags | FD_CLOEXEC));
  struct inheritance inherit = {0};
  // spawn is a system call.
  child_pid = spawn(args.argv[0], 0, NULL, &inherit, args.argv, GetEnviron());
  // Restores the current working directory.
  GTEST_DEATH_TEST_CHECK_(fchdir(cwd_fd) != -1);
  GTEST_DEATH_TEST_CHECK_SYSCALL_(close(cwd_fd));

#  else   // GTEST_OS_QNX
#   if GTEST_OS_LINUX
  // When a SIGPROF signal is received while fork() or clone() are executing,
  // the process may hang. To avoid this, we ignore SIGPROF here and re-enable
  // it after the call to fork()/clone() is complete.
  struct sigaction saved_sigprof_action;
  struct sigaction ignore_sigprof_action;
  memset(&ignore_sigprof_action, 0, sizeof(ignore_sigprof_action));
  sigemptyset(&ignore_sigprof_action.sa_mask);
  ignore_sigprof_action.sa_handler = SIG_IGN;
  GTEST_DEATH_TEST_CHECK_SYSCALL_(sigaction(
      SIGPROF, &ignore_sigprof_action, &saved_sigprof_action));
#   endif  // GTEST_OS_LINUX

#   if GTEST_HAS_CLONE
  const bool use_fork = GTEST_FLAG(death_test_use_fork);

  if (!use_fork) {
    // The growth direction is determined once and cached for later calls.
    static const bool stack_grows_down = StackGrowsDown();
    // The child's stack is a single page.
    const size_t stack_size = getpagesize();
    // MAP_ANONYMOUS is not defined on Mac, so we use MAP_ANON instead.
    void* const stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
                             MAP_ANON | MAP_PRIVATE, -1, 0);
    GTEST_DEATH_TEST_CHECK_(stack != MAP_FAILED);

    // Maximum stack alignment in bytes:  For a downward-growing stack, this
    // amount is subtracted from size of the stack space to get an address
    // that is within the stack space and is aligned on all systems we care
    // about.  As far as I know there is no ABI with stack alignment greater
    // than 64.  We assume stack and stack_size already have alignment of
    // kMaxStackAlignment.
    const size_t kMaxStackAlignment = 64;
    void* const stack_top =
        static_cast<char*>(stack) +
            (stack_grows_down ? stack_size - kMaxStackAlignment : 0);
    GTEST_DEATH_TEST_CHECK_(stack_size > kMaxStackAlignment &&
        reinterpret_cast<intptr_t>(stack_top) % kMaxStackAlignment == 0);

    child_pid = clone(&ExecDeathTestChildMain, stack_top, SIGCHLD, &args);

    // The parent's mapping of the child stack is released right after the
    // clone() call; a failed clone() is detected by the child_pid check at
    // the end of the function.
    GTEST_DEATH_TEST_CHECK_(munmap(stack, stack_size) != -1);
  }
#   else
  const bool use_fork = true;
#   endif  // GTEST_HAS_CLONE

  // In the forked child ExecDeathTestChildMain() only returns on failure;
  // _exit() then terminates the child.
  if (use_fork && (child_pid = fork()) == 0) {
      ExecDeathTestChildMain(&args);
      _exit(0);
  }
#  endif  // GTEST_OS_QNX
#  if GTEST_OS_LINUX
  // Restore the SIGPROF disposition saved before the fork()/clone().
  GTEST_DEATH_TEST_CHECK_SYSCALL_(
      sigaction(SIGPROF, &saved_sigprof_action, NULL));
#  endif  // GTEST_OS_LINUX

  GTEST_DEATH_TEST_CHECK_(child_pid != -1);
  return child_pid;
}

// The AssumeRole process for a fork-and-exec death test.  It re-executes the
// main program from the beginning, setting the --gtest_filter
// and --gtest_internal_run_death_test flags to cause only the current
// death test to be re-run.
DeathTest::TestRole ExecDeathTest::AssumeRole() {
  const UnitTestImpl* const impl = GetUnitTestImpl();
  const InternalRunDeathTestFlag* const flag =
      impl->internal_run_death_test_flag();
  const TestInfo* const info = impl->current_test_info();
  const int death_test_index = info->result()->death_test_count();

  // A non-NULL flag means this process is the re-executed child: adopt the
  // write descriptor carried by the flag and run the test statement.
  if (flag != NULL) {
    set_write_fd(flag->write_fd());
    return EXECUTE_TEST;
  }

  int pipe_fd[2];
  GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1);
  // Clear the close-on-exec flag on the write end of the pipe, lest
  // it be closed when the child process does an exec:
  GTEST_DEATH_TEST_CHECK_(fcntl(pipe_fd[1], F_SETFD, 0) != -1);

  // Restricts the child to the currently running test.
  const std::string filter_flag =
      std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "=" +
      info->test_case_name() + "." + info->name();
  // Encoded as file|line|death_test_index|write_fd; decoded by
  // ParseInternalRunDeathTestFlag() in the child.
  const std::string internal_flag =
      std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag + "=" +
      file_ + "|" + StreamableToString(line_) + "|" +
      StreamableToString(death_test_index) + "|" +
      StreamableToString(pipe_fd[1]);
  Arguments args;
  args.AddArguments(GetArgvsForDeathTestChildProcess());
  args.AddArgument(filter_flag.c_str());
  args.AddArgument(internal_flag.c_str());

  DeathTest::set_last_death_test_message("");

  CaptureStderr();
  // See the comment in NoExecDeathTest::AssumeRole for why the next line
  // is necessary.
  FlushInfoLog();

  // The read end is handed over as the descriptor to close in the child;
  // the parent closes its copy of the write end afterwards.
  const pid_t child_pid = ExecDeathTestSpawnChild(args.Argv(), pipe_fd[0]);
  GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1]));
  set_child_pid(child_pid);
  set_read_fd(pipe_fd[0]);
  set_spawned(true);
  return OVERSEE_TEST;
}

# endif  // !GTEST_OS_WINDOWS

// Creates a concrete DeathTest-derived class that depends on the
// --gtest_death_test_style flag, and sets the pointer pointed to
// by the "test" argument to its address.  If the test should be
// skipped, sets that pointer to NULL.  Returns true, unless the
// flag is set to an invalid value.
+bool DefaultDeathTestFactory::Create(const char* statement, const RE* regex, + const char* file, int line, + DeathTest** test) { + UnitTestImpl* const impl = GetUnitTestImpl(); + const InternalRunDeathTestFlag* const flag = + impl->internal_run_death_test_flag(); + const int death_test_index = impl->current_test_info() + ->increment_death_test_count(); + + if (flag != NULL) { + if (death_test_index > flag->index()) { + DeathTest::set_last_death_test_message( + "Death test count (" + StreamableToString(death_test_index) + + ") somehow exceeded expected maximum (" + + StreamableToString(flag->index()) + ")"); + return false; + } + + if (!(flag->file() == file && flag->line() == line && + flag->index() == death_test_index)) { + *test = NULL; + return true; + } + } + +# if GTEST_OS_WINDOWS + + if (GTEST_FLAG(death_test_style) == "threadsafe" || + GTEST_FLAG(death_test_style) == "fast") { + *test = new WindowsDeathTest(statement, regex, file, line); + } + +# else + + if (GTEST_FLAG(death_test_style) == "threadsafe") { + *test = new ExecDeathTest(statement, regex, file, line); + } else if (GTEST_FLAG(death_test_style) == "fast") { + *test = new NoExecDeathTest(statement, regex); + } + +# endif // GTEST_OS_WINDOWS + + else { // NOLINT - this is more readable than unbalanced brackets inside #if. + DeathTest::set_last_death_test_message( + "Unknown death test style \"" + GTEST_FLAG(death_test_style) + + "\" encountered"); + return false; + } + + return true; +} + +// Splits a given string on a given delimiter, populating a given +// vector with the fields. GTEST_HAS_DEATH_TEST implies that we have +// ::std::string, so we can use it here. 
+static void SplitString(const ::std::string& str, char delimiter, + ::std::vector< ::std::string>* dest) { + ::std::vector< ::std::string> parsed; + ::std::string::size_type pos = 0; + while (::testing::internal::AlwaysTrue()) { + const ::std::string::size_type colon = str.find(delimiter, pos); + if (colon == ::std::string::npos) { + parsed.push_back(str.substr(pos)); + break; + } else { + parsed.push_back(str.substr(pos, colon - pos)); + pos = colon + 1; + } + } + dest->swap(parsed); +} + +# if GTEST_OS_WINDOWS +// Recreates the pipe and event handles from the provided parameters, +// signals the event, and returns a file descriptor wrapped around the pipe +// handle. This function is called in the child process only. +int GetStatusFileDescriptor(unsigned int parent_process_id, + size_t write_handle_as_size_t, + size_t event_handle_as_size_t) { + AutoHandle parent_process_handle(::OpenProcess(PROCESS_DUP_HANDLE, + FALSE, // Non-inheritable. + parent_process_id)); + if (parent_process_handle.Get() == INVALID_HANDLE_VALUE) { + DeathTestAbort("Unable to open parent process " + + StreamableToString(parent_process_id)); + } + + // TODO(vladl@google.com): Replace the following check with a + // compile-time assertion when available. + GTEST_CHECK_(sizeof(HANDLE) <= sizeof(size_t)); + + const HANDLE write_handle = + reinterpret_cast<HANDLE>(write_handle_as_size_t); + HANDLE dup_write_handle; + + // The newly initialized handle is accessible only in in the parent + // process. To obtain one accessible within the child, we need to use + // DuplicateHandle. + if (!::DuplicateHandle(parent_process_handle.Get(), write_handle, + ::GetCurrentProcess(), &dup_write_handle, + 0x0, // Requested privileges ignored since + // DUPLICATE_SAME_ACCESS is used. + FALSE, // Request non-inheritable handler. 
+ DUPLICATE_SAME_ACCESS)) { + DeathTestAbort("Unable to duplicate the pipe handle " + + StreamableToString(write_handle_as_size_t) + + " from the parent process " + + StreamableToString(parent_process_id)); + } + + const HANDLE event_handle = reinterpret_cast<HANDLE>(event_handle_as_size_t); + HANDLE dup_event_handle; + + if (!::DuplicateHandle(parent_process_handle.Get(), event_handle, + ::GetCurrentProcess(), &dup_event_handle, + 0x0, + FALSE, + DUPLICATE_SAME_ACCESS)) { + DeathTestAbort("Unable to duplicate the event handle " + + StreamableToString(event_handle_as_size_t) + + " from the parent process " + + StreamableToString(parent_process_id)); + } + + const int write_fd = + ::_open_osfhandle(reinterpret_cast<intptr_t>(dup_write_handle), O_APPEND); + if (write_fd == -1) { + DeathTestAbort("Unable to convert pipe handle " + + StreamableToString(write_handle_as_size_t) + + " to a file descriptor"); + } + + // Signals the parent that the write end of the pipe has been acquired + // so the parent can release its own write end. + ::SetEvent(dup_event_handle); + + return write_fd; +} +# endif // GTEST_OS_WINDOWS + +// Returns a newly created InternalRunDeathTestFlag object with fields +// initialized from the GTEST_FLAG(internal_run_death_test) flag if +// the flag is specified; otherwise returns NULL. +InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() { + if (GTEST_FLAG(internal_run_death_test) == "") return NULL; + + // GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we + // can use it here. 
+ int line = -1; + int index = -1; + ::std::vector< ::std::string> fields; + SplitString(GTEST_FLAG(internal_run_death_test).c_str(), '|', &fields); + int write_fd = -1; + +# if GTEST_OS_WINDOWS + + unsigned int parent_process_id = 0; + size_t write_handle_as_size_t = 0; + size_t event_handle_as_size_t = 0; + + if (fields.size() != 6 + || !ParseNaturalNumber(fields[1], &line) + || !ParseNaturalNumber(fields[2], &index) + || !ParseNaturalNumber(fields[3], &parent_process_id) + || !ParseNaturalNumber(fields[4], &write_handle_as_size_t) + || !ParseNaturalNumber(fields[5], &event_handle_as_size_t)) { + DeathTestAbort("Bad --gtest_internal_run_death_test flag: " + + GTEST_FLAG(internal_run_death_test)); + } + write_fd = GetStatusFileDescriptor(parent_process_id, + write_handle_as_size_t, + event_handle_as_size_t); +# else + + if (fields.size() != 4 + || !ParseNaturalNumber(fields[1], &line) + || !ParseNaturalNumber(fields[2], &index) + || !ParseNaturalNumber(fields[3], &write_fd)) { + DeathTestAbort("Bad --gtest_internal_run_death_test flag: " + + GTEST_FLAG(internal_run_death_test)); + } + +# endif // GTEST_OS_WINDOWS + + return new InternalRunDeathTestFlag(fields[0], line, index, write_fd); +} + +} // namespace internal + +#endif // GTEST_HAS_DEATH_TEST + +} // namespace testing +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: keith.ray@gmail.com (Keith Ray) + + +#include <stdlib.h> + +#if GTEST_OS_WINDOWS_MOBILE +# include <windows.h> +#elif GTEST_OS_WINDOWS +# include <direct.h> +# include <io.h> +#elif GTEST_OS_SYMBIAN +// Symbian OpenC has PATH_MAX in sys/syslimits.h +# include <sys/syslimits.h> +#else +# include <limits.h> +# include <climits> // Some Linux distributions define PATH_MAX here. +#endif // GTEST_OS_WINDOWS_MOBILE + +#if GTEST_OS_WINDOWS +# define GTEST_PATH_MAX_ _MAX_PATH +#elif defined(PATH_MAX) +# define GTEST_PATH_MAX_ PATH_MAX +#elif defined(_XOPEN_PATH_MAX) +# define GTEST_PATH_MAX_ _XOPEN_PATH_MAX +#else +# define GTEST_PATH_MAX_ _POSIX_PATH_MAX +#endif // GTEST_OS_WINDOWS + + +namespace testing { +namespace internal { + +#if GTEST_OS_WINDOWS +// On Windows, '\\' is the standard path separator, but many tools and the +// Windows API also accept '/' as an alternate path separator. Unless otherwise +// noted, a file path can contain either kind of path separators, or a mixture +// of them. 
+const char kPathSeparator = '\\'; +const char kAlternatePathSeparator = '/'; +//const char kPathSeparatorString[] = "\\"; +const char kAlternatePathSeparatorString[] = "/"; +# if GTEST_OS_WINDOWS_MOBILE +// Windows CE doesn't have a current directory. You should not use +// the current directory in tests on Windows CE, but this at least +// provides a reasonable fallback. +const char kCurrentDirectoryString[] = "\\"; +// Windows CE doesn't define INVALID_FILE_ATTRIBUTES +const DWORD kInvalidFileAttributes = 0xffffffff; +# else +const char kCurrentDirectoryString[] = ".\\"; +# endif // GTEST_OS_WINDOWS_MOBILE +#else +const char kPathSeparator = '/'; +//const char kPathSeparatorString[] = "/"; +const char kCurrentDirectoryString[] = "./"; +#endif // GTEST_OS_WINDOWS + +// Returns whether the given character is a valid path separator. +static bool IsPathSeparator(char c) { +#if GTEST_HAS_ALT_PATH_SEP_ + return (c == kPathSeparator) || (c == kAlternatePathSeparator); +#else + return c == kPathSeparator; +#endif +} + +// Returns the current working directory, or "" if unsuccessful. +FilePath FilePath::GetCurrentDir() { +#if GTEST_OS_WINDOWS_MOBILE + // Windows CE doesn't have a current directory, so we just return + // something reasonable. + return FilePath(kCurrentDirectoryString); +#elif GTEST_OS_WINDOWS + char cwd[GTEST_PATH_MAX_ + 1] = { '\0' }; + return FilePath(_getcwd(cwd, sizeof(cwd)) == NULL ? "" : cwd); +#else + char cwd[GTEST_PATH_MAX_ + 1] = { '\0' }; + return FilePath(getcwd(cwd, sizeof(cwd)) == NULL ? "" : cwd); +#endif // GTEST_OS_WINDOWS_MOBILE +} + +// Returns a copy of the FilePath with the case-insensitive extension removed. +// Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns +// FilePath("dir/file"). If a case-insensitive extension is not +// found, returns a copy of the original FilePath. 
+FilePath FilePath::RemoveExtension(const char* extension) const { + const std::string dot_extension = std::string(".") + extension; + if (String::EndsWithCaseInsensitive(pathname_, dot_extension)) { + return FilePath(pathname_.substr( + 0, pathname_.length() - dot_extension.length())); + } + return *this; +} + +// Returns a pointer to the last occurence of a valid path separator in +// the FilePath. On Windows, for example, both '/' and '\' are valid path +// separators. Returns NULL if no path separator was found. +const char* FilePath::FindLastPathSeparator() const { + const char* const last_sep = strrchr(c_str(), kPathSeparator); +#if GTEST_HAS_ALT_PATH_SEP_ + const char* const last_alt_sep = strrchr(c_str(), kAlternatePathSeparator); + // Comparing two pointers of which only one is NULL is undefined. + if (last_alt_sep != NULL && + (last_sep == NULL || last_alt_sep > last_sep)) { + return last_alt_sep; + } +#endif + return last_sep; +} + +// Returns a copy of the FilePath with the directory part removed. +// Example: FilePath("path/to/file").RemoveDirectoryName() returns +// FilePath("file"). If there is no directory part ("just_a_file"), it returns +// the FilePath unmodified. If there is no file part ("just_a_dir/") it +// returns an empty FilePath (""). +// On Windows platform, '\' is the path separator, otherwise it is '/'. +FilePath FilePath::RemoveDirectoryName() const { + const char* const last_sep = FindLastPathSeparator(); + return last_sep ? FilePath(last_sep + 1) : *this; +} + +// RemoveFileName returns the directory path with the filename removed. +// Example: FilePath("path/to/file").RemoveFileName() returns "path/to/". +// If the FilePath is "a_file" or "/a_file", RemoveFileName returns +// FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does +// not have a file, like "just/a/dir/", it returns the FilePath unmodified. +// On Windows platform, '\' is the path separator, otherwise it is '/'. 
+FilePath FilePath::RemoveFileName() const { + const char* const last_sep = FindLastPathSeparator(); + std::string dir; + if (last_sep) { + dir = std::string(c_str(), last_sep + 1 - c_str()); + } else { + dir = kCurrentDirectoryString; + } + return FilePath(dir); +} + +// Helper functions for naming files in a directory for xml output. + +// Given directory = "dir", base_name = "test", number = 0, +// extension = "xml", returns "dir/test.xml". If number is greater +// than zero (e.g., 12), returns "dir/test_12.xml". +// On Windows platform, uses \ as the separator rather than /. +FilePath FilePath::MakeFileName(const FilePath& directory, + const FilePath& base_name, + int number, + const char* extension) { + std::string file; + if (number == 0) { + file = base_name.string() + "." + extension; + } else { + file = base_name.string() + "_" + StreamableToString(number) + + "." + extension; + } + return ConcatPaths(directory, FilePath(file)); +} + +// Given directory = "dir", relative_path = "test.xml", returns "dir/test.xml". +// On Windows, uses \ as the separator rather than /. +FilePath FilePath::ConcatPaths(const FilePath& directory, + const FilePath& relative_path) { + if (directory.IsEmpty()) + return relative_path; + const FilePath dir(directory.RemoveTrailingPathSeparator()); + return FilePath(dir.string() + kPathSeparator + relative_path.string()); +} + +// Returns true if pathname describes something findable in the file-system, +// either a file, directory, or whatever. +bool FilePath::FileOrDirectoryExists() const { +#if GTEST_OS_WINDOWS_MOBILE + LPCWSTR unicode = String::AnsiToUtf16(pathname_.c_str()); + const DWORD attributes = GetFileAttributes(unicode); + delete [] unicode; + return attributes != kInvalidFileAttributes; +#else + posix::StatStruct file_stat; + return posix::Stat(pathname_.c_str(), &file_stat) == 0; +#endif // GTEST_OS_WINDOWS_MOBILE +} + +// Returns true if pathname describes a directory in the file-system +// that exists. 
+bool FilePath::DirectoryExists() const { + bool result = false; +#if GTEST_OS_WINDOWS + // Don't strip off trailing separator if path is a root directory on + // Windows (like "C:\\"). + const FilePath& path(IsRootDirectory() ? *this : + RemoveTrailingPathSeparator()); +#else + const FilePath& path(*this); +#endif + +#if GTEST_OS_WINDOWS_MOBILE + LPCWSTR unicode = String::AnsiToUtf16(path.c_str()); + const DWORD attributes = GetFileAttributes(unicode); + delete [] unicode; + if ((attributes != kInvalidFileAttributes) && + (attributes & FILE_ATTRIBUTE_DIRECTORY)) { + result = true; + } +#else + posix::StatStruct file_stat; + result = posix::Stat(path.c_str(), &file_stat) == 0 && + posix::IsDir(file_stat); +#endif // GTEST_OS_WINDOWS_MOBILE + + return result; +} + +// Returns true if pathname describes a root directory. (Windows has one +// root directory per disk drive.) +bool FilePath::IsRootDirectory() const { +#if GTEST_OS_WINDOWS + // TODO(wan@google.com): on Windows a network share like + // \\server\share can be a root directory, although it cannot be the + // current directory. Handle this properly. + return pathname_.length() == 3 && IsAbsolutePath(); +#else + return pathname_.length() == 1 && IsPathSeparator(pathname_.c_str()[0]); +#endif +} + +// Returns true if pathname describes an absolute path. +bool FilePath::IsAbsolutePath() const { + const char* const name = pathname_.c_str(); +#if GTEST_OS_WINDOWS + return pathname_.length() >= 3 && + ((name[0] >= 'a' && name[0] <= 'z') || + (name[0] >= 'A' && name[0] <= 'Z')) && + name[1] == ':' && + IsPathSeparator(name[2]); +#else + return IsPathSeparator(name[0]); +#endif +} + +// Returns a pathname for a file that does not currently exist. The pathname +// will be directory/base_name.extension or +// directory/base_name_<number>.extension if directory/base_name.extension +// already exists. The number will be incremented until a pathname is found +// that does not already exist. 
+// Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'. +// There could be a race condition if two or more processes are calling this +// function at the same time -- they could both pick the same filename. +FilePath FilePath::GenerateUniqueFileName(const FilePath& directory, + const FilePath& base_name, + const char* extension) { + FilePath full_pathname; + int number = 0; + do { + full_pathname.Set(MakeFileName(directory, base_name, number++, extension)); + } while (full_pathname.FileOrDirectoryExists()); + return full_pathname; +} + +// Returns true if FilePath ends with a path separator, which indicates that +// it is intended to represent a directory. Returns false otherwise. +// This does NOT check that a directory (or file) actually exists. +bool FilePath::IsDirectory() const { + return !pathname_.empty() && + IsPathSeparator(pathname_.c_str()[pathname_.length() - 1]); +} + +// Create directories so that path exists. Returns true if successful or if +// the directories already exist; returns false if unable to create directories +// for any reason. +bool FilePath::CreateDirectoriesRecursively() const { + if (!this->IsDirectory()) { + return false; + } + + if (pathname_.length() == 0 || this->DirectoryExists()) { + return true; + } + + const FilePath parent(this->RemoveTrailingPathSeparator().RemoveFileName()); + return parent.CreateDirectoriesRecursively() && this->CreateFolder(); +} + +// Create the directory so that path exists. Returns true if successful or +// if the directory already exists; returns false if unable to create the +// directory for any reason, including if the parent directory does not +// exist. Not named "CreateDirectory" because that's a macro on Windows. +bool FilePath::CreateFolder() const { +#if GTEST_OS_WINDOWS_MOBILE + FilePath removed_sep(this->RemoveTrailingPathSeparator()); + LPCWSTR unicode = String::AnsiToUtf16(removed_sep.c_str()); + int result = CreateDirectory(unicode, NULL) ? 
0 : -1; + delete [] unicode; +#elif GTEST_OS_WINDOWS + int result = _mkdir(pathname_.c_str()); +#else + int result = mkdir(pathname_.c_str(), 0777); +#endif // GTEST_OS_WINDOWS_MOBILE + + if (result == -1) { + return this->DirectoryExists(); // An error is OK if the directory exists. + } + return true; // No error. +} + +// If input name has a trailing separator character, remove it and return the +// name, otherwise return the name string unmodified. +// On Windows platform, uses \ as the separator, other platforms use /. +FilePath FilePath::RemoveTrailingPathSeparator() const { + return IsDirectory() + ? FilePath(pathname_.substr(0, pathname_.length() - 1)) + : *this; +} + +// Removes any redundant separators that might be in the pathname. +// For example, "bar///foo" becomes "bar/foo". Does not eliminate other +// redundancies that might be in a pathname involving "." or "..". +// TODO(wan@google.com): handle Windows network shares (e.g. \\server\share). +void FilePath::Normalize() { + if (pathname_.c_str() == NULL) { + pathname_ = ""; + return; + } + const char* src = pathname_.c_str(); + char* const dest = new char[pathname_.length() + 1]; + char* dest_ptr = dest; + memset(dest_ptr, 0, pathname_.length() + 1); + + while (*src != '\0') { + *dest_ptr = *src; + if (!IsPathSeparator(*src)) { + src++; + } else { +#if GTEST_HAS_ALT_PATH_SEP_ + if (*dest_ptr == kAlternatePathSeparator) { + *dest_ptr = kPathSeparator; + } +#endif + while (IsPathSeparator(*src)) + src++; + } + dest_ptr++; + } + *dest_ptr = '\0'; + pathname_ = dest; + delete[] dest; +} + +} // namespace internal +} // namespace testing +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + + +#include <limits.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#if GTEST_OS_WINDOWS_MOBILE +# include <windows.h> // For TerminateProcess() +#elif GTEST_OS_WINDOWS +# include <io.h> +# include <sys/stat.h> +#else +# include <unistd.h> +#endif // GTEST_OS_WINDOWS_MOBILE + +#if GTEST_OS_MAC +# include <mach/mach_init.h> +# include <mach/task.h> +# include <mach/vm_map.h> +#endif // GTEST_OS_MAC + +#if GTEST_OS_QNX +# include <devctl.h> +# include <sys/procfs.h> +#endif // GTEST_OS_QNX + + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. 
This trick is to +// prevent a user from accidentally including gtest-internal-inl.h in +// his code. +#define GTEST_IMPLEMENTATION_ 1 +#undef GTEST_IMPLEMENTATION_ + +namespace testing { +namespace internal { + +#if defined(_MSC_VER) || defined(__BORLANDC__) +// MSVC and C++Builder do not provide a definition of STDERR_FILENO. +const int kStdOutFileno = 1; +const int kStdErrFileno = 2; +#else +const int kStdOutFileno = STDOUT_FILENO; +const int kStdErrFileno = STDERR_FILENO; +#endif // _MSC_VER + +#if GTEST_OS_MAC + +// Returns the number of threads running in the process, or 0 to indicate that +// we cannot detect it. +size_t GetThreadCount() { + const task_t task = mach_task_self(); + mach_msg_type_number_t thread_count; + thread_act_array_t thread_list; + const kern_return_t status = task_threads(task, &thread_list, &thread_count); + if (status == KERN_SUCCESS) { + // task_threads allocates resources in thread_list and we need to free them + // to avoid leaks. + vm_deallocate(task, + reinterpret_cast<vm_address_t>(thread_list), + sizeof(thread_t) * thread_count); + return static_cast<size_t>(thread_count); + } else { + return 0; + } +} + +#elif GTEST_OS_QNX + +// Returns the number of threads running in the process, or 0 to indicate that +// we cannot detect it. +size_t GetThreadCount() { + const int fd = open("/proc/self/as", O_RDONLY); + if (fd < 0) { + return 0; + } + procfs_info process_info; + const int status = + devctl(fd, DCMD_PROC_INFO, &process_info, sizeof(process_info), NULL); + close(fd); + if (status == EOK) { + return static_cast<size_t>(process_info.num_threads); + } else { + return 0; + } +} + +#else + +size_t GetThreadCount() { + // There's no portable way to detect the number of threads, so we just + // return 0 to indicate that we cannot detect it. + return 0; +} + +#endif // GTEST_OS_MAC + +#if GTEST_USES_POSIX_RE + +// Implements RE. Currently only needed for death tests. 
+ +RE::~RE() { + if (is_valid_) { + // regfree'ing an invalid regex might crash because the content + // of the regex is undefined. Since the regex's are essentially + // the same, one cannot be valid (or invalid) without the other + // being so too. + regfree(&partial_regex_); + regfree(&full_regex_); + } + free(const_cast<char*>(pattern_)); +} + +// Returns true iff regular expression re matches the entire str. +bool RE::FullMatch(const char* str, const RE& re) { + if (!re.is_valid_) return false; + + regmatch_t match; + return regexec(&re.full_regex_, str, 1, &match, 0) == 0; +} + +// Returns true iff regular expression re matches a substring of str +// (including str itself). +bool RE::PartialMatch(const char* str, const RE& re) { + if (!re.is_valid_) return false; + + regmatch_t match; + return regexec(&re.partial_regex_, str, 1, &match, 0) == 0; +} + +// Initializes an RE from its string representation. +void RE::Init(const char* regex) { + pattern_ = posix::StrDup(regex); + + // Reserves enough bytes to hold the regular expression used for a + // full match. + const size_t full_regex_len = strlen(regex) + 10; + char* const full_pattern = new char[full_regex_len]; + + snprintf(full_pattern, full_regex_len, "^(%s)$", regex); + is_valid_ = regcomp(&full_regex_, full_pattern, REG_EXTENDED) == 0; + // We want to call regcomp(&partial_regex_, ...) even if the + // previous expression returns false. Otherwise partial_regex_ may + // not be properly initialized can may cause trouble when it's + // freed. + // + // Some implementation of POSIX regex (e.g. on at least some + // versions of Cygwin) doesn't accept the empty string as a valid + // regex. We change it to an equivalent form "()" to be safe. + if (is_valid_) { + const char* const partial_regex = (*regex == '\0') ? 
"()" : regex; + is_valid_ = regcomp(&partial_regex_, partial_regex, REG_EXTENDED) == 0; + } + EXPECT_TRUE(is_valid_) + << "Regular expression \"" << regex + << "\" is not a valid POSIX Extended regular expression."; + + delete[] full_pattern; +} + +#elif GTEST_USES_SIMPLE_RE + +// Returns true iff ch appears anywhere in str (excluding the +// terminating '\0' character). +bool IsInSet(char ch, const char* str) { + return ch != '\0' && strchr(str, ch) != NULL; +} + +// Returns true iff ch belongs to the given classification. Unlike +// similar functions in <ctype.h>, these aren't affected by the +// current locale. +bool IsAsciiDigit(char ch) { return '0' <= ch && ch <= '9'; } +bool IsAsciiPunct(char ch) { + return IsInSet(ch, "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~"); +} +bool IsRepeat(char ch) { return IsInSet(ch, "?*+"); } +bool IsAsciiWhiteSpace(char ch) { return IsInSet(ch, " \f\n\r\t\v"); } +bool IsAsciiWordChar(char ch) { + return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || + ('0' <= ch && ch <= '9') || ch == '_'; +} + +// Returns true iff "\\c" is a supported escape sequence. +bool IsValidEscape(char c) { + return (IsAsciiPunct(c) || IsInSet(c, "dDfnrsStvwW")); +} + +// Returns true iff the given atom (specified by escaped and pattern) +// matches ch. The result is undefined if the atom is invalid. +bool AtomMatchesChar(bool escaped, char pattern_char, char ch) { + if (escaped) { // "\\p" where p is pattern_char. + switch (pattern_char) { + case 'd': return IsAsciiDigit(ch); + case 'D': return !IsAsciiDigit(ch); + case 'f': return ch == '\f'; + case 'n': return ch == '\n'; + case 'r': return ch == '\r'; + case 's': return IsAsciiWhiteSpace(ch); + case 'S': return !IsAsciiWhiteSpace(ch); + case 't': return ch == '\t'; + case 'v': return ch == '\v'; + case 'w': return IsAsciiWordChar(ch); + case 'W': return !IsAsciiWordChar(ch); + } + return IsAsciiPunct(pattern_char) && pattern_char == ch; + } + + return (pattern_char == '.' 
&& ch != '\n') || pattern_char == ch; +} + +// Helper function used by ValidateRegex() to format error messages. +std::string FormatRegexSyntaxError(const char* regex, int index) { + return (Message() << "Syntax error at index " << index + << " in simple regular expression \"" << regex << "\": ").GetString(); +} + +// Generates non-fatal failures and returns false if regex is invalid; +// otherwise returns true. +bool ValidateRegex(const char* regex) { + if (regex == NULL) { + // TODO(wan@google.com): fix the source file location in the + // assertion failures to match where the regex is used in user + // code. + ADD_FAILURE() << "NULL is not a valid simple regular expression."; + return false; + } + + bool is_valid = true; + + // True iff ?, *, or + can follow the previous atom. + bool prev_repeatable = false; + for (int i = 0; regex[i]; i++) { + if (regex[i] == '\\') { // An escape sequence + i++; + if (regex[i] == '\0') { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1) + << "'\\' cannot appear at the end."; + return false; + } + + if (!IsValidEscape(regex[i])) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1) + << "invalid escape sequence \"\\" << regex[i] << "\"."; + is_valid = false; + } + prev_repeatable = true; + } else { // Not an escape sequence. 
+ const char ch = regex[i]; + + if (ch == '^' && i > 0) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'^' can only appear at the beginning."; + is_valid = false; + } else if (ch == '$' && regex[i + 1] != '\0') { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'$' can only appear at the end."; + is_valid = false; + } else if (IsInSet(ch, "()[]{}|")) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'" << ch << "' is unsupported."; + is_valid = false; + } else if (IsRepeat(ch) && !prev_repeatable) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'" << ch << "' can only follow a repeatable token."; + is_valid = false; + } + + prev_repeatable = !IsInSet(ch, "^$?*+"); + } + } + + return is_valid; +} + +// Matches a repeated regex atom followed by a valid simple regular +// expression. The regex atom is defined as c if escaped is false, +// or \c otherwise. repeat is the repetition meta character (?, *, +// or +). The behavior is undefined if str contains too many +// characters to be indexable by size_t, in which case the test will +// probably time out anyway. We are fine with this limitation as +// std::string has it too. +bool MatchRepetitionAndRegexAtHead( + bool escaped, char c, char repeat, const char* regex, + const char* str) { + const size_t min_count = (repeat == '+') ? 1 : 0; + const size_t max_count = (repeat == '?') ? 1 : + static_cast<size_t>(-1) - 1; + // We cannot call numeric_limits::max() as it conflicts with the + // max() macro on Windows. + + for (size_t i = 0; i <= max_count; ++i) { + // We know that the atom matches each of the first i characters in str. + if (i >= min_count && MatchRegexAtHead(regex, str + i)) { + // We have enough matches at the head, and the tail matches too. + // Since we only care about *whether* the pattern matches str + // (as opposed to *how* it matches), there is no need to find a + // greedy match. 
+ return true; + } + if (str[i] == '\0' || !AtomMatchesChar(escaped, c, str[i])) + return false; + } + return false; +} + +// Returns true iff regex matches a prefix of str. regex must be a +// valid simple regular expression and not start with "^", or the +// result is undefined. +bool MatchRegexAtHead(const char* regex, const char* str) { + if (*regex == '\0') // An empty regex matches a prefix of anything. + return true; + + // "$" only matches the end of a string. Note that regex being + // valid guarantees that there's nothing after "$" in it. + if (*regex == '$') + return *str == '\0'; + + // Is the first thing in regex an escape sequence? + const bool escaped = *regex == '\\'; + if (escaped) + ++regex; + if (IsRepeat(regex[1])) { + // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so + // here's an indirect recursion. It terminates as the regex gets + // shorter in each recursion. + return MatchRepetitionAndRegexAtHead( + escaped, regex[0], regex[1], regex + 2, str); + } else { + // regex isn't empty, isn't "$", and doesn't start with a + // repetition. We match the first atom of regex with the first + // character of str and recurse. + return (*str != '\0') && AtomMatchesChar(escaped, *regex, *str) && + MatchRegexAtHead(regex + 1, str + 1); + } +} + +// Returns true iff regex matches any substring of str. regex must be +// a valid simple regular expression, or the result is undefined. +// +// The algorithm is recursive, but the recursion depth doesn't exceed +// the regex length, so we won't need to worry about running out of +// stack space normally. In rare cases the time complexity can be +// exponential with respect to the regex length + the string length, +// but usually it's must faster (often close to linear). +bool MatchRegexAnywhere(const char* regex, const char* str) { + if (regex == NULL || str == NULL) + return false; + + if (*regex == '^') + return MatchRegexAtHead(regex + 1, str); + + // A successful match can be anywhere in str. 
+ do { + if (MatchRegexAtHead(regex, str)) + return true; + } while (*str++ != '\0'); + return false; +} + +// Implements the RE class. + +RE::~RE() { + free(const_cast<char*>(pattern_)); + free(const_cast<char*>(full_pattern_)); +} + +// Returns true iff regular expression re matches the entire str. +bool RE::FullMatch(const char* str, const RE& re) { + return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str); +} + +// Returns true iff regular expression re matches a substring of str +// (including str itself). +bool RE::PartialMatch(const char* str, const RE& re) { + return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str); +} + +// Initializes an RE from its string representation. +void RE::Init(const char* regex) { + pattern_ = full_pattern_ = NULL; + if (regex != NULL) { + pattern_ = posix::StrDup(regex); + } + + is_valid_ = ValidateRegex(regex); + if (!is_valid_) { + // No need to calculate the full pattern when the regex is invalid. + return; + } + + const size_t len = strlen(regex); + // Reserves enough bytes to hold the regular expression used for a + // full match: we need space to prepend a '^', append a '$', and + // terminate the string with '\0'. + char* buffer = static_cast<char*>(malloc(len + 3)); + full_pattern_ = buffer; + + if (*regex != '^') + *buffer++ = '^'; // Makes sure full_pattern_ starts with '^'. + + // We don't use snprintf or strncpy, as they trigger a warning when + // compiled with VC++ 8.0. + memcpy(buffer, regex, len); + buffer += len; + + if (len == 0 || regex[len - 1] != '$') + *buffer++ = '$'; // Makes sure full_pattern_ ends with '$'. + + *buffer = '\0'; +} + +#endif // GTEST_USES_POSIX_RE + +const char kUnknownFile[] = "unknown file"; + +// Formats a source file path and a line number as they would appear +// in an error message from the compiler used to compile this code. +GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) { + const std::string file_name(file == NULL ? 
kUnknownFile : file); + + if (line < 0) { + return file_name + ":"; + } +#ifdef _MSC_VER + return file_name + "(" + StreamableToString(line) + "):"; +#else + return file_name + ":" + StreamableToString(line) + ":"; +#endif // _MSC_VER +} + +// Formats a file location for compiler-independent XML output. +// Although this function is not platform dependent, we put it next to +// FormatFileLocation in order to contrast the two functions. +// Note that FormatCompilerIndependentFileLocation() does NOT append colon +// to the file location it produces, unlike FormatFileLocation(). +GTEST_API_ ::std::string FormatCompilerIndependentFileLocation( + const char* file, int line) { + const std::string file_name(file == NULL ? kUnknownFile : file); + + if (line < 0) + return file_name; + else + return file_name + ":" + StreamableToString(line); +} + + +GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line) + : severity_(severity) { + const char* const marker = + severity == GTEST_INFO ? "[ INFO ]" : + severity == GTEST_WARNING ? "[WARNING]" : + severity == GTEST_ERROR ? "[ ERROR ]" : "[ FATAL ]"; + GetStream() << ::std::endl << marker << " " + << FormatFileLocation(file, line).c_str() << ": "; +} + +// Flushes the buffers and, if severity is GTEST_FATAL, aborts the program. +GTestLog::~GTestLog() { + GetStream() << ::std::endl; + if (severity_ == GTEST_FATAL) { + fflush(stderr); + posix::Abort(); + } +} +// Disable Microsoft deprecation warnings for POSIX functions called from +// this class (creat, dup, dup2, and close) +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable: 4996) +#endif // _MSC_VER + +#if GTEST_HAS_STREAM_REDIRECTION + +// Object that captures an output stream (stdout/stderr). +class CapturedStream { + public: + // The ctor redirects the stream to a temporary file. 
+ explicit CapturedStream(int fd) : fd_(fd), uncaptured_fd_(dup(fd)) { +# if GTEST_OS_WINDOWS + char temp_dir_path[MAX_PATH + 1] = { '\0' }; // NOLINT + char temp_file_path[MAX_PATH + 1] = { '\0' }; // NOLINT + + ::GetTempPathA(sizeof(temp_dir_path), temp_dir_path); + const UINT success = ::GetTempFileNameA(temp_dir_path, + "gtest_redir", + 0, // Generate unique file name. + temp_file_path); + GTEST_CHECK_(success != 0) + << "Unable to create a temporary file in " << temp_dir_path; + const int captured_fd = creat(temp_file_path, _S_IREAD | _S_IWRITE); + GTEST_CHECK_(captured_fd != -1) << "Unable to open temporary file " + << temp_file_path; + filename_ = temp_file_path; +# else + // There's no guarantee that a test has write access to the current + // directory, so we create the temporary file in the /tmp directory + // instead. We use /tmp on most systems, and /sdcard on Android. + // That's because Android doesn't have /tmp. +# if GTEST_OS_LINUX_ANDROID + // Note: Android applications are expected to call the framework's + // Context.getExternalStorageDirectory() method through JNI to get + // the location of the world-writable SD Card directory. However, + // this requires a Context handle, which cannot be retrieved + // globally from native code. Doing so also precludes running the + // code as part of a regular standalone executable, which doesn't + // run in a Dalvik process (e.g. when running it through 'adb shell'). + // + // The location /sdcard is directly accessible from native code + // and is the only location (unofficially) supported by the Android + // team. It's generally a symlink to the real SD Card mount point + // which can be /mnt/sdcard, /mnt/sdcard0, /system/media/sdcard, or + // other OEM-customized locations. Never rely on these, and always + // use /sdcard. 
+ char name_template[] = "/sdcard/gtest_captured_stream.XXXXXX"; +# else + char name_template[] = "/tmp/captured_stream.XXXXXX"; +# endif // GTEST_OS_LINUX_ANDROID + const int captured_fd = mkstemp(name_template); + filename_ = name_template; +# endif // GTEST_OS_WINDOWS + fflush(NULL); + dup2(captured_fd, fd_); + close(captured_fd); + } + + ~CapturedStream() { + remove(filename_.c_str()); + } + + std::string GetCapturedString() { + if (uncaptured_fd_ != -1) { + // Restores the original stream. + fflush(NULL); + dup2(uncaptured_fd_, fd_); + close(uncaptured_fd_); + uncaptured_fd_ = -1; + } + + FILE* const file = posix::FOpen(filename_.c_str(), "r"); + const std::string content = ReadEntireFile(file); + posix::FClose(file); + return content; + } + + private: + // Reads the entire content of a file as an std::string. + static std::string ReadEntireFile(FILE* file); + + // Returns the size (in bytes) of a file. + static size_t GetFileSize(FILE* file); + + const int fd_; // A stream to capture. + int uncaptured_fd_; + // Name of the temporary file holding the stderr output. + ::std::string filename_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream); +}; + +// Returns the size (in bytes) of a file. +size_t CapturedStream::GetFileSize(FILE* file) { + fseek(file, 0, SEEK_END); + return static_cast<size_t>(ftell(file)); +} + +// Reads the entire content of a file as a string. +std::string CapturedStream::ReadEntireFile(FILE* file) { + const size_t file_size = GetFileSize(file); + char* const buffer = new char[file_size]; + + size_t bytes_last_read = 0; // # of bytes read in the last fread() + size_t bytes_read = 0; // # of bytes read so far + + fseek(file, 0, SEEK_SET); + + // Keeps reading the file until we cannot read further or the + // pre-determined file size is reached. 
+ do { + bytes_last_read = fread(buffer+bytes_read, 1, file_size-bytes_read, file); + bytes_read += bytes_last_read; + } while (bytes_last_read > 0 && bytes_read < file_size); + + const std::string content(buffer, bytes_read); + delete[] buffer; + + return content; +} + +# ifdef _MSC_VER +# pragma warning(pop) +# endif // _MSC_VER + +static CapturedStream* g_captured_stderr = NULL; +static CapturedStream* g_captured_stdout = NULL; + +// Starts capturing an output stream (stdout/stderr). +void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) { + if (*stream != NULL) { + GTEST_LOG_(FATAL) << "Only one " << stream_name + << " capturer can exist at a time."; + } + *stream = new CapturedStream(fd); +} + +// Stops capturing the output stream and returns the captured string. +std::string GetCapturedStream(CapturedStream** captured_stream) { + const std::string content = (*captured_stream)->GetCapturedString(); + + delete *captured_stream; + *captured_stream = NULL; + + return content; +} + +// Starts capturing stdout. +void CaptureStdout() { + CaptureStream(kStdOutFileno, "stdout", &g_captured_stdout); +} + +// Starts capturing stderr. +void CaptureStderr() { + CaptureStream(kStdErrFileno, "stderr", &g_captured_stderr); +} + +// Stops capturing stdout and returns the captured string. +std::string GetCapturedStdout() { + return GetCapturedStream(&g_captured_stdout); +} + +// Stops capturing stderr and returns the captured string. +std::string GetCapturedStderr() { + return GetCapturedStream(&g_captured_stderr); +} + +#endif // GTEST_HAS_STREAM_REDIRECTION + +#if GTEST_HAS_DEATH_TEST + +// A copy of all command line arguments. Set by InitGoogleTest(). +::std::vector<testing::internal::string> g_argvs; + +static const ::std::vector<testing::internal::string>* g_injected_test_argvs = + NULL; // Owned. 
+ +void SetInjectableArgvs(const ::std::vector<testing::internal::string>* argvs) { + if (g_injected_test_argvs != argvs) + delete g_injected_test_argvs; + g_injected_test_argvs = argvs; +} + +const ::std::vector<testing::internal::string>& GetInjectableArgvs() { + if (g_injected_test_argvs != NULL) { + return *g_injected_test_argvs; + } + return g_argvs; +} +#endif // GTEST_HAS_DEATH_TEST + +#if GTEST_OS_WINDOWS_MOBILE +namespace posix { +void Abort() { + DebugBreak(); + TerminateProcess(GetCurrentProcess(), 1); +} +} // namespace posix +#endif // GTEST_OS_WINDOWS_MOBILE + +// Returns the name of the environment variable corresponding to the +// given flag. For example, FlagToEnvVar("foo") will return +// "GTEST_FOO" in the open-source version. +static std::string FlagToEnvVar(const char* flag) { + const std::string full_flag = + (Message() << GTEST_FLAG_PREFIX_ << flag).GetString(); + + Message env_var; + for (size_t i = 0; i != full_flag.length(); i++) { + env_var << ToUpper(full_flag.c_str()[i]); + } + + return env_var.GetString(); +} + +// Parses 'str' for a 32-bit signed integer. If successful, writes +// the result to *value and returns true; otherwise leaves *value +// unchanged and returns false. +bool ParseInt32(const Message& src_text, const char* str, Int32* value) { + // Parses the environment variable as a decimal integer. + char* end = NULL; + const long long_value = strtol(str, &end, 10); // NOLINT + + // Has strtol() consumed all characters in the string? + if (*end != '\0') { + // No - an invalid character was encountered. + Message msg; + msg << "WARNING: " << src_text + << " is expected to be a 32-bit integer, but actually" + << " has value \"" << str << "\".\n"; + printf("%s", msg.GetString().c_str()); + fflush(stdout); + return false; + } + + // Is the parsed value in the range of an Int32? + const Int32 result = static_cast<Int32>(long_value); + if (long_value == LONG_MAX || long_value == LONG_MIN || + // The parsed value overflows as a long. 
(strtol() returns + // LONG_MAX or LONG_MIN when the input overflows.) + result != long_value + // The parsed value overflows as an Int32. + ) { + Message msg; + msg << "WARNING: " << src_text + << " is expected to be a 32-bit integer, but actually" + << " has value " << str << ", which overflows.\n"; + printf("%s", msg.GetString().c_str()); + fflush(stdout); + return false; + } + + *value = result; + return true; +} + +// Reads and returns the Boolean environment variable corresponding to +// the given flag; if it's not set, returns default_value. +// +// The value is considered true iff it's not "0". +bool BoolFromGTestEnv(const char* flag, bool default_value) { + const std::string env_var = FlagToEnvVar(flag); + const char* const string_value = posix::GetEnv(env_var.c_str()); + return string_value == NULL ? + default_value : strcmp(string_value, "0") != 0; +} + +// Reads and returns a 32-bit integer stored in the environment +// variable corresponding to the given flag; if it isn't set or +// doesn't represent a valid 32-bit integer, returns default_value. +Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) { + const std::string env_var = FlagToEnvVar(flag); + const char* const string_value = posix::GetEnv(env_var.c_str()); + if (string_value == NULL) { + // The environment variable is not set. + return default_value; + } + + Int32 result = default_value; + if (!ParseInt32(Message() << "Environment variable " << env_var, + string_value, &result)) { + printf("The default value %s is used.\n", + (Message() << default_value).GetString().c_str()); + fflush(stdout); + return default_value; + } + + return result; +} + +// Reads and returns the string environment variable corresponding to +// the given flag; if it's not set, returns default_value. +const char* StringFromGTestEnv(const char* flag, const char* default_value) { + const std::string env_var = FlagToEnvVar(flag); + const char* const value = posix::GetEnv(env_var.c_str()); + return value == NULL ? 
default_value : value; +} + +} // namespace internal +} // namespace testing +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Author: wan@google.com (Zhanyong Wan) + +// Google Test - The Google C++ Testing Framework +// +// This file implements a universal value printer that can print a +// value of any type T: +// +// void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr); +// +// It uses the << operator when possible, and prints the bytes in the +// object otherwise. A user can override its behavior for a class +// type Foo by defining either operator<<(::std::ostream&, const Foo&) +// or void PrintTo(const Foo&, ::std::ostream*) in the namespace that +// defines Foo. + +#include <ctype.h> +#include <stdio.h> +#include <ostream> // NOLINT +#include <string> + +namespace testing { + +namespace { + +using ::std::ostream; + +// Prints count bytes of the given object, starting at offset start, in hex. +void PrintByteSegmentInObjectTo(const unsigned char* obj_bytes, size_t start, + size_t count, ostream* os) { + char text[5] = ""; + for (size_t i = 0; i != count; i++) { + const size_t j = start + i; + if (i != 0) { + // Organizes the bytes into groups of 2 for easy parsing by + // humans. + if ((j % 2) == 0) + *os << ' '; + else + *os << '-'; + } + GTEST_SNPRINTF_(text, sizeof(text), "%02X", obj_bytes[j]); + *os << text; + } +} + +// Prints the bytes in the given value to the given ostream. +void PrintBytesInObjectToImpl(const unsigned char* obj_bytes, size_t count, + ostream* os) { + // Tells the user how big the object is. + *os << count << "-byte object <"; + + const size_t kThreshold = 132; + const size_t kChunkSize = 64; + // If the object size is at least kThreshold, we'll have to omit + // some details by printing only the first and the last kChunkSize + // bytes. + // TODO(wan): let the user control the threshold using a flag. + if (count < kThreshold) { + PrintByteSegmentInObjectTo(obj_bytes, 0, count, os); + } else { + PrintByteSegmentInObjectTo(obj_bytes, 0, kChunkSize, os); + *os << " ... "; + // Rounds up to 2-byte boundary.
+ const size_t resume_pos = (count - kChunkSize + 1)/2*2; + PrintByteSegmentInObjectTo(obj_bytes, resume_pos, count - resume_pos, os); + } + *os << ">"; +} + +} // namespace + +namespace internal2 { + +// Delegates to PrintBytesInObjectToImpl() to print the bytes in the +// given object. The delegation simplifies the implementation, which +// uses the << operator and thus is easier done outside of the +// ::testing::internal namespace, which contains a << operator that +// sometimes conflicts with the one in STL. +void PrintBytesInObjectTo(const unsigned char* obj_bytes, size_t count, + ostream* os) { + PrintBytesInObjectToImpl(obj_bytes, count, os); +} + +} // namespace internal2 + +namespace internal { + +// Depending on the value of a char (or wchar_t), we print it in one +// of three formats: +// - as is if it's a printable ASCII (e.g. 'a', '2', ' '), +// - as a hexadecimal escape sequence (e.g. '\x7F'), or +// - as a special escape sequence (e.g. '\r', '\n'). +enum CharFormat { + kAsIs, + kHexEscape, + kSpecialEscape +}; + +// Returns true if c is a printable ASCII character. We test the +// value of c directly instead of calling isprint(), which is buggy on +// Windows Mobile. +inline bool IsPrintableAscii(wchar_t c) { + return 0x20 <= c && c <= 0x7E; +} + +// Prints a wide or narrow char c as a character literal without the +// quotes, escaping it when necessary; returns how c was formatted. +// The template argument UnsignedChar is the unsigned version of Char, +// which is the type of c.
+template <typename UnsignedChar, typename Char> +static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) { + switch (static_cast<wchar_t>(c)) { + case L'\0': + *os << "\\0"; + break; + case L'\'': + *os << "\\'"; + break; + case L'\\': + *os << "\\\\"; + break; + case L'\a': + *os << "\\a"; + break; + case L'\b': + *os << "\\b"; + break; + case L'\f': + *os << "\\f"; + break; + case L'\n': + *os << "\\n"; + break; + case L'\r': + *os << "\\r"; + break; + case L'\t': + *os << "\\t"; + break; + case L'\v': + *os << "\\v"; + break; + default: + if (IsPrintableAscii(c)) { + *os << static_cast<char>(c); + return kAsIs; + } else { + *os << "\\x" + String::FormatHexInt(static_cast<UnsignedChar>(c)); + return kHexEscape; + } + } + return kSpecialEscape; +} + +// Prints a wchar_t c as if it's part of a string literal, escaping it when +// necessary; returns how c was formatted. +static CharFormat PrintAsStringLiteralTo(wchar_t c, ostream* os) { + switch (c) { + case L'\'': + *os << "'"; + return kAsIs; + case L'"': + *os << "\\\""; + return kSpecialEscape; + default: + return PrintAsCharLiteralTo<wchar_t>(c, os); + } +} + +// Prints a char c as if it's part of a string literal, escaping it when +// necessary; returns how c was formatted. +static CharFormat PrintAsStringLiteralTo(char c, ostream* os) { + return PrintAsStringLiteralTo( + static_cast<wchar_t>(static_cast<unsigned char>(c)), os); +} + +// Prints a wide or narrow character c and its code. '\0' is printed +// as "'\\0'", other unprintable characters are also properly escaped +// using the standard C++ escape sequence. The template argument +// UnsignedChar is the unsigned version of Char, which is the type of c. +template <typename UnsignedChar, typename Char> +void PrintCharAndCodeTo(Char c, ostream* os) { + // First, print c as a literal in the most readable form we can find. + *os << ((sizeof(c) > 1) ?
"L'" : "'"); + const CharFormat format = PrintAsCharLiteralTo<UnsignedChar>(c, os); + *os << "'"; + + // To aid user debugging, we also print c's code in decimal, unless + // it's 0 (in which case c was printed as '\\0', making the code + // obvious). + if (c == 0) + return; + *os << " (" << static_cast<int>(c); + + // For more convenience, we print c's code again in hexadecimal, + // unless c was already printed in the form '\x##' or the code is in + // [1, 9]. + if (format == kHexEscape || (1 <= c && c <= 9)) { + // Do nothing. + } else { + *os << ", 0x" << String::FormatHexInt(static_cast<UnsignedChar>(c)); + } + *os << ")"; +} + +void PrintTo(unsigned char c, ::std::ostream* os) { + PrintCharAndCodeTo<unsigned char>(c, os); +} +void PrintTo(signed char c, ::std::ostream* os) { + PrintCharAndCodeTo<unsigned char>(c, os); +} + +// Prints a wchar_t as a symbol if it is printable or as its internal +// code otherwise and also as its code. L'\0' is printed as "L'\\0'". +void PrintTo(wchar_t wc, ostream* os) { + PrintCharAndCodeTo<wchar_t>(wc, os); +} + +// Prints the given array of characters to the ostream. CharType must be either +// char or wchar_t. +// The array starts at begin, the length is len, it may include '\0' characters +// and may not be NUL-terminated. +template <typename CharType> +static void PrintCharsAsStringTo( + const CharType* begin, size_t len, ostream* os) { + const char* const kQuoteBegin = sizeof(CharType) == 1 ? "\"" : "L\""; + *os << kQuoteBegin; + bool is_previous_hex = false; + for (size_t index = 0; index < len; ++index) { + const CharType cur = begin[index]; + if (is_previous_hex && IsXDigit(cur)) { + // Previous character is of '\x..' form and this character can be + // interpreted as another hexadecimal digit in its number. Break string to + // disambiguate.
+ *os << "\" " << kQuoteBegin; + } + is_previous_hex = PrintAsStringLiteralTo(cur, os) == kHexEscape; + } + *os << "\""; +} + +// Prints a (const) char/wchar_t array of 'len' elements, starting at address +// 'begin'. CharType must be either char or wchar_t. +template <typename CharType> +static void UniversalPrintCharArray( + const CharType* begin, size_t len, ostream* os) { + // The code + // const char kFoo[] = "foo"; + // generates an array of 4, not 3, elements, with the last one being '\0'. + // + // Therefore when printing a char array, we don't print the last element if + // it's '\0', such that the output matches the string literal as it's + // written in the source code. + if (len > 0 && begin[len - 1] == '\0') { + PrintCharsAsStringTo(begin, len - 1, os); + return; + } + + // If, however, the last element in the array is not '\0', e.g. + // const char kFoo[] = { 'f', 'o', 'o' }; + // we must print the entire array. We also print a message to indicate + // that the array is not NUL-terminated. + PrintCharsAsStringTo(begin, len, os); + *os << " (no terminating NUL)"; +} + +// Prints a (const) char array of 'len' elements, starting at address 'begin'. +void UniversalPrintArray(const char* begin, size_t len, ostream* os) { + UniversalPrintCharArray(begin, len, os); +} + +// Prints a (const) wchar_t array of 'len' elements, starting at address +// 'begin'. +void UniversalPrintArray(const wchar_t* begin, size_t len, ostream* os) { + UniversalPrintCharArray(begin, len, os); +} + +// Prints the given C string to the ostream. +void PrintTo(const char* s, ostream* os) { + if (s == NULL) { + *os << "NULL"; + } else { + *os << ImplicitCast_<const void*>(s) << " pointing to "; + PrintCharsAsStringTo(s, strlen(s), os); + } +} + +// MSVC compiler can be configured to define wchar_t as a typedef +// of unsigned short.
Defining an overload for const wchar_t* in that case +// would cause pointers to unsigned shorts to be printed as wide strings, +// possibly accessing more memory than intended and causing invalid +// memory accesses. MSVC defines the _NATIVE_WCHAR_T_DEFINED symbol when +// wchar_t is implemented as a native type. +#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) +// Prints the given wide C string to the ostream. +void PrintTo(const wchar_t* s, ostream* os) { + if (s == NULL) { + *os << "NULL"; + } else { + *os << ImplicitCast_<const void*>(s) << " pointing to "; + PrintCharsAsStringTo(s, wcslen(s), os); + } +} +#endif // wchar_t is native + +// Prints a ::string object. +#if GTEST_HAS_GLOBAL_STRING +void PrintStringTo(const ::string& s, ostream* os) { + PrintCharsAsStringTo(s.data(), s.size(), os); +} +#endif // GTEST_HAS_GLOBAL_STRING + +void PrintStringTo(const ::std::string& s, ostream* os) { + PrintCharsAsStringTo(s.data(), s.size(), os); +} + +// Prints a ::wstring object. +#if GTEST_HAS_GLOBAL_WSTRING +void PrintWideStringTo(const ::wstring& s, ostream* os) { + PrintCharsAsStringTo(s.data(), s.size(), os); +} +#endif // GTEST_HAS_GLOBAL_WSTRING + +#if GTEST_HAS_STD_WSTRING +void PrintWideStringTo(const ::std::wstring& s, ostream* os) { + PrintCharsAsStringTo(s.data(), s.size(), os); +} +#endif // GTEST_HAS_STD_WSTRING + +} // namespace internal + +} // namespace testing +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution.
+// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: mheule@google.com (Markus Heule) +// +// The Google C++ Testing Framework (Google Test) + + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick is to +// prevent a user from accidentally including gtest-internal-inl.h in +// his code. +#define GTEST_IMPLEMENTATION_ 1 +#undef GTEST_IMPLEMENTATION_ + +namespace testing { + +using internal::GetUnitTestImpl; + +// Returns the part of the failure message that precedes the stack trace +// marker, or the whole message if it contains no such marker. +std::string TestPartResult::ExtractSummary(const char* message) { + const char* const stack_trace = strstr(message, internal::kStackTraceMarker); + return stack_trace == NULL ? message : + std::string(message, stack_trace); +} + +// Prints a TestPartResult object.
+std::ostream& operator<<(std::ostream& os, const TestPartResult& result) { + return os + << result.file_name() << ":" << result.line_number() << ": " + << (result.type() == TestPartResult::kSuccess ? "Success" : + result.type() == TestPartResult::kFatalFailure ? "Fatal failure" : + "Non-fatal failure") << ":\n" + << result.message() << std::endl; +} + +// Appends a TestPartResult to the array. +void TestPartResultArray::Append(const TestPartResult& result) { + array_.push_back(result); +} + +// Returns the TestPartResult at the given index (0-based); prints a message and aborts if the index is out of range. +const TestPartResult& TestPartResultArray::GetTestPartResult(int index) const { + if (index < 0 || index >= size()) { + printf("\nInvalid index (%d) into TestPartResultArray.\n", index); + internal::posix::Abort(); + } + + return array_[index]; +} + +// Returns the number of TestPartResult objects in the array. +int TestPartResultArray::size() const { + return static_cast<int>(array_.size()); +} + +namespace internal { + +HasNewFatalFailureHelper::HasNewFatalFailureHelper() + : has_new_fatal_failure_(false), + original_reporter_(GetUnitTestImpl()-> + GetTestPartResultReporterForCurrentThread()) { + GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(this); +} + +HasNewFatalFailureHelper::~HasNewFatalFailureHelper() { + GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread( + original_reporter_); +} + +void HasNewFatalFailureHelper::ReportTestPartResult( + const TestPartResult& result) { + if (result.fatally_failed()) + has_new_fatal_failure_ = true; + original_reporter_->ReportTestPartResult(result); +} + +} // namespace internal + +} // namespace testing +// Copyright 2008 Google Inc. +// All Rights Reserved.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + + +namespace testing { +namespace internal { + +#if GTEST_HAS_TYPED_TEST_P + +// Skips to the first non-space char in str. Returns an empty string if str +// contains only whitespace characters. 
+static const char* SkipSpaces(const char* str) { + while (IsSpace(*str)) + str++; + return str; +} + +// Verifies that registered_tests match the test names in +// defined_test_names_; returns registered_tests (with leading whitespace skipped) if successful, or +// aborts the program otherwise. +const char* TypedTestCasePState::VerifyRegisteredTestNames( + const char* file, int line, const char* registered_tests) { + typedef ::std::set<const char*>::const_iterator DefinedTestIter; + registered_ = true; + + // Skip initial whitespace in registered_tests since some + // preprocessors prefix stringized literals with whitespace. + registered_tests = SkipSpaces(registered_tests); + + Message errors; + ::std::set<std::string> tests; + for (const char* names = registered_tests; names != NULL; + names = SkipComma(names)) { + const std::string name = GetPrefixUntilComma(names); + if (tests.count(name) != 0) { + errors << "Test " << name << " is listed more than once.\n"; + continue; + } + + bool found = false; + for (DefinedTestIter it = defined_test_names_.begin(); + it != defined_test_names_.end(); + ++it) { + if (name == *it) { + found = true; + break; + } + } + + if (found) { + tests.insert(name); + } else { + errors << "No test named " << name + << " can be found in this test case.\n"; + } + } + + for (DefinedTestIter it = defined_test_names_.begin(); + it != defined_test_names_.end(); + ++it) { + if (tests.count(*it) == 0) { + errors << "You forgot to list test " << *it << ".\n"; + } + } + + const std::string& errors_str = errors.GetString(); + if (errors_str != "") { + fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(), + errors_str.c_str()); + fflush(stderr); + posix::Abort(); + } + + return registered_tests; +} + +#endif // GTEST_HAS_TYPED_TEST_P + +} // namespace internal +} // namespace testing diff --git a/lib/kokkos/tpls/gtest/gtest/gtest-test-part.h b/lib/kokkos/tpls/gtest/gtest/gtest-test-part.h new file mode 120000 index
0000000000000000000000000000000000000000..48d39090f1cabfc4a852d54e0e1f186362eeb1f5 --- /dev/null +++ b/lib/kokkos/tpls/gtest/gtest/gtest-test-part.h @@ -0,0 +1 @@ +gtest.h \ No newline at end of file diff --git a/lib/kokkos/tpls/gtest/gtest/gtest.h b/lib/kokkos/tpls/gtest/gtest/gtest.h new file mode 100644 index 0000000000000000000000000000000000000000..c74d098fa9b179ea87a57a4a42b735e430b83c6d --- /dev/null +++ b/lib/kokkos/tpls/gtest/gtest/gtest.h @@ -0,0 +1,20065 @@ +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// The Google C++ Testing Framework (Google Test) +// +// This header file defines the public API for Google Test. It should be +// included by any test program that uses Google Test. +// +// IMPORTANT NOTE: Due to limitation of the C++ language, we have to +// leave some internal implementation details in this header file. +// They are clearly marked by comments like this: +// +// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +// +// Such code is NOT meant to be used by a user directly, and is subject +// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user +// program! +// +// Acknowledgment: Google Test borrowed the idea of automatic test +// registration from Barthelemy Dagenais' (barthelemy@prologique.com) +// easyUnit framework. + +#ifdef __GNUC__ +#pragma GCC system_header +#endif + +#ifndef GTEST_INCLUDE_GTEST_GTEST_H_ +#define GTEST_INCLUDE_GTEST_GTEST_H_ + +#include <limits> +#include <ostream> +#include <vector> + +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee) +// +// The Google C++ Testing Framework (Google Test) +// +// This header file declares functions and macros used internally by +// Google Test. They are subject to change without notice. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_ + +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: wan@google.com (Zhanyong Wan) +// +// Low-level types and utilities for porting Google Test to various +// platforms. They are subject to change without notice. DO NOT USE +// THEM IN USER CODE. +// +// This file is fundamental to Google Test. All other Google Test source +// files are expected to #include this. Therefore, it cannot #include +// any other Google Test header. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ + +// The user can define the following macros in the build script to +// control Google Test's behavior. If the user doesn't define a macro +// in this list, Google Test will define it. +// +// GTEST_HAS_CLONE - Define it to 1/0 to indicate that clone(2) +// is/isn't available. 
+// GTEST_HAS_EXCEPTIONS - Define it to 1/0 to indicate that exceptions +// are enabled. +// GTEST_HAS_GLOBAL_STRING - Define it to 1/0 to indicate that ::string +// is/isn't available (some systems define +// ::string, which is different to std::string). +// GTEST_HAS_GLOBAL_WSTRING - Define it to 1/0 to indicate that ::wstring +// is/isn't available (some systems define +// ::wstring, which is different to std::wstring). +// GTEST_HAS_POSIX_RE - Define it to 1/0 to indicate that POSIX regular +// expressions are/aren't available. +// GTEST_HAS_PTHREAD - Define it to 1/0 to indicate that <pthread.h> +// is/isn't available. +// GTEST_HAS_RTTI - Define it to 1/0 to indicate that RTTI is/isn't +// enabled. +// GTEST_HAS_STD_WSTRING - Define it to 1/0 to indicate that +// std::wstring does/doesn't work (Google Test can +// be used where std::wstring is unavailable). +// GTEST_HAS_TR1_TUPLE - Define it to 1/0 to indicate tr1::tuple +// is/isn't available. +// GTEST_HAS_SEH - Define it to 1/0 to indicate whether the +// compiler supports Microsoft's "Structured +// Exception Handling". +// GTEST_HAS_STREAM_REDIRECTION +// - Define it to 1/0 to indicate whether the +// platform supports I/O stream redirection using +// dup() and dup2(). +// GTEST_USE_OWN_TR1_TUPLE - Define it to 1/0 to indicate whether Google +// Test's own tr1 tuple implementation should be +// used. Unused when the user sets +// GTEST_HAS_TR1_TUPLE to 0. +// GTEST_LANG_CXX11 - Define it to 1/0 to indicate that Google Test +// is building in C++11/C++98 mode. +// GTEST_LINKED_AS_SHARED_LIBRARY +// - Define to 1 when compiling tests that use +// Google Test as a shared library (known as +// DLL on Windows). +// GTEST_CREATE_SHARED_LIBRARY +// - Define to 1 when compiling Google Test itself +// as a shared library.
+ +// This header defines the following utilities: +// +// Macros indicating the current platform (defined to 1 if compiled on +// the given platform; otherwise undefined): +// GTEST_OS_AIX - IBM AIX +// GTEST_OS_CYGWIN - Cygwin +// GTEST_OS_HPUX - HP-UX +// GTEST_OS_LINUX - Linux +// GTEST_OS_LINUX_ANDROID - Google Android +// GTEST_OS_MAC - Mac OS X +// GTEST_OS_IOS - iOS +// GTEST_OS_IOS_SIMULATOR - iOS simulator +// GTEST_OS_NACL - Google Native Client (NaCl) +// GTEST_OS_OPENBSD - OpenBSD +// GTEST_OS_QNX - QNX +// GTEST_OS_SOLARIS - Sun Solaris +// GTEST_OS_SYMBIAN - Symbian +// GTEST_OS_WINDOWS - Windows (Desktop, MinGW, or Mobile) +// GTEST_OS_WINDOWS_DESKTOP - Windows Desktop +// GTEST_OS_WINDOWS_MINGW - MinGW +// GTEST_OS_WINDOWS_MOBILE - Windows Mobile +// GTEST_OS_ZOS - z/OS +// +// Among the platforms, Cygwin, Linux, Mac OS X, and Windows have the +// most stable support. Since core members of the Google Test project +// don't have access to other platforms, support for them may be less +// stable. If you notice any problems on your platform, please notify +// googletestframework@googlegroups.com (patches for fixing them are +// even more welcome!). +// +// Note that it is possible that none of the GTEST_OS_* macros are defined. +// +// Macros indicating available Google Test features (defined to 1 if +// the corresponding feature is supported; otherwise undefined): +// GTEST_HAS_COMBINE - the Combine() function (for value-parameterized +// tests) +// GTEST_HAS_DEATH_TEST - death tests +// GTEST_HAS_PARAM_TEST - value-parameterized tests +// GTEST_HAS_TYPED_TEST - typed tests +// GTEST_HAS_TYPED_TEST_P - type-parameterized tests +// GTEST_USES_POSIX_RE - enhanced POSIX regex is used. Do not confuse with +// GTEST_HAS_POSIX_RE (see above) which users can +// define themselves. +// GTEST_USES_SIMPLE_RE - our own simple regex is used; +// the above two are mutually exclusive. +// GTEST_CAN_COMPARE_NULL - accepts untyped NULL in EXPECT_EQ().
+// +// Macros for basic C++ coding: +// GTEST_AMBIGUOUS_ELSE_BLOCKER_ - for disabling a gcc warning. +// GTEST_ATTRIBUTE_UNUSED_ - declares that a class' instances or a +// variable don't have to be used. +// GTEST_DISALLOW_ASSIGN_ - disables operator=. +// GTEST_DISALLOW_COPY_AND_ASSIGN_ - disables copy ctor and operator=. +// GTEST_MUST_USE_RESULT_ - declares that a function's result must be used. +// +// Synchronization: +// Mutex, MutexLock, ThreadLocal, GetThreadCount() +// - synchronization primitives. +// GTEST_IS_THREADSAFE - defined to 1 to indicate that the above +// synchronization primitives have real implementations +// and Google Test is thread-safe; or 0 otherwise. +// +// Template meta programming: +// is_pointer - as in TR1; needed on Symbian and IBM XL C/C++ only. +// IteratorTraits - partial implementation of std::iterator_traits, which +// is not available in libCstd when compiled with Sun C++. +// +// Smart pointers: +// scoped_ptr - as in TR2. +// +// Regular expressions: +// RE - a simple regular expression class using the POSIX +// Extended Regular Expression syntax on UNIX-like +// platforms, or a reduced regular expression syntax on +// other platforms, including Windows. +// +// Logging: +// GTEST_LOG_() - logs messages at the specified severity level. +// LogToStderr() - directs all log messages to stderr. +// FlushInfoLog() - flushes informational log messages. +// +// Stdout and stderr capturing: +// CaptureStdout() - starts capturing stdout. +// GetCapturedStdout() - stops capturing stdout and returns the captured +// string. +// CaptureStderr() - starts capturing stderr. +// GetCapturedStderr() - stops capturing stderr and returns the captured +// string. +// +// Integer types: +// TypeWithSize - maps an integer to an int type. +// Int32, UInt32, Int64, UInt64, TimeInMillis +// - integers of known sizes. +// BiggestInt - the biggest signed integer type. +// +// Command-line utilities: +// GTEST_FLAG() - references a flag.
+// GTEST_DECLARE_*() - declares a flag. +// GTEST_DEFINE_*() - defines a flag. +// GetInjectableArgvs() - returns the command line as a vector of strings. +// +// Environment variable utilities: +// GetEnv() - gets the value of an environment variable. +// BoolFromGTestEnv() - parses a bool environment variable. +// Int32FromGTestEnv() - parses an Int32 environment variable. +// StringFromGTestEnv() - parses a string environment variable. + +#include <ctype.h> // for isspace, etc +#include <stddef.h> // for ptrdiff_t +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#ifndef _WIN32_WCE +# include <sys/types.h> +# include <sys/stat.h> +#endif // !_WIN32_WCE + +#if defined __APPLE__ +# include <AvailabilityMacros.h> +# include <TargetConditionals.h> +#endif + +#include <iostream> // NOLINT +#include <sstream> // NOLINT +#include <string> // NOLINT + +#define GTEST_DEV_EMAIL_ "googletestframework@@googlegroups.com" +#define GTEST_FLAG_PREFIX_ "gtest_" +#define GTEST_FLAG_PREFIX_DASH_ "gtest-" +#define GTEST_FLAG_PREFIX_UPPER_ "GTEST_" +#define GTEST_NAME_ "Google Test" +#define GTEST_PROJECT_URL_ "http://code.google.com/p/googletest/" + +// Determines the version of gcc that is used to compile this. +#ifdef __GNUC__ +// 40302 means version 4.3.2. +# define GTEST_GCC_VER_ \ + (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__) +#endif // __GNUC__ + +// Determines the platform on which Google Test is compiled. 
+#ifdef __CYGWIN__ +# define GTEST_OS_CYGWIN 1 +#elif defined __SYMBIAN32__ +# define GTEST_OS_SYMBIAN 1 +#elif defined _WIN32 +# define GTEST_OS_WINDOWS 1 +# ifdef _WIN32_WCE +# define GTEST_OS_WINDOWS_MOBILE 1 +# elif defined(__MINGW__) || defined(__MINGW32__) +# define GTEST_OS_WINDOWS_MINGW 1 +# else +# define GTEST_OS_WINDOWS_DESKTOP 1 +# endif // _WIN32_WCE +#elif defined __APPLE__ +# define GTEST_OS_MAC 1 +# if TARGET_OS_IPHONE +# define GTEST_OS_IOS 1 +# if TARGET_IPHONE_SIMULATOR +# define GTEST_OS_IOS_SIMULATOR 1 +# endif +# endif +#elif defined __linux__ +# define GTEST_OS_LINUX 1 +# if defined __ANDROID__ +# define GTEST_OS_LINUX_ANDROID 1 +# endif +#elif defined __MVS__ +# define GTEST_OS_ZOS 1 +#elif defined(__sun) && defined(__SVR4) +# define GTEST_OS_SOLARIS 1 +#elif defined(_AIX) +# define GTEST_OS_AIX 1 +#elif defined(__hpux) +# define GTEST_OS_HPUX 1 +#elif defined __native_client__ +# define GTEST_OS_NACL 1 +#elif defined __OpenBSD__ +# define GTEST_OS_OPENBSD 1 +#elif defined __QNX__ +# define GTEST_OS_QNX 1 +#endif // __CYGWIN__ + +#ifndef GTEST_LANG_CXX11 +// gcc and clang define __GXX_EXPERIMENTAL_CXX0X__ when +// -std={c,gnu}++{0x,11} is passed. The C++11 standard specifies a +// value for __cplusplus, and recent versions of clang, gcc, and +// probably other compilers set that too in C++11 mode. +# if __GXX_EXPERIMENTAL_CXX0X__ || __cplusplus >= 201103L +// Compiling in at least C++11 mode. +# define GTEST_LANG_CXX11 1 +# else +# define GTEST_LANG_CXX11 0 +# endif +#endif + +// Brings in definitions for functions used in the testing::internal::posix +// namespace (read, write, close, chdir, isatty, stat). We do not currently +// use them on Windows Mobile. +#if !GTEST_OS_WINDOWS +// This assumes that non-Windows OSes provide unistd.h. For OSes where this +// is not the case, we need to include headers that provide the functions +// mentioned above. 
+# include <unistd.h> +# include <strings.h> +#elif !GTEST_OS_WINDOWS_MOBILE +# include <direct.h> +# include <io.h> +#endif + +#if GTEST_OS_LINUX_ANDROID +// Used to define __ANDROID_API__ matching the target NDK API level. +# include <android/api-level.h> // NOLINT +#endif + +// Defines this to true iff Google Test can use POSIX regular expressions. +#ifndef GTEST_HAS_POSIX_RE +# if GTEST_OS_LINUX_ANDROID +// On Android, <regex.h> is only available starting with Gingerbread. +# define GTEST_HAS_POSIX_RE (__ANDROID_API__ >= 9) +# else +# define GTEST_HAS_POSIX_RE (!GTEST_OS_WINDOWS) +# endif +#endif + +#if GTEST_HAS_POSIX_RE + +// On some platforms, <regex.h> needs someone to define size_t, and +// won't compile otherwise. We can #include it here as we already +// included <stdlib.h>, which is guaranteed to define size_t through +// <stddef.h>. +# include <regex.h> // NOLINT + +# define GTEST_USES_POSIX_RE 1 + +#elif GTEST_OS_WINDOWS + +// <regex.h> is not available on Windows. Use our own simple regex +// implementation instead. +# define GTEST_USES_SIMPLE_RE 1 + +#else + +// <regex.h> may not be available on this platform. Use our own +// simple regex implementation instead. +# define GTEST_USES_SIMPLE_RE 1 + +#endif // GTEST_HAS_POSIX_RE + +#ifndef GTEST_HAS_EXCEPTIONS +// The user didn't tell us whether exceptions are enabled, so we need +// to figure it out. +# if defined(_MSC_VER) || defined(__BORLANDC__) +// MSVC's and C++Builder's implementations of the STL use the _HAS_EXCEPTIONS +// macro to enable exceptions, so we'll do the same. +// Assumes that exceptions are enabled by default. +# ifndef _HAS_EXCEPTIONS +# define _HAS_EXCEPTIONS 1 +# endif // _HAS_EXCEPTIONS +# define GTEST_HAS_EXCEPTIONS _HAS_EXCEPTIONS +# elif defined(__GNUC__) && __EXCEPTIONS +// gcc defines __EXCEPTIONS to 1 iff exceptions are enabled. +# define GTEST_HAS_EXCEPTIONS 1 +# elif defined(__SUNPRO_CC) +// Sun Pro CC supports exceptions. 
However, there is no compile-time way of +// detecting whether they are enabled or not. Therefore, we assume that +// they are enabled unless the user tells us otherwise. +# define GTEST_HAS_EXCEPTIONS 1 +# elif defined(__IBMCPP__) && __EXCEPTIONS +// xlC defines __EXCEPTIONS to 1 iff exceptions are enabled. +# define GTEST_HAS_EXCEPTIONS 1 +# elif defined(__HP_aCC) +// Exception handling is in effect by default in HP aCC compiler. It has to +// be turned of by +noeh compiler option if desired. +# define GTEST_HAS_EXCEPTIONS 1 +# else +// For other compilers, we assume exceptions are disabled to be +// conservative. +# define GTEST_HAS_EXCEPTIONS 0 +# endif // defined(_MSC_VER) || defined(__BORLANDC__) +#endif // GTEST_HAS_EXCEPTIONS + +#if !defined(GTEST_HAS_STD_STRING) +// Even though we don't use this macro any longer, we keep it in case +// some clients still depend on it. +# define GTEST_HAS_STD_STRING 1 +#elif !GTEST_HAS_STD_STRING +// The user told us that ::std::string isn't available. +# error "Google Test cannot be used where ::std::string isn't available." +#endif // !defined(GTEST_HAS_STD_STRING) + +#ifndef GTEST_HAS_GLOBAL_STRING +// The user didn't tell us whether ::string is available, so we need +// to figure it out. + +# define GTEST_HAS_GLOBAL_STRING 0 + +#endif // GTEST_HAS_GLOBAL_STRING + +#ifndef GTEST_HAS_STD_WSTRING +// The user didn't tell us whether ::std::wstring is available, so we need +// to figure it out. +// TODO(wan@google.com): uses autoconf to detect whether ::std::wstring +// is available. + +// Cygwin 1.7 and below doesn't support ::std::wstring. +// Solaris' libc++ doesn't support it either. Android has +// no support for it at least as recent as Froyo (2.2). +# define GTEST_HAS_STD_WSTRING \ + (!(GTEST_OS_LINUX_ANDROID || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS)) + +#endif // GTEST_HAS_STD_WSTRING + +#ifndef GTEST_HAS_GLOBAL_WSTRING +// The user didn't tell us whether ::wstring is available, so we need +// to figure it out. 
+# define GTEST_HAS_GLOBAL_WSTRING \ + (GTEST_HAS_STD_WSTRING && GTEST_HAS_GLOBAL_STRING) +#endif // GTEST_HAS_GLOBAL_WSTRING + +// Determines whether RTTI is available. +#ifndef GTEST_HAS_RTTI +// The user didn't tell us whether RTTI is enabled, so we need to +// figure it out. + +# ifdef _MSC_VER + +# ifdef _CPPRTTI // MSVC defines this macro iff RTTI is enabled. +# define GTEST_HAS_RTTI 1 +# else +# define GTEST_HAS_RTTI 0 +# endif + +// Starting with version 4.3.2, gcc defines __GXX_RTTI iff RTTI is enabled. +# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40302) + +# ifdef __GXX_RTTI +// When building against STLport with the Android NDK and with +// -frtti -fno-exceptions, the build fails at link time with undefined +// references to __cxa_bad_typeid. Note sure if STL or toolchain bug, +// so disable RTTI when detected. +# if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR) && \ + !defined(__EXCEPTIONS) +# define GTEST_HAS_RTTI 0 +# else +# define GTEST_HAS_RTTI 1 +# endif // GTEST_OS_LINUX_ANDROID && __STLPORT_MAJOR && !__EXCEPTIONS +# else +# define GTEST_HAS_RTTI 0 +# endif // __GXX_RTTI + +// Clang defines __GXX_RTTI starting with version 3.0, but its manual recommends +// using has_feature instead. has_feature(cxx_rtti) is supported since 2.7, the +// first version with C++ support. +# elif defined(__clang__) + +# define GTEST_HAS_RTTI __has_feature(cxx_rtti) + +// Starting with version 9.0 IBM Visual Age defines __RTTI_ALL__ to 1 if +// both the typeid and dynamic_cast features are present. +# elif defined(__IBMCPP__) && (__IBMCPP__ >= 900) + +# ifdef __RTTI_ALL__ +# define GTEST_HAS_RTTI 1 +# else +# define GTEST_HAS_RTTI 0 +# endif + +# else + +// For all other compilers, we assume RTTI is enabled. +# define GTEST_HAS_RTTI 1 + +# endif // _MSC_VER + +#endif // GTEST_HAS_RTTI + +// It's this header's responsibility to #include <typeinfo> when RTTI +// is enabled. 
+#if GTEST_HAS_RTTI +# include <typeinfo> +#endif + +// Determines whether Google Test can use the pthreads library. +#ifndef GTEST_HAS_PTHREAD +// The user didn't tell us explicitly, so we assume pthreads support is +// available on Linux and Mac. +// +// To disable threading support in Google Test, add -DGTEST_HAS_PTHREAD=0 +// to your compiler flags. +# define GTEST_HAS_PTHREAD (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX \ + || GTEST_OS_QNX) +#endif // GTEST_HAS_PTHREAD + +#if GTEST_HAS_PTHREAD +// gtest-port.h guarantees to #include <pthread.h> when GTEST_HAS_PTHREAD is +// true. +# include <pthread.h> // NOLINT + +// For timespec and nanosleep, used below. +# include <time.h> // NOLINT +#endif + +// Determines whether Google Test can use tr1/tuple. You can define +// this macro to 0 to prevent Google Test from using tuple (any +// feature depending on tuple with be disabled in this mode). +#ifndef GTEST_HAS_TR1_TUPLE +# if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR) +// STLport, provided with the Android NDK, has neither <tr1/tuple> or <tuple>. +# define GTEST_HAS_TR1_TUPLE 0 +# else +// The user didn't tell us not to do it, so we assume it's OK. +# define GTEST_HAS_TR1_TUPLE 1 +# endif +#endif // GTEST_HAS_TR1_TUPLE + +// Determines whether Google Test's own tr1 tuple implementation +// should be used. +#ifndef GTEST_USE_OWN_TR1_TUPLE +// The user didn't tell us, so we need to figure it out. + +// We use our own TR1 tuple if we aren't sure the user has an +// implementation of it already. At this time, libstdc++ 4.0.0+ and +// MSVC 2010 are the only mainstream standard libraries that come +// with a TR1 tuple implementation. NVIDIA's CUDA NVCC compiler +// pretends to be GCC by defining __GNUC__ and friends, but cannot +// compile GCC's tuple implementation. MSVC 2008 (9.0) provides TR1 +// tuple in a 323 MB Feature Pack download, which we cannot assume the +// user has. QNX's QCC compiler is a modified GCC but it doesn't +// support TR1 tuple. 
libc++ only provides std::tuple, in C++11 mode, +// and it can be used with some compilers that define __GNUC__. +# if (defined(__GNUC__) && !defined(__CUDACC__) && (GTEST_GCC_VER_ >= 40000) \ + && !GTEST_OS_QNX && !defined(_LIBCPP_VERSION)) || _MSC_VER >= 1600 +# define GTEST_ENV_HAS_TR1_TUPLE_ 1 +# endif + +// C++11 specifies that <tuple> provides std::tuple. Use that if gtest is used +// in C++11 mode and libstdc++ isn't very old (binaries targeting OS X 10.6 +// can build with clang but need to use gcc4.2's libstdc++). +# if GTEST_LANG_CXX11 && (!defined(__GLIBCXX__) || __GLIBCXX__ > 20110325) +# define GTEST_ENV_HAS_STD_TUPLE_ 1 +# endif + +# if GTEST_ENV_HAS_TR1_TUPLE_ || GTEST_ENV_HAS_STD_TUPLE_ +# define GTEST_USE_OWN_TR1_TUPLE 0 +# else +# define GTEST_USE_OWN_TR1_TUPLE 1 +# endif + +#endif // GTEST_USE_OWN_TR1_TUPLE + +// To avoid conditional compilation everywhere, we make it +// gtest-port.h's responsibility to #include the header implementing +// tr1/tuple. +#if GTEST_HAS_TR1_TUPLE + +# if GTEST_USE_OWN_TR1_TUPLE +// This file was GENERATED by command: +// pump.py gtest-tuple.h.pump +// DO NOT EDIT BY HAND!!! + +// Copyright 2009 Google Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Implements a subset of TR1 tuple needed by Google Test and Google Mock. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ + +#include <utility> // For ::std::pair. + +// The compiler used in Symbian has a bug that prevents us from declaring the +// tuple template as a friend (it complains that tuple is redefined). This +// hack bypasses the bug by declaring the members that should otherwise be +// private as public. +// Sun Studio versions < 12 also have the above bug. +#if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590) +# define GTEST_DECLARE_TUPLE_AS_FRIEND_ public: +#else +# define GTEST_DECLARE_TUPLE_AS_FRIEND_ \ + template <GTEST_10_TYPENAMES_(U)> friend class tuple; \ + private: +#endif + +// GTEST_n_TUPLE_(T) is the type of an n-tuple. 
+#define GTEST_0_TUPLE_(T) tuple<> +#define GTEST_1_TUPLE_(T) tuple<T##0, void, void, void, void, void, void, \ + void, void, void> +#define GTEST_2_TUPLE_(T) tuple<T##0, T##1, void, void, void, void, void, \ + void, void, void> +#define GTEST_3_TUPLE_(T) tuple<T##0, T##1, T##2, void, void, void, void, \ + void, void, void> +#define GTEST_4_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, void, void, void, \ + void, void, void> +#define GTEST_5_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, void, void, \ + void, void, void> +#define GTEST_6_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, void, \ + void, void, void> +#define GTEST_7_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \ + void, void, void> +#define GTEST_8_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \ + T##7, void, void> +#define GTEST_9_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \ + T##7, T##8, void> +#define GTEST_10_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \ + T##7, T##8, T##9> + +// GTEST_n_TYPENAMES_(T) declares a list of n typenames. 
+#define GTEST_0_TYPENAMES_(T) +#define GTEST_1_TYPENAMES_(T) typename T##0 +#define GTEST_2_TYPENAMES_(T) typename T##0, typename T##1 +#define GTEST_3_TYPENAMES_(T) typename T##0, typename T##1, typename T##2 +#define GTEST_4_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ + typename T##3 +#define GTEST_5_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ + typename T##3, typename T##4 +#define GTEST_6_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ + typename T##3, typename T##4, typename T##5 +#define GTEST_7_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ + typename T##3, typename T##4, typename T##5, typename T##6 +#define GTEST_8_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ + typename T##3, typename T##4, typename T##5, typename T##6, typename T##7 +#define GTEST_9_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ + typename T##3, typename T##4, typename T##5, typename T##6, \ + typename T##7, typename T##8 +#define GTEST_10_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ + typename T##3, typename T##4, typename T##5, typename T##6, \ + typename T##7, typename T##8, typename T##9 + +// In theory, defining stuff in the ::std namespace is undefined +// behavior. We can do this as we are playing the role of a standard +// library vendor. +namespace std { +namespace tr1 { + +template <typename T0 = void, typename T1 = void, typename T2 = void, + typename T3 = void, typename T4 = void, typename T5 = void, + typename T6 = void, typename T7 = void, typename T8 = void, + typename T9 = void> +class tuple; + +// Anything in namespace gtest_internal is Google Test's INTERNAL +// IMPLEMENTATION DETAIL and MUST NOT BE USED DIRECTLY in user code. +namespace gtest_internal { + +// ByRef<T>::type is T if T is a reference; otherwise it's const T&. 
+template <typename T> +struct ByRef { typedef const T& type; }; // NOLINT +template <typename T> +struct ByRef<T&> { typedef T& type; }; // NOLINT + +// A handy wrapper for ByRef. +#define GTEST_BY_REF_(T) typename ::std::tr1::gtest_internal::ByRef<T>::type + +// AddRef<T>::type is T if T is a reference; otherwise it's T&. This +// is the same as tr1::add_reference<T>::type. +template <typename T> +struct AddRef { typedef T& type; }; // NOLINT +template <typename T> +struct AddRef<T&> { typedef T& type; }; // NOLINT + +// A handy wrapper for AddRef. +#define GTEST_ADD_REF_(T) typename ::std::tr1::gtest_internal::AddRef<T>::type + +// A helper for implementing get<k>(). +template <int k> class Get; + +// A helper for implementing tuple_element<k, T>. kIndexValid is true +// iff k < the number of fields in tuple type T. +template <bool kIndexValid, int kIndex, class Tuple> +struct TupleElement; + +template <GTEST_10_TYPENAMES_(T)> +struct TupleElement<true, 0, GTEST_10_TUPLE_(T) > { + typedef T0 type; +}; + +template <GTEST_10_TYPENAMES_(T)> +struct TupleElement<true, 1, GTEST_10_TUPLE_(T) > { + typedef T1 type; +}; + +template <GTEST_10_TYPENAMES_(T)> +struct TupleElement<true, 2, GTEST_10_TUPLE_(T) > { + typedef T2 type; +}; + +template <GTEST_10_TYPENAMES_(T)> +struct TupleElement<true, 3, GTEST_10_TUPLE_(T) > { + typedef T3 type; +}; + +template <GTEST_10_TYPENAMES_(T)> +struct TupleElement<true, 4, GTEST_10_TUPLE_(T) > { + typedef T4 type; +}; + +template <GTEST_10_TYPENAMES_(T)> +struct TupleElement<true, 5, GTEST_10_TUPLE_(T) > { + typedef T5 type; +}; + +template <GTEST_10_TYPENAMES_(T)> +struct TupleElement<true, 6, GTEST_10_TUPLE_(T) > { + typedef T6 type; +}; + +template <GTEST_10_TYPENAMES_(T)> +struct TupleElement<true, 7, GTEST_10_TUPLE_(T) > { + typedef T7 type; +}; + +template <GTEST_10_TYPENAMES_(T)> +struct TupleElement<true, 8, GTEST_10_TUPLE_(T) > { + typedef T8 type; +}; + +template <GTEST_10_TYPENAMES_(T)> +struct TupleElement<true, 9, 
GTEST_10_TUPLE_(T) > { + typedef T9 type; +}; + +} // namespace gtest_internal + +template <> +class tuple<> { + public: + tuple() {} + tuple(const tuple& /* t */) {} + tuple& operator=(const tuple& /* t */) { return *this; } +}; + +template <GTEST_1_TYPENAMES_(T)> +class GTEST_1_TUPLE_(T) { + public: + template <int k> friend class gtest_internal::Get; + + tuple() : f0_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0) : f0_(f0) {} + + tuple(const tuple& t) : f0_(t.f0_) {} + + template <GTEST_1_TYPENAMES_(U)> + tuple(const GTEST_1_TUPLE_(U)& t) : f0_(t.f0_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template <GTEST_1_TYPENAMES_(U)> + tuple& operator=(const GTEST_1_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template <GTEST_1_TYPENAMES_(U)> + tuple& CopyFrom(const GTEST_1_TUPLE_(U)& t) { + f0_ = t.f0_; + return *this; + } + + T0 f0_; +}; + +template <GTEST_2_TYPENAMES_(T)> +class GTEST_2_TUPLE_(T) { + public: + template <int k> friend class gtest_internal::Get; + + tuple() : f0_(), f1_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1) : f0_(f0), + f1_(f1) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_) {} + + template <GTEST_2_TYPENAMES_(U)> + tuple(const GTEST_2_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_) {} + template <typename U0, typename U1> + tuple(const ::std::pair<U0, U1>& p) : f0_(p.first), f1_(p.second) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template <GTEST_2_TYPENAMES_(U)> + tuple& operator=(const GTEST_2_TUPLE_(U)& t) { + return CopyFrom(t); + } + template <typename U0, typename U1> + tuple& operator=(const ::std::pair<U0, U1>& p) { + f0_ = p.first; + f1_ = p.second; + return *this; + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template <GTEST_2_TYPENAMES_(U)> + tuple& CopyFrom(const GTEST_2_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + return *this; + } + + T0 f0_; + T1 f1_; +}; + +template <GTEST_3_TYPENAMES_(T)> +class GTEST_3_TUPLE_(T) { + 
public: + template <int k> friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2) : f0_(f0), f1_(f1), f2_(f2) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {} + + template <GTEST_3_TYPENAMES_(U)> + tuple(const GTEST_3_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template <GTEST_3_TYPENAMES_(U)> + tuple& operator=(const GTEST_3_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template <GTEST_3_TYPENAMES_(U)> + tuple& CopyFrom(const GTEST_3_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; +}; + +template <GTEST_4_TYPENAMES_(T)> +class GTEST_4_TUPLE_(T) { + public: + template <int k> friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3) : f0_(f0), f1_(f1), f2_(f2), + f3_(f3) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_) {} + + template <GTEST_4_TYPENAMES_(U)> + tuple(const GTEST_4_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template <GTEST_4_TYPENAMES_(U)> + tuple& operator=(const GTEST_4_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template <GTEST_4_TYPENAMES_(U)> + tuple& CopyFrom(const GTEST_4_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; +}; + +template <GTEST_5_TYPENAMES_(T)> +class GTEST_5_TUPLE_(T) { + public: + template <int k> friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, 
GTEST_BY_REF_(T3) f3, + GTEST_BY_REF_(T4) f4) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_) {} + + template <GTEST_5_TYPENAMES_(U)> + tuple(const GTEST_5_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template <GTEST_5_TYPENAMES_(U)> + tuple& operator=(const GTEST_5_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template <GTEST_5_TYPENAMES_(U)> + tuple& CopyFrom(const GTEST_5_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + f4_ = t.f4_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 f4_; +}; + +template <GTEST_6_TYPENAMES_(T)> +class GTEST_6_TUPLE_(T) { + public: + template <int k> friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, + GTEST_BY_REF_(T5) f5) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4), + f5_(f5) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_), f5_(t.f5_) {} + + template <GTEST_6_TYPENAMES_(U)> + tuple(const GTEST_6_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_), f5_(t.f5_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template <GTEST_6_TYPENAMES_(U)> + tuple& operator=(const GTEST_6_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template <GTEST_6_TYPENAMES_(U)> + tuple& CopyFrom(const GTEST_6_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + f4_ = t.f4_; + f5_ = t.f5_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 f4_; + T5 f5_; +}; + +template <GTEST_7_TYPENAMES_(T)> +class GTEST_7_TUPLE_(T) { + public: + template <int k> friend class 
gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, + GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6) : f0_(f0), f1_(f1), f2_(f2), + f3_(f3), f4_(f4), f5_(f5), f6_(f6) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {} + + template <GTEST_7_TYPENAMES_(U)> + tuple(const GTEST_7_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template <GTEST_7_TYPENAMES_(U)> + tuple& operator=(const GTEST_7_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template <GTEST_7_TYPENAMES_(U)> + tuple& CopyFrom(const GTEST_7_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + f4_ = t.f4_; + f5_ = t.f5_; + f6_ = t.f6_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 f4_; + T5 f5_; + T6 f6_; +}; + +template <GTEST_8_TYPENAMES_(T)> +class GTEST_8_TUPLE_(T) { + public: + template <int k> friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, + GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, + GTEST_BY_REF_(T7) f7) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4), + f5_(f5), f6_(f6), f7_(f7) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {} + + template <GTEST_8_TYPENAMES_(U)> + tuple(const GTEST_8_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template <GTEST_8_TYPENAMES_(U)> + tuple& 
operator=(const GTEST_8_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template <GTEST_8_TYPENAMES_(U)> + tuple& CopyFrom(const GTEST_8_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + f4_ = t.f4_; + f5_ = t.f5_; + f6_ = t.f6_; + f7_ = t.f7_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 f4_; + T5 f5_; + T6 f6_; + T7 f7_; +}; + +template <GTEST_9_TYPENAMES_(T)> +class GTEST_9_TUPLE_(T) { + public: + template <int k> friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, + GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7, + GTEST_BY_REF_(T8) f8) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4), + f5_(f5), f6_(f6), f7_(f7), f8_(f8) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {} + + template <GTEST_9_TYPENAMES_(U)> + tuple(const GTEST_9_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template <GTEST_9_TYPENAMES_(U)> + tuple& operator=(const GTEST_9_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template <GTEST_9_TYPENAMES_(U)> + tuple& CopyFrom(const GTEST_9_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + f4_ = t.f4_; + f5_ = t.f5_; + f6_ = t.f6_; + f7_ = t.f7_; + f8_ = t.f8_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 f4_; + T5 f5_; + T6 f6_; + T7 f7_; + T8 f8_; +}; + +template <GTEST_10_TYPENAMES_(T)> +class tuple { + public: + template <int k> friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_(), + f9_() 
{} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, + GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7, + GTEST_BY_REF_(T8) f8, GTEST_BY_REF_(T9) f9) : f0_(f0), f1_(f1), f2_(f2), + f3_(f3), f4_(f4), f5_(f5), f6_(f6), f7_(f7), f8_(f8), f9_(f9) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_), f9_(t.f9_) {} + + template <GTEST_10_TYPENAMES_(U)> + tuple(const GTEST_10_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_), + f9_(t.f9_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template <GTEST_10_TYPENAMES_(U)> + tuple& operator=(const GTEST_10_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template <GTEST_10_TYPENAMES_(U)> + tuple& CopyFrom(const GTEST_10_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + f4_ = t.f4_; + f5_ = t.f5_; + f6_ = t.f6_; + f7_ = t.f7_; + f8_ = t.f8_; + f9_ = t.f9_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 f4_; + T5 f5_; + T6 f6_; + T7 f7_; + T8 f8_; + T9 f9_; +}; + +// 6.1.3.2 Tuple creation functions. + +// Known limitations: we don't support passing an +// std::tr1::reference_wrapper<T> to make_tuple(). And we don't +// implement tie(). 
+ +inline tuple<> make_tuple() { return tuple<>(); } + +template <GTEST_1_TYPENAMES_(T)> +inline GTEST_1_TUPLE_(T) make_tuple(const T0& f0) { + return GTEST_1_TUPLE_(T)(f0); +} + +template <GTEST_2_TYPENAMES_(T)> +inline GTEST_2_TUPLE_(T) make_tuple(const T0& f0, const T1& f1) { + return GTEST_2_TUPLE_(T)(f0, f1); +} + +template <GTEST_3_TYPENAMES_(T)> +inline GTEST_3_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2) { + return GTEST_3_TUPLE_(T)(f0, f1, f2); +} + +template <GTEST_4_TYPENAMES_(T)> +inline GTEST_4_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3) { + return GTEST_4_TUPLE_(T)(f0, f1, f2, f3); +} + +template <GTEST_5_TYPENAMES_(T)> +inline GTEST_5_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4) { + return GTEST_5_TUPLE_(T)(f0, f1, f2, f3, f4); +} + +template <GTEST_6_TYPENAMES_(T)> +inline GTEST_6_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4, const T5& f5) { + return GTEST_6_TUPLE_(T)(f0, f1, f2, f3, f4, f5); +} + +template <GTEST_7_TYPENAMES_(T)> +inline GTEST_7_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4, const T5& f5, const T6& f6) { + return GTEST_7_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6); +} + +template <GTEST_8_TYPENAMES_(T)> +inline GTEST_8_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7) { + return GTEST_8_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7); +} + +template <GTEST_9_TYPENAMES_(T)> +inline GTEST_9_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7, + const T8& f8) { + return GTEST_9_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7, f8); +} + +template <GTEST_10_TYPENAMES_(T)> +inline GTEST_10_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4, const T5& f5, const 
T6& f6, const T7& f7, + const T8& f8, const T9& f9) { + return GTEST_10_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9); +} + +// 6.1.3.3 Tuple helper classes. + +template <typename Tuple> struct tuple_size; + +template <GTEST_0_TYPENAMES_(T)> +struct tuple_size<GTEST_0_TUPLE_(T) > { + static const int value = 0; +}; + +template <GTEST_1_TYPENAMES_(T)> +struct tuple_size<GTEST_1_TUPLE_(T) > { + static const int value = 1; +}; + +template <GTEST_2_TYPENAMES_(T)> +struct tuple_size<GTEST_2_TUPLE_(T) > { + static const int value = 2; +}; + +template <GTEST_3_TYPENAMES_(T)> +struct tuple_size<GTEST_3_TUPLE_(T) > { + static const int value = 3; +}; + +template <GTEST_4_TYPENAMES_(T)> +struct tuple_size<GTEST_4_TUPLE_(T) > { + static const int value = 4; +}; + +template <GTEST_5_TYPENAMES_(T)> +struct tuple_size<GTEST_5_TUPLE_(T) > { + static const int value = 5; +}; + +template <GTEST_6_TYPENAMES_(T)> +struct tuple_size<GTEST_6_TUPLE_(T) > { + static const int value = 6; +}; + +template <GTEST_7_TYPENAMES_(T)> +struct tuple_size<GTEST_7_TUPLE_(T) > { + static const int value = 7; +}; + +template <GTEST_8_TYPENAMES_(T)> +struct tuple_size<GTEST_8_TUPLE_(T) > { + static const int value = 8; +}; + +template <GTEST_9_TYPENAMES_(T)> +struct tuple_size<GTEST_9_TUPLE_(T) > { + static const int value = 9; +}; + +template <GTEST_10_TYPENAMES_(T)> +struct tuple_size<GTEST_10_TUPLE_(T) > { + static const int value = 10; +}; + +template <int k, class Tuple> +struct tuple_element { + typedef typename gtest_internal::TupleElement< + k < (tuple_size<Tuple>::value), k, Tuple>::type type; +}; + +#define GTEST_TUPLE_ELEMENT_(k, Tuple) typename tuple_element<k, Tuple >::type + +// 6.1.3.4 Element access. 
namespace gtest_internal {

// Get<k> specializations for k = 0..9.  Each specialization exposes two
// static accessors that return the tuple member named f<k>_:
//   - Field()      for a non-const tuple,
//   - ConstField() for a const tuple.
// NOTE(review): the primary Get template, the f<k>_ members and the
// GTEST_ADD_REF_ / GTEST_BY_REF_ macros are declared outside this excerpt;
// confirm the exact return types there.

template <>
class Get<0> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple))
  Field(Tuple& t) { return t.f0_; }  // NOLINT

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple))
  ConstField(const Tuple& t) { return t.f0_; }
};

template <>
class Get<1> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple))
  Field(Tuple& t) { return t.f1_; }  // NOLINT

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple))
  ConstField(const Tuple& t) { return t.f1_; }
};

template <>
class Get<2> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple))
  Field(Tuple& t) { return t.f2_; }  // NOLINT

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple))
  ConstField(const Tuple& t) { return t.f2_; }
};

template <>
class Get<3> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple))
  Field(Tuple& t) { return t.f3_; }  // NOLINT

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple))
  ConstField(const Tuple& t) { return t.f3_; }
};

template <>
class Get<4> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple))
  Field(Tuple& t) { return t.f4_; }  // NOLINT

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple))
  ConstField(const Tuple& t) { return t.f4_; }
};

template <>
class Get<5> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple))
  Field(Tuple& t) { return t.f5_; }  // NOLINT

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple))
  ConstField(const Tuple& t) { return t.f5_; }
};

template <>
class Get<6> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple))
  Field(Tuple& t) { return t.f6_; }  // NOLINT

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple))
  ConstField(const Tuple& t) { return t.f6_; }
};

template <>
class Get<7> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple))
  Field(Tuple& t) { return t.f7_; }  // NOLINT

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple))
  ConstField(const Tuple& t) { return t.f7_; }
};

template <>
class Get<8> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple))
  Field(Tuple& t) { return t.f8_; }  // NOLINT

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple))
  ConstField(const Tuple& t) { return t.f8_; }
};

template <>
class Get<9> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple))
  Field(Tuple& t) { return t.f9_; }  // NOLINT

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple))
  ConstField(const Tuple& t) { return t.f9_; }
};

}  // namespace gtest_internal

// get<k>(t) for a non-const tuple: forwards to gtest_internal::Get<k>::Field.
template <int k, GTEST_10_TYPENAMES_(T)>
GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T)))
get(GTEST_10_TUPLE_(T)& t) {
  return gtest_internal::Get<k>::Field(t);
}

// get<k>(t) for a const tuple: forwards to
// gtest_internal::Get<k>::ConstField.
template <int k, GTEST_10_TYPENAMES_(T)>
GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T)))
get(const GTEST_10_TUPLE_(T)& t) {
  return gtest_internal::Get<k>::ConstField(t);
}

// 6.1.3.5 Relational operators

// We only implement == and !=, as we don't have a need for the rest yet.

namespace gtest_internal {

// SameSizeTuplePrefixComparator<k, k>::Eq(t1, t2) returns true if the
// first k fields of t1 equals the first k fields of t2.
// SameSizeTuplePrefixComparator(k1, k2) would be a compiler error if
// k1 != k2.
//
// The primary template is only declared; just the <0, 0> and <k, k>
// specializations below are defined.
template <int kSize1, int kSize2>
struct SameSizeTuplePrefixComparator;

// Base case of the recursion: two empty prefixes always compare equal.
template <>
struct SameSizeTuplePrefixComparator<0, 0> {
  template <class Tuple1, class Tuple2>
  static bool Eq(const Tuple1& /* t1 */, const Tuple2& /* t2 */) {
    return true;
  }
};

// Recursive case: the first k - 1 fields are compared first, then field
// k - 1; && short-circuits on the first mismatch.
template <int k>
struct SameSizeTuplePrefixComparator<k, k> {
  template <class Tuple1, class Tuple2>
  static bool Eq(const Tuple1& t1, const Tuple2& t2) {
    return SameSizeTuplePrefixComparator<k - 1, k - 1>::Eq(t1, t2) &&
        ::std::tr1::get<k - 1>(t1) == ::std::tr1::get<k - 1>(t2);
  }
};

}  // namespace gtest_internal

// Field-wise equality.  The comparator is instantiated with the tuple_size
// of each operand.
template <GTEST_10_TYPENAMES_(T), GTEST_10_TYPENAMES_(U)>
inline bool operator==(const GTEST_10_TUPLE_(T)& t,
                       const GTEST_10_TUPLE_(U)& u) {
  return gtest_internal::SameSizeTuplePrefixComparator<
      tuple_size<GTEST_10_TUPLE_(T) >::value,
      tuple_size<GTEST_10_TUPLE_(U) >::value>::Eq(t, u);
}

// Inequality, defined as the negation of operator==.
template <GTEST_10_TYPENAMES_(T), GTEST_10_TYPENAMES_(U)>
inline bool operator!=(const GTEST_10_TUPLE_(T)& t,
                       const GTEST_10_TUPLE_(U)& u) { return !(t == u); }

// 6.1.4 Pairs.
// Unimplemented.
}  // namespace tr1
}  // namespace std

// Undefine the helper macros used by the tuple implementation above so they
// do not leak to code that comes after it.
#undef GTEST_0_TUPLE_
#undef GTEST_1_TUPLE_
#undef GTEST_2_TUPLE_
#undef GTEST_3_TUPLE_
#undef GTEST_4_TUPLE_
#undef GTEST_5_TUPLE_
#undef GTEST_6_TUPLE_
#undef GTEST_7_TUPLE_
#undef GTEST_8_TUPLE_
#undef GTEST_9_TUPLE_
#undef GTEST_10_TUPLE_

#undef GTEST_0_TYPENAMES_
#undef GTEST_1_TYPENAMES_
#undef GTEST_2_TYPENAMES_
#undef GTEST_3_TYPENAMES_
#undef GTEST_4_TYPENAMES_
#undef GTEST_5_TYPENAMES_
#undef GTEST_6_TYPENAMES_
#undef GTEST_7_TYPENAMES_
#undef GTEST_8_TYPENAMES_
#undef GTEST_9_TYPENAMES_
#undef GTEST_10_TYPENAMES_

#undef GTEST_DECLARE_TUPLE_AS_FRIEND_
#undef GTEST_BY_REF_
#undef GTEST_ADD_REF_
#undef GTEST_TUPLE_ELEMENT_

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
// NOTE(review): the #if that the following #elif branches belong to opens
// before this excerpt; judging by the closing comment below it is the
// GTEST_USE_OWN_TR1_TUPLE test - confirm.
# elif GTEST_ENV_HAS_STD_TUPLE_
#  include <tuple>
// C++11 puts its tuple into the ::std namespace rather than
// ::std::tr1.  gtest expects tuple to live in ::std::tr1, so put it there.
// This causes undefined behavior, but supported compilers react in
// the way we intend.
namespace std {
namespace tr1 {
using ::std::get;
using ::std::make_tuple;
using ::std::tuple;
using ::std::tuple_element;
using ::std::tuple_size;
}
}

# elif GTEST_OS_SYMBIAN

// On Symbian, BOOST_HAS_TR1_TUPLE causes Boost's TR1 tuple library to
// use STLport's tuple implementation, which unfortunately doesn't
// work as the copy of STLport distributed with Symbian is incomplete.
// By making sure BOOST_HAS_TR1_TUPLE is undefined, we force Boost to
// use its own tuple implementation.
#  ifdef BOOST_HAS_TR1_TUPLE
#   undef BOOST_HAS_TR1_TUPLE
#  endif  // BOOST_HAS_TR1_TUPLE

// This prevents <boost/tr1/detail/config.hpp>, which defines
// BOOST_HAS_TR1_TUPLE, from being #included by Boost's <tuple>.
#  define BOOST_TR1_DETAIL_CONFIG_HPP_INCLUDED
#  include <tuple>

# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40000)
// GCC 4.0+ implements tr1/tuple in the <tr1/tuple> header.  This does
// not conform to the TR1 spec, which requires the header to be <tuple>.

#  if !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302
// Until version 4.3.2, gcc has a bug that causes <tr1/functional>,
// which is #included by <tr1/tuple>, to not compile when RTTI is
// disabled.  _TR1_FUNCTIONAL is the header guard for
// <tr1/functional>.  Hence the following #define is a hack to prevent
// <tr1/functional> from being included.
#   define _TR1_FUNCTIONAL 1
#   include <tr1/tuple>
#   undef _TR1_FUNCTIONAL  // Allows the user to #include
                           // <tr1/functional> if he chooses to.
#  else
#   include <tr1/tuple>  // NOLINT
#  endif  // !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302

# else
// If the compiler is not GCC 4.0+, we assume the user is using a
// spec-conforming TR1 implementation.
#  include <tuple>  // NOLINT
# endif  // GTEST_USE_OWN_TR1_TUPLE

#endif  // GTEST_HAS_TR1_TUPLE

// Determines whether clone(2) is supported.
// Usually it will only be available on Linux, excluding
// Linux on the Itanium architecture.
// Also see http://linux.die.net/man/2/clone.
#ifndef GTEST_HAS_CLONE
// The user didn't tell us, so we need to figure it out.

# if GTEST_OS_LINUX && !defined(__ia64__)
#  if GTEST_OS_LINUX_ANDROID
// On Android, clone() is only available on ARM starting with Gingerbread.
#   if defined(__arm__) && __ANDROID_API__ >= 9
#    define GTEST_HAS_CLONE 1
#   else
#    define GTEST_HAS_CLONE 0
#   endif
#  else
#   define GTEST_HAS_CLONE 1
#  endif
# else
#  define GTEST_HAS_CLONE 0
# endif  // GTEST_OS_LINUX && !defined(__ia64__)

#endif  // GTEST_HAS_CLONE

// Determines whether to support stream redirection. This is used to test
// output correctness and to implement death tests.
#ifndef GTEST_HAS_STREAM_REDIRECTION
// By default, we assume that stream redirection is supported on all
// platforms except known mobile ones.
# if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN
#  define GTEST_HAS_STREAM_REDIRECTION 0
# else
#  define GTEST_HAS_STREAM_REDIRECTION 1
# endif  // GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN
#endif  // GTEST_HAS_STREAM_REDIRECTION

// Determines whether to support death tests.
// Google Test does not support death tests for VC 7.1 and earlier as
// abort() in a VC 7.1 application compiled as GUI in debug config
// pops up a dialog window that cannot be suppressed programmatically.
//
// Note that GTEST_HAS_DEATH_TEST is left undefined (rather than defined as 0)
// on platforms that are not listed here.
#if (GTEST_OS_LINUX || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS || \
     (GTEST_OS_MAC && !GTEST_OS_IOS) || GTEST_OS_IOS_SIMULATOR || \
     (GTEST_OS_WINDOWS_DESKTOP && _MSC_VER >= 1400) || \
     GTEST_OS_WINDOWS_MINGW || GTEST_OS_AIX || GTEST_OS_HPUX || \
     GTEST_OS_OPENBSD || GTEST_OS_QNX)
# define GTEST_HAS_DEATH_TEST 1
# include <vector>  // NOLINT
#endif

// We don't support MSVC 7.1 with exceptions disabled now.  Therefore
// all the compilers we care about are adequate for supporting
// value-parameterized tests.
#define GTEST_HAS_PARAM_TEST 1

// Determines whether to support type-driven tests.

// Typed tests need <typeinfo> and variadic macros, which GCC, VC++ 8.0,
// Sun Pro CC, IBM Visual Age, and HP aCC support.
#if defined(__GNUC__) || (_MSC_VER >= 1400) || defined(__SUNPRO_CC) || \
    defined(__IBMCPP__) || defined(__HP_aCC)
# define GTEST_HAS_TYPED_TEST 1
# define GTEST_HAS_TYPED_TEST_P 1
#endif

// Determines whether to support Combine(). This only makes sense when
// value-parameterized tests are enabled.  The implementation doesn't
// work on Sun Studio since it doesn't understand templated conversion
// operators.
#if GTEST_HAS_PARAM_TEST && GTEST_HAS_TR1_TUPLE && !defined(__SUNPRO_CC)
# define GTEST_HAS_COMBINE 1
#endif

// Determines whether the system compiler uses UTF-16 for encoding wide strings.
#define GTEST_WIDE_STRING_USES_UTF16_ \
    (GTEST_OS_WINDOWS || GTEST_OS_CYGWIN || GTEST_OS_SYMBIAN || GTEST_OS_AIX)

// Determines whether test results can be streamed to a socket.
// The macro is only defined (as 1) on Linux; elsewhere it stays undefined.
#if GTEST_OS_LINUX
# define GTEST_CAN_STREAM_RESULTS_ 1
#endif

// Defines some utility macros.

// The GNU compiler emits a warning if nested "if" statements are followed by
// an "else" statement and braces are not used to explicitly disambiguate the
// "else" binding.  This leads to problems with code like:
//
//   if (gate)
//     ASSERT_*(condition) << "Some message";
//
// The "switch (0) case 0:" idiom is used to suppress this.
// The blocker expands to nothing when __INTEL_COMPILER is defined.
#ifdef __INTEL_COMPILER
# define GTEST_AMBIGUOUS_ELSE_BLOCKER_
#else
# define GTEST_AMBIGUOUS_ELSE_BLOCKER_ switch (0) case 0: default:  // NOLINT
#endif

// Use this annotation at the end of a struct/class definition to
// prevent the compiler from optimizing away instances that are never
// used.  This is useful when all interesting logic happens inside the
// c'tor and / or d'tor.  Example:
//
//   struct Foo {
//     Foo() { ... }
//   } GTEST_ATTRIBUTE_UNUSED_;
//
// Also use it after a variable or parameter declaration to tell the
// compiler the variable/parameter does not have to be used.
#if defined(__GNUC__) && !defined(COMPILER_ICC)
# define GTEST_ATTRIBUTE_UNUSED_ __attribute__ ((unused))
#else
# define GTEST_ATTRIBUTE_UNUSED_
#endif

// A macro to disallow operator=
// This should be used in the private: declarations for a class.
// It only declares the operator and deliberately carries no trailing
// semicolon; the caller supplies it.
#define GTEST_DISALLOW_ASSIGN_(type)\
  void operator=(type const &)

// A macro to disallow copy constructor and operator=
// This should be used in the private: declarations for a class.
// It declares the copy constructor and then reuses GTEST_DISALLOW_ASSIGN_.
#define GTEST_DISALLOW_COPY_AND_ASSIGN_(type)\
  type(type const &);\
  GTEST_DISALLOW_ASSIGN_(type)

// Tell the compiler to warn about unused return values for functions declared
// with this macro.
// The macro should be used on function declarations
// following the argument list:
//
//   Sprocket* AllocateSprocket() GTEST_MUST_USE_RESULT_;
#if defined(__GNUC__) && (GTEST_GCC_VER_ >= 30400) && !defined(COMPILER_ICC)
# define GTEST_MUST_USE_RESULT_ __attribute__ ((warn_unused_result))
#else
# define GTEST_MUST_USE_RESULT_
#endif  // __GNUC__ && (GTEST_GCC_VER_ >= 30400) && !COMPILER_ICC

// Determine whether the compiler supports Microsoft's Structured Exception
// Handling.  This is supported by several Windows compilers but generally
// does not exist on any other system.
#ifndef GTEST_HAS_SEH
// The user didn't tell us, so we need to figure it out.

# if defined(_MSC_VER) || defined(__BORLANDC__)
// These two compilers are known to support SEH.
#  define GTEST_HAS_SEH 1
# else
// Assume no SEH.
#  define GTEST_HAS_SEH 0
# endif

#endif  // GTEST_HAS_SEH

// GTEST_API_ decorates declarations that cross a shared-library boundary.
// With MSVC it becomes dllimport or dllexport depending on which of
// GTEST_LINKED_AS_SHARED_LIBRARY / GTEST_CREATE_SHARED_LIBRARY is set; in
// every other case it is defined as empty by the fallback below.
#ifdef _MSC_VER

# if GTEST_LINKED_AS_SHARED_LIBRARY
#  define GTEST_API_ __declspec(dllimport)
# elif GTEST_CREATE_SHARED_LIBRARY
#  define GTEST_API_ __declspec(dllexport)
# endif

#endif  // _MSC_VER

#ifndef GTEST_API_
# define GTEST_API_
#endif

#ifdef __GNUC__
// Ask the compiler to never inline a given function.
# define GTEST_NO_INLINE_ __attribute__((noinline))
#else
# define GTEST_NO_INLINE_
#endif

// _LIBCPP_VERSION is defined by the libc++ library from the LLVM project.
// GTEST_HAS_CXXABI_H_ is set to 1 when either libstdc++ (__GLIBCXX__) or
// libc++ is detected, and to 0 otherwise.
#if defined(__GLIBCXX__) || defined(_LIBCPP_VERSION)
# define GTEST_HAS_CXXABI_H_ 1
#else
# define GTEST_HAS_CXXABI_H_ 0
#endif

namespace testing {

class Message;

namespace internal {

// A secret type that Google Test users don't know about.  It has no
// definition on purpose.  Therefore it's impossible to create a
// Secret object, which is what we want.
class Secret;

// The GTEST_COMPILE_ASSERT_ macro can be used to verify that a compile time
// expression is true.
// For example, you could use it to verify the
// size of a static array:
//
//   GTEST_COMPILE_ASSERT_(ARRAYSIZE(content_type_names) == CONTENT_NUM_TYPES,
//                         content_type_names_incorrect_size);
//
// or to make sure a struct is smaller than a certain size:
//
//   GTEST_COMPILE_ASSERT_(sizeof(foo) < 128, foo_too_large);
//
// The second argument to the macro is the name of the variable.  If
// the expression is false, most compilers will issue a warning/error
// containing the name of the variable.

// Empty helper type; it exists only so that the asserted expression has to
// appear as a (compile-time) template argument.
template <bool>
struct CompileAssert {
};

#define GTEST_COMPILE_ASSERT_(expr, msg) \
  typedef ::testing::internal::CompileAssert<(static_cast<bool>(expr))> \
      msg[static_cast<bool>(expr) ? 1 : -1] GTEST_ATTRIBUTE_UNUSED_

// Implementation details of GTEST_COMPILE_ASSERT_:
//
// - GTEST_COMPILE_ASSERT_ works by defining an array type that has -1
//   elements (and thus is invalid) when the expression is false.
//
// - The simpler definition
//
//    #define GTEST_COMPILE_ASSERT_(expr, msg) typedef char msg[(expr) ? 1 : -1]
//
//   does not work, as gcc supports variable-length arrays whose sizes
//   are determined at run-time (this is gcc's extension and not part
//   of the C++ standard).  As a result, gcc fails to reject the
//   following code with the simple definition:
//
//     int foo;
//     GTEST_COMPILE_ASSERT_(foo, msg); // not supposed to compile as foo is
//                                      // not a compile-time constant.
//
// - By using the type CompileAssert<(bool(expr))>, we ensure that
//   expr is a compile-time constant.  (Template arguments must be
//   determined at compile-time.)
//
// - The outer parentheses in CompileAssert<(bool(expr))> are necessary
//   to work around a bug in gcc 3.4.4 and 4.0.1.  If we had written
//
//     CompileAssert<bool(expr)>
//
//   instead, these compilers will refuse to compile
//
//     GTEST_COMPILE_ASSERT_(5 > 0, some_message);
//
//   (They seem to think the ">" in "5 > 0" marks the end of the
//   template argument list.)
+// +// - The array size is (bool(expr) ? 1 : -1), instead of simply +// +// ((expr) ? 1 : -1). +// +// This is to avoid running into a bug in MS VC 7.1, which +// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1. + +// StaticAssertTypeEqHelper is used by StaticAssertTypeEq defined in gtest.h. +// +// This template is declared, but intentionally undefined. +template <typename T1, typename T2> +struct StaticAssertTypeEqHelper; + +template <typename T> +struct StaticAssertTypeEqHelper<T, T> {}; + +#if GTEST_HAS_GLOBAL_STRING +typedef ::string string; +#else +typedef ::std::string string; +#endif // GTEST_HAS_GLOBAL_STRING + +#if GTEST_HAS_GLOBAL_WSTRING +typedef ::wstring wstring; +#elif GTEST_HAS_STD_WSTRING +typedef ::std::wstring wstring; +#endif // GTEST_HAS_GLOBAL_WSTRING + +// A helper for suppressing warnings on constant condition. It just +// returns 'condition'. +GTEST_API_ bool IsTrue(bool condition); + +// Defines scoped_ptr. + +// This implementation of scoped_ptr is PARTIAL - it only contains +// enough stuff to satisfy Google Test's need. +template <typename T> +class scoped_ptr { + public: + typedef T element_type; + + explicit scoped_ptr(T* p = NULL) : ptr_(p) {} + ~scoped_ptr() { reset(); } + + T& operator*() const { return *ptr_; } + T* operator->() const { return ptr_; } + T* get() const { return ptr_; } + + T* release() { + T* const ptr = ptr_; + ptr_ = NULL; + return ptr; + } + + void reset(T* p = NULL) { + if (p != ptr_) { + if (IsTrue(sizeof(T) > 0)) { // Makes sure T is a complete type. + delete ptr_; + } + ptr_ = p; + } + } + + private: + T* ptr_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(scoped_ptr); +}; + +// Defines RE. + +// A simple C++ wrapper for <regex.h>. It uses the POSIX Extended +// Regular Expression syntax. +class GTEST_API_ RE { + public: + // A copy constructor is required by the Standard to initialize object + // references from r-values. + RE(const RE& other) { Init(other.pattern()); } + + // Constructs an RE from a string. 
+ RE(const ::std::string& regex) { Init(regex.c_str()); } // NOLINT + +#if GTEST_HAS_GLOBAL_STRING + + RE(const ::string& regex) { Init(regex.c_str()); } // NOLINT + +#endif // GTEST_HAS_GLOBAL_STRING + + RE(const char* regex) { Init(regex); } // NOLINT + ~RE(); + + // Returns the string representation of the regex. + const char* pattern() const { return pattern_; } + + // FullMatch(str, re) returns true iff regular expression re matches + // the entire str. + // PartialMatch(str, re) returns true iff regular expression re + // matches a substring of str (including str itself). + // + // TODO(wan@google.com): make FullMatch() and PartialMatch() work + // when str contains NUL characters. + static bool FullMatch(const ::std::string& str, const RE& re) { + return FullMatch(str.c_str(), re); + } + static bool PartialMatch(const ::std::string& str, const RE& re) { + return PartialMatch(str.c_str(), re); + } + +#if GTEST_HAS_GLOBAL_STRING + + static bool FullMatch(const ::string& str, const RE& re) { + return FullMatch(str.c_str(), re); + } + static bool PartialMatch(const ::string& str, const RE& re) { + return PartialMatch(str.c_str(), re); + } + +#endif // GTEST_HAS_GLOBAL_STRING + + static bool FullMatch(const char* str, const RE& re); + static bool PartialMatch(const char* str, const RE& re); + + private: + void Init(const char* regex); + + // We use a const char* instead of an std::string, as Google Test used to be + // used where std::string is not available. TODO(wan@google.com): change to + // std::string. + const char* pattern_; + bool is_valid_; + +#if GTEST_USES_POSIX_RE + + regex_t full_regex_; // For FullMatch(). + regex_t partial_regex_; // For PartialMatch(). + +#else // GTEST_USES_SIMPLE_RE + + const char* full_pattern_; // For FullMatch(); + +#endif + + GTEST_DISALLOW_ASSIGN_(RE); +}; + +// Formats a source file path and a line number as they would appear +// in an error message from the compiler used to compile this code. 
GTEST_API_ ::std::string FormatFileLocation(const char* file, int line);

// Formats a file location for compiler-independent XML output.
// Although this function is not platform dependent, we put it next to
// FormatFileLocation in order to contrast the two functions.
GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char* file,
                                                               int line);

// Defines logging utilities:
//   GTEST_LOG_(severity) - logs messages at the specified severity level. The
//                          message itself is streamed into the macro.
//   LogToStderr()  - directs all log messages to stderr.
//   FlushInfoLog() - flushes informational log messages.

// Severity levels accepted by GTEST_LOG_; the macro pastes its argument onto
// the GTEST_ prefix, so callers write INFO, WARNING, ERROR or FATAL.
enum GTestLogSeverity {
  GTEST_INFO,
  GTEST_WARNING,
  GTEST_ERROR,
  GTEST_FATAL
};

// Formats log entry severity, provides a stream object for streaming the
// log message, and terminates the message with a newline when going out of
// scope.
class GTEST_API_ GTestLog {
 public:
  GTestLog(GTestLogSeverity severity, const char* file, int line);

  // Flushes the buffers and, if severity is GTEST_FATAL, aborts the program.
  ~GTestLog();

  // The stream that the log message is written to; always ::std::cerr.
  ::std::ostream& GetStream() { return ::std::cerr; }

 private:
  const GTestLogSeverity severity_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestLog);
};

// Creates a temporary GTestLog tagged with the current file and line and
// yields its stream.
#define GTEST_LOG_(severity) \
    ::testing::internal::GTestLog(::testing::internal::GTEST_##severity, \
                                  __FILE__, __LINE__).GetStream()

// No-op: GTestLog already writes to stderr.
inline void LogToStderr() {}
// fflush(NULL) flushes every open output stream.
inline void FlushInfoLog() { fflush(NULL); }

// INTERNAL IMPLEMENTATION - DO NOT USE.
//
// GTEST_CHECK_ is an all-mode assert. It aborts the program if the condition
// is not satisfied.
//  Synopsis:
//    GTEST_CHECK_(boolean_condition);
//     or
//    GTEST_CHECK_(boolean_condition) << "Additional message";
//
//    This checks the condition and if the condition is not satisfied
//    it prints message about the condition violation, including the
//    condition itself, plus additional message streamed into it, if any,
//    and then it aborts the program.
//    It aborts the program irrespective of
//    whether it is built in the debug mode or not.
//
// The empty "if" branch together with GTEST_AMBIGUOUS_ELSE_BLOCKER_ lets
// callers stream an additional message into the trailing "else" expression.
#define GTEST_CHECK_(condition) \
    GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
    if (::testing::internal::IsTrue(condition)) \
      ; \
    else \
      GTEST_LOG_(FATAL) << "Condition " #condition " failed. "

// An all-mode assert to verify that the given POSIX-style function
// call returns 0 (indicating success).  Known limitation: this
// doesn't expand to a balanced 'if' statement, so enclose the macro
// in {} if you need to use it as the only statement in an 'if'
// branch.
//
// On failure the log message reads "<call> failed with error <code>".  The
// leading space in the literal matters: without it the stringified call and
// the word "failed" run together.
#define GTEST_CHECK_POSIX_SUCCESS_(posix_call) \
  if (const int gtest_error = (posix_call)) \
    GTEST_LOG_(FATAL) << #posix_call << " failed with error " \
                      << gtest_error

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Use ImplicitCast_ as a safe version of static_cast for upcasting in
// the type hierarchy (e.g. casting a Foo* to a SuperclassOfFoo* or a
// const Foo*).  When you use ImplicitCast_, the compiler checks that
// the cast is safe.  Such explicit ImplicitCast_s are necessary in
// surprisingly many situations where C++ demands an exact type match
// instead of an argument type convertible to a target type.
//
// The syntax for using ImplicitCast_ is the same as for static_cast:
//
//   ImplicitCast_<ToType>(expr)
//
// ImplicitCast_ would have been part of the C++ standard library,
// but the proposal was submitted too late.  It will probably make
// its way into the language in the future.
//
// This relatively ugly name is intentional. It prevents clashes with
// similar functions users may have (e.g., implicit_cast). The internal
// namespace alone is not enough because the function can be found by ADL.
//
// The conversion happens while the argument is bound to the by-value
// parameter x, so only implicit conversions are accepted.
template<typename To>
inline To ImplicitCast_(To x) { return x; }

// When you upcast (that is, cast a pointer from type Foo to type
// SuperclassOfFoo), it's fine to use ImplicitCast_<>, since upcasts
// always succeed.
When you downcast (that is, cast a pointer from +// type Foo to type SubclassOfFoo), static_cast<> isn't safe, because +// how do you know the pointer is really of type SubclassOfFoo? It +// could be a bare Foo, or of type DifferentSubclassOfFoo. Thus, +// when you downcast, you should use this macro. In debug mode, we +// use dynamic_cast<> to double-check the downcast is legal (we die +// if it's not). In normal mode, we do the efficient static_cast<> +// instead. Thus, it's important to test in debug mode to make sure +// the cast is legal! +// This is the only place in the code we should use dynamic_cast<>. +// In particular, you SHOULDN'T be using dynamic_cast<> in order to +// do RTTI (eg code like this: +// if (dynamic_cast<Subclass1>(foo)) HandleASubclass1Object(foo); +// if (dynamic_cast<Subclass2>(foo)) HandleASubclass2Object(foo); +// You should design the code some other way not to need this. +// +// This relatively ugly name is intentional. It prevents clashes with +// similar functions users may have (e.g., down_cast). The internal +// namespace alone is not enough because the function can be found by ADL. +template<typename To, typename From> // use like this: DownCast_<T*>(foo); +inline To DownCast_(From* f) { // so we only accept pointers + // Ensures that To is a sub-type of From *. This test is here only + // for compile-time type checking, and has no overhead in an + // optimized build at run-time, as it will be optimized away + // completely. + if (false) { + const To to = NULL; + ::testing::internal::ImplicitCast_<From*>(to); + } + +#if GTEST_HAS_RTTI + // RTTI: debug mode only! + GTEST_CHECK_(f == NULL || dynamic_cast<To>(f) != NULL); +#endif + return static_cast<To>(f); +} + +// Downcasts the pointer of type Base to Derived. +// Derived must be a subclass of Base. The parameter MUST +// point to a class of type Derived, not any subclass of it. +// When RTTI is available, the function performs a runtime +// check to enforce this. 
+template <class Derived, class Base> +Derived* CheckedDowncastToActualType(Base* base) { +#if GTEST_HAS_RTTI + GTEST_CHECK_(typeid(*base) == typeid(Derived)); + return dynamic_cast<Derived*>(base); // NOLINT +#else + return static_cast<Derived*>(base); // Poor man's downcast. +#endif +} + +#if GTEST_HAS_STREAM_REDIRECTION + +// Defines the stderr capturer: +// CaptureStdout - starts capturing stdout. +// GetCapturedStdout - stops capturing stdout and returns the captured string. +// CaptureStderr - starts capturing stderr. +// GetCapturedStderr - stops capturing stderr and returns the captured string. +// +GTEST_API_ void CaptureStdout(); +GTEST_API_ std::string GetCapturedStdout(); +GTEST_API_ void CaptureStderr(); +GTEST_API_ std::string GetCapturedStderr(); + +#endif // GTEST_HAS_STREAM_REDIRECTION + + +#if GTEST_HAS_DEATH_TEST + +const ::std::vector<testing::internal::string>& GetInjectableArgvs(); +void SetInjectableArgvs(const ::std::vector<testing::internal::string>* + new_argvs); + +// A copy of all command line arguments. Set by InitGoogleTest(). +extern ::std::vector<testing::internal::string> g_argvs; + +#endif // GTEST_HAS_DEATH_TEST + +// Defines synchronization primitives. + +#if GTEST_HAS_PTHREAD + +// Sleeps for (roughly) n milli-seconds. This function is only for +// testing Google Test's own constructs. Don't use it in user tests, +// either directly or indirectly. +inline void SleepMilliseconds(int n) { + const timespec time = { + 0, // 0 seconds. + n * 1000L * 1000L, // And n ms. + }; + nanosleep(&time, NULL); +} + +// Allows a controller thread to pause execution of newly created +// threads until notified. Instances of this class must be created +// and destroyed in the controller thread. +// +// This class is only for testing Google Test's own constructs. Do not +// use it in user tests, either directly or indirectly. 
+class Notification { + public: + Notification() : notified_(false) { + GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL)); + } + ~Notification() { + pthread_mutex_destroy(&mutex_); + } + + // Notifies all threads created with this notification to start. Must + // be called from the controller thread. + void Notify() { + pthread_mutex_lock(&mutex_); + notified_ = true; + pthread_mutex_unlock(&mutex_); + } + + // Blocks until the controller thread notifies. Must be called from a test + // thread. + void WaitForNotification() { + for (;;) { + pthread_mutex_lock(&mutex_); + const bool notified = notified_; + pthread_mutex_unlock(&mutex_); + if (notified) + break; + SleepMilliseconds(10); + } + } + + private: + pthread_mutex_t mutex_; + bool notified_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(Notification); +}; + +// As a C-function, ThreadFuncWithCLinkage cannot be templated itself. +// Consequently, it cannot select a correct instantiation of ThreadWithParam +// in order to call its Run(). Introducing ThreadWithParamBase as a +// non-templated base class for ThreadWithParam allows us to bypass this +// problem. +class ThreadWithParamBase { + public: + virtual ~ThreadWithParamBase() {} + virtual void Run() = 0; +}; + +// pthread_create() accepts a pointer to a function type with the C linkage. +// According to the Standard (7.5/1), function types with different linkages +// are different even if they are otherwise identical. Some compilers (for +// example, SunStudio) treat them as different types. Since class methods +// cannot be defined with C-linkage we need to define a free C-function to +// pass into pthread_create(). +extern "C" inline void* ThreadFuncWithCLinkage(void* thread) { + static_cast<ThreadWithParamBase*>(thread)->Run(); + return NULL; +} + +// Helper class for testing Google Test's multi-threading constructs. +// To use it, write: +// +// void ThreadFunc(int param) { /* Do things with param */ } +// Notification thread_can_start; +// ... 
+// // The thread_can_start parameter is optional; you can supply NULL. +// ThreadWithParam<int> thread(&ThreadFunc, 5, &thread_can_start); +// thread_can_start.Notify(); +// +// These classes are only for testing Google Test's own constructs. Do +// not use them in user tests, either directly or indirectly. +template <typename T> +class ThreadWithParam : public ThreadWithParamBase { + public: + typedef void (*UserThreadFunc)(T); + + ThreadWithParam( + UserThreadFunc func, T param, Notification* thread_can_start) + : func_(func), + param_(param), + thread_can_start_(thread_can_start), + finished_(false) { + ThreadWithParamBase* const base = this; + // The thread can be created only after all fields except thread_ + // have been initialized. + GTEST_CHECK_POSIX_SUCCESS_( + pthread_create(&thread_, 0, &ThreadFuncWithCLinkage, base)); + } + ~ThreadWithParam() { Join(); } + + void Join() { + if (!finished_) { + GTEST_CHECK_POSIX_SUCCESS_(pthread_join(thread_, 0)); + finished_ = true; + } + } + + virtual void Run() { + if (thread_can_start_ != NULL) + thread_can_start_->WaitForNotification(); + func_(param_); + } + + private: + const UserThreadFunc func_; // User-supplied thread function. + const T param_; // User-supplied parameter to the thread function. + // When non-NULL, used to block execution until the controller thread + // notifies. + Notification* const thread_can_start_; + bool finished_; // true iff we know that the thread function has finished. + pthread_t thread_; // The native thread object. + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam); +}; + +// MutexBase and Mutex implement mutex on pthreads-based platforms. They +// are used in conjunction with class MutexLock: +// +// Mutex mutex; +// ... +// MutexLock lock(&mutex); // Acquires the mutex and releases it at the end +// // of the current scope. +// +// MutexBase implements behavior for both statically and dynamically +// allocated mutexes. Do not use MutexBase directly. 
// Instead, write
// the following to define a static mutex:
//
//   GTEST_DEFINE_STATIC_MUTEX_(g_some_mutex);
//
// You can forward declare a static mutex like this:
//
//   GTEST_DECLARE_STATIC_MUTEX_(g_some_mutex);
//
// To create a dynamic mutex, just define an object of type Mutex.
class MutexBase {
 public:
  // Acquires this mutex.
  void Lock() {
    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_lock(&mutex_));
    // The ownership record is written only after the lock has been taken.
    // owner_ is set before has_owner_ because has_owner_ is what marks
    // owner_ as valid.
    owner_ = pthread_self();
    has_owner_ = true;
  }

  // Releases this mutex.
  void Unlock() {
    // Since the lock is being released the owner_ field should no longer be
    // considered valid. We don't protect writing to has_owner_ here, as it's
    // the caller's responsibility to ensure that the current thread holds the
    // mutex when this is called.
    has_owner_ = false;
    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_unlock(&mutex_));
  }

  // Does nothing if the current thread holds the mutex. Otherwise, crashes
  // with high probability.
  void AssertHeld() const {
    // has_owner_ is tested first so that owner_ is only inspected when valid.
    GTEST_CHECK_(has_owner_ && pthread_equal(owner_, pthread_self()))
        << "The current thread is not holding the mutex @" << this;
  }

  // A static mutex may be used before main() is entered.  It may even
  // be used before the dynamic initialization stage.  Therefore we
  // must be able to initialize a static mutex object at link time.
  // This means MutexBase has to be a POD and its member variables
  // have to be public.
  //
  // The member order must match the initializer list used by
  // GTEST_DEFINE_STATIC_MUTEX_: mutex_ first, then has_owner_.
 public:
  pthread_mutex_t mutex_;  // The underlying pthread mutex.
  // has_owner_ indicates whether the owner_ field below contains a valid thread
  // ID and is therefore safe to inspect (e.g., to use in pthread_equal()). All
  // accesses to the owner_ field should be protected by a check of this field.
  // An alternative might be to memset() owner_ to all zeros, but there's no
  // guarantee that a zero'd pthread_t is necessarily invalid or even different
  // from pthread_self().
  bool has_owner_;
  pthread_t owner_;  // The thread holding the mutex.
};

// Forward-declares a static mutex.
# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
    extern ::testing::internal::MutexBase mutex

// Defines and statically (i.e. at link time) initializes a static mutex.
// The initialization list here does not explicitly initialize each field,
// instead relying on default initialization for the unspecified fields. In
// particular, the owner_ field (a pthread_t) is not explicitly initialized.
// This allows initialization to work whether pthread_t is a scalar or struct.
// The flag -Wmissing-field-initializers must not be specified for this to work.
# define GTEST_DEFINE_STATIC_MUTEX_(mutex) \
    ::testing::internal::MutexBase mutex = { PTHREAD_MUTEX_INITIALIZER, false }

// The Mutex class can only be used for mutexes created at runtime. It
// shares its API with MutexBase otherwise.
class Mutex : public MutexBase {
 public:
  // Initializes the pthread mutex with default attributes and marks it as
  // having no owner.
  Mutex() {
    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL));
    has_owner_ = false;
  }
  ~Mutex() {
    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_destroy(&mutex_));
  }

 private:
  GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex);
};

// We cannot name this class MutexLock as the ctor declaration would
// conflict with a macro named MutexLock, which is defined on some
// platforms.  Hence the typedef trick below.
//
// The constructor locks the given mutex and the destructor unlocks it, so
// the mutex is held for the lifetime of the GTestMutexLock object.
class GTestMutexLock {
 public:
  explicit GTestMutexLock(MutexBase* mutex)
      : mutex_(mutex) { mutex_->Lock(); }

  ~GTestMutexLock() { mutex_->Unlock(); }

 private:
  MutexBase* const mutex_;  // Not owned; only locked and unlocked.

  GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestMutexLock);
};

typedef GTestMutexLock MutexLock;

// Helpers for ThreadLocal.

// pthread_key_create() requires DeleteThreadLocalValue() to have
// C-linkage.  Therefore it cannot be templatized to access
// ThreadLocal<T>.  Hence the need for class
// ThreadLocalValueHolderBase.
+class ThreadLocalValueHolderBase { + public: + virtual ~ThreadLocalValueHolderBase() {} +}; + +// Called by pthread to delete thread-local data stored by +// pthread_setspecific(). +extern "C" inline void DeleteThreadLocalValue(void* value_holder) { + delete static_cast<ThreadLocalValueHolderBase*>(value_holder); +} + +// Implements thread-local storage on pthreads-based systems. +// +// // Thread 1 +// ThreadLocal<int> tl(100); // 100 is the default value for each thread. +// +// // Thread 2 +// tl.set(150); // Changes the value for thread 2 only. +// EXPECT_EQ(150, tl.get()); +// +// // Thread 1 +// EXPECT_EQ(100, tl.get()); // In thread 1, tl has the original value. +// tl.set(200); +// EXPECT_EQ(200, tl.get()); +// +// The template type argument T must have a public copy constructor. +// In addition, the default ThreadLocal constructor requires T to have +// a public default constructor. +// +// An object managed for a thread by a ThreadLocal instance is deleted +// when the thread exits. Or, if the ThreadLocal instance dies in +// that thread, when the ThreadLocal dies. It's the user's +// responsibility to ensure that all other threads using a ThreadLocal +// have exited when it dies, or the per-thread objects for those +// threads will not be deleted. +// +// Google Test only uses global ThreadLocal objects. That means they +// will die after main() has returned. Therefore, no per-thread +// object managed by Google Test will be leaked as long as all threads +// using Google Test have exited when main() returns. +template <typename T> +class ThreadLocal { + public: + ThreadLocal() : key_(CreateKey()), + default_() {} + explicit ThreadLocal(const T& value) : key_(CreateKey()), + default_(value) {} + + ~ThreadLocal() { + // Destroys the managed object for the current thread, if any. + DeleteThreadLocalValue(pthread_getspecific(key_)); + + // Releases resources associated with the key. This will *not* + // delete managed objects for other threads. 
+ GTEST_CHECK_POSIX_SUCCESS_(pthread_key_delete(key_)); + } + + T* pointer() { return GetOrCreateValue(); } + const T* pointer() const { return GetOrCreateValue(); } + const T& get() const { return *pointer(); } + void set(const T& value) { *pointer() = value; } + + private: + // Holds a value of type T. + class ValueHolder : public ThreadLocalValueHolderBase { + public: + explicit ValueHolder(const T& value) : value_(value) {} + + T* pointer() { return &value_; } + + private: + T value_; + GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder); + }; + + static pthread_key_t CreateKey() { + pthread_key_t key; + // When a thread exits, DeleteThreadLocalValue() will be called on + // the object managed for that thread. + GTEST_CHECK_POSIX_SUCCESS_( + pthread_key_create(&key, &DeleteThreadLocalValue)); + return key; + } + + T* GetOrCreateValue() const { + ThreadLocalValueHolderBase* const holder = + static_cast<ThreadLocalValueHolderBase*>(pthread_getspecific(key_)); + if (holder != NULL) { + return CheckedDowncastToActualType<ValueHolder>(holder)->pointer(); + } + + ValueHolder* const new_holder = new ValueHolder(default_); + ThreadLocalValueHolderBase* const holder_base = new_holder; + GTEST_CHECK_POSIX_SUCCESS_(pthread_setspecific(key_, holder_base)); + return new_holder->pointer(); + } + + // A key pthreads uses for looking up per-thread values. + const pthread_key_t key_; + const T default_; // The default value for each thread. + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal); +}; + +# define GTEST_IS_THREADSAFE 1 + +#else // GTEST_HAS_PTHREAD + +// A dummy implementation of synchronization primitives (mutex, lock, +// and thread-local variable). Necessary for compiling Google Test where +// mutex is not supported - using Google Test in multiple threads is not +// supported on such platforms. 
+ +class Mutex { + public: + Mutex() {} + void Lock() {} + void Unlock() {} + void AssertHeld() const {} +}; + +# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \ + extern ::testing::internal::Mutex mutex + +# define GTEST_DEFINE_STATIC_MUTEX_(mutex) ::testing::internal::Mutex mutex + +class GTestMutexLock { + public: + explicit GTestMutexLock(Mutex*) {} // NOLINT +}; + +typedef GTestMutexLock MutexLock; + +template <typename T> +class ThreadLocal { + public: + ThreadLocal() : value_() {} + explicit ThreadLocal(const T& value) : value_(value) {} + T* pointer() { return &value_; } + const T* pointer() const { return &value_; } + const T& get() const { return value_; } + void set(const T& value) { value_ = value; } + private: + T value_; +}; + +// The above synchronization primitives have dummy implementations. +// Therefore Google Test is not thread-safe. +# define GTEST_IS_THREADSAFE 0 + +#endif // GTEST_HAS_PTHREAD + +// Returns the number of threads running in the process, or 0 to indicate that +// we cannot detect it. +GTEST_API_ size_t GetThreadCount(); + +// Passing non-POD classes through ellipsis (...) crashes the ARM +// compiler and generates a warning in Sun Studio. The Nokia Symbian +// and the IBM XL C/C++ compiler try to instantiate a copy constructor +// for objects passed through ellipsis (...), failing for uncopyable +// objects. We define this to ensure that only POD is passed through +// ellipsis on these systems. +#if defined(__SYMBIAN32__) || defined(__IBMCPP__) || defined(__SUNPRO_CC) +// We lose support for NULL detection where the compiler doesn't like +// passing non-POD classes through ellipsis (...). +# define GTEST_ELLIPSIS_NEEDS_POD_ 1 +#else +# define GTEST_CAN_COMPARE_NULL 1 +#endif + +// The Nokia Symbian and IBM XL C/C++ compilers cannot decide between +// const T& and const T* in a function template. These compilers +// _can_ decide between class template specializations for T and T*, +// so a tr1::type_traits-like is_pointer works. 
+#if defined(__SYMBIAN32__) || defined(__IBMCPP__) +# define GTEST_NEEDS_IS_POINTER_ 1 +#endif + +template <bool bool_value> +struct bool_constant { + typedef bool_constant<bool_value> type; + static const bool value = bool_value; +}; +template <bool bool_value> const bool bool_constant<bool_value>::value; + +typedef bool_constant<false> false_type; +typedef bool_constant<true> true_type; + +template <typename T> +struct is_pointer : public false_type {}; + +template <typename T> +struct is_pointer<T*> : public true_type {}; + +template <typename Iterator> +struct IteratorTraits { + typedef typename Iterator::value_type value_type; +}; + +template <typename T> +struct IteratorTraits<T*> { + typedef T value_type; +}; + +template <typename T> +struct IteratorTraits<const T*> { + typedef T value_type; +}; + +#if GTEST_OS_WINDOWS +# define GTEST_PATH_SEP_ "\\" +# define GTEST_HAS_ALT_PATH_SEP_ 1 +// The biggest signed integer type the compiler supports. +typedef __int64 BiggestInt; +#else +# define GTEST_PATH_SEP_ "/" +# define GTEST_HAS_ALT_PATH_SEP_ 0 +typedef long long BiggestInt; // NOLINT +#endif // GTEST_OS_WINDOWS + +// Utilities for char. + +// isspace(int ch) and friends accept an unsigned char or EOF. char +// may be signed, depending on the compiler (or compiler flags). +// Therefore we need to cast a char to unsigned char before calling +// isspace(), etc. 
+ +inline bool IsAlpha(char ch) { + return isalpha(static_cast<unsigned char>(ch)) != 0; +} +inline bool IsAlNum(char ch) { + return isalnum(static_cast<unsigned char>(ch)) != 0; +} +inline bool IsDigit(char ch) { + return isdigit(static_cast<unsigned char>(ch)) != 0; +} +inline bool IsLower(char ch) { + return islower(static_cast<unsigned char>(ch)) != 0; +} +inline bool IsSpace(char ch) { + return isspace(static_cast<unsigned char>(ch)) != 0; +} +inline bool IsUpper(char ch) { + return isupper(static_cast<unsigned char>(ch)) != 0; +} +inline bool IsXDigit(char ch) { + return isxdigit(static_cast<unsigned char>(ch)) != 0; +} +inline bool IsXDigit(wchar_t ch) { + const unsigned char low_byte = static_cast<unsigned char>(ch); + return ch == low_byte && isxdigit(low_byte) != 0; +} + +inline char ToLower(char ch) { + return static_cast<char>(tolower(static_cast<unsigned char>(ch))); +} +inline char ToUpper(char ch) { + return static_cast<char>(toupper(static_cast<unsigned char>(ch))); +} + +// The testing::internal::posix namespace holds wrappers for common +// POSIX functions. These wrappers hide the differences between +// Windows/MSVC and POSIX systems. Since some compilers define these +// standard functions as macros, the wrapper cannot have the same name +// as the wrapped function. + +namespace posix { + +// Functions with a different name on Windows. 
+ +#if GTEST_OS_WINDOWS + +typedef struct _stat StatStruct; + +# ifdef __BORLANDC__ +inline int IsATTY(int fd) { return isatty(fd); } +inline int StrCaseCmp(const char* s1, const char* s2) { + return stricmp(s1, s2); +} +inline char* StrDup(const char* src) { return strdup(src); } +# else // !__BORLANDC__ +# if GTEST_OS_WINDOWS_MOBILE +inline int IsATTY(int /* fd */) { return 0; } +# else +inline int IsATTY(int fd) { return _isatty(fd); } +# endif // GTEST_OS_WINDOWS_MOBILE +inline int StrCaseCmp(const char* s1, const char* s2) { + return _stricmp(s1, s2); +} +inline char* StrDup(const char* src) { return _strdup(src); } +# endif // __BORLANDC__ + +# if GTEST_OS_WINDOWS_MOBILE +inline int FileNo(FILE* file) { return reinterpret_cast<int>(_fileno(file)); } +// Stat(), RmDir(), and IsDir() are not needed on Windows CE at this +// time and thus not defined there. +# else +inline int FileNo(FILE* file) { return _fileno(file); } +inline int Stat(const char* path, StatStruct* buf) { return _stat(path, buf); } +inline int RmDir(const char* dir) { return _rmdir(dir); } +inline bool IsDir(const StatStruct& st) { + return (_S_IFDIR & st.st_mode) != 0; +} +# endif // GTEST_OS_WINDOWS_MOBILE + +#else + +typedef struct stat StatStruct; + +inline int FileNo(FILE* file) { return fileno(file); } +inline int IsATTY(int fd) { return isatty(fd); } +inline int Stat(const char* path, StatStruct* buf) { return stat(path, buf); } +inline int StrCaseCmp(const char* s1, const char* s2) { + return strcasecmp(s1, s2); +} +inline char* StrDup(const char* src) { return strdup(src); } +inline int RmDir(const char* dir) { return rmdir(dir); } +inline bool IsDir(const StatStruct& st) { return S_ISDIR(st.st_mode); } + +#endif // GTEST_OS_WINDOWS + +// Functions deprecated by MSVC 8.0. + +#ifdef _MSC_VER +// Temporarily disable warning 4996 (deprecated function). 
+# pragma warning(push) +# pragma warning(disable:4996) +#endif + +inline const char* StrNCpy(char* dest, const char* src, size_t n) { + return strncpy(dest, src, n); +} + +// ChDir(), FReopen(), FDOpen(), Read(), Write(), Close(), and +// StrError() aren't needed on Windows CE at this time and thus not +// defined there. + +#if !GTEST_OS_WINDOWS_MOBILE +inline int ChDir(const char* dir) { return chdir(dir); } +#endif +inline FILE* FOpen(const char* path, const char* mode) { + return fopen(path, mode); +} +#if !GTEST_OS_WINDOWS_MOBILE +inline FILE *FReopen(const char* path, const char* mode, FILE* stream) { + return freopen(path, mode, stream); +} +inline FILE* FDOpen(int fd, const char* mode) { return fdopen(fd, mode); } +#endif +inline int FClose(FILE* fp) { return fclose(fp); } +#if !GTEST_OS_WINDOWS_MOBILE +inline int Read(int fd, void* buf, unsigned int count) { + return static_cast<int>(read(fd, buf, count)); +} +inline int Write(int fd, const void* buf, unsigned int count) { + return static_cast<int>(write(fd, buf, count)); +} +inline int Close(int fd) { return close(fd); } +inline const char* StrError(int errnum) { return strerror(errnum); } +#endif +inline const char* GetEnv(const char* name) { +#if GTEST_OS_WINDOWS_MOBILE + // We are on Windows CE, which has no environment variables. + return NULL; +#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9) + // Environment variables which we programmatically clear will be set to the + // empty string rather than unset (NULL). Handle that case. + const char* const env = getenv(name); + return (env != NULL && env[0] != '\0') ? env : NULL; +#else + return getenv(name); +#endif +} + +#ifdef _MSC_VER +# pragma warning(pop) // Restores the warning state. +#endif + +#if GTEST_OS_WINDOWS_MOBILE +// Windows CE has no C library. The abort() function is used in +// several places in Google Test. This implementation provides a reasonable +// imitation of standard behaviour. 
+void Abort(); +#else +inline void Abort() { abort(); } +#endif // GTEST_OS_WINDOWS_MOBILE + +} // namespace posix + +// MSVC "deprecates" snprintf and issues warnings wherever it is used. In +// order to avoid these warnings, we need to use _snprintf or _snprintf_s on +// MSVC-based platforms. We map the GTEST_SNPRINTF_ macro to the appropriate +// function in order to achieve that. We use macro definition here because +// snprintf is a variadic function. +#if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE +// MSVC 2005 and above support variadic macros. +# define GTEST_SNPRINTF_(buffer, size, format, ...) \ + _snprintf_s(buffer, size, size, format, __VA_ARGS__) +#elif defined(_MSC_VER) +// Windows CE does not define _snprintf_s and MSVC prior to 2005 doesn't +// complain about _snprintf. +# define GTEST_SNPRINTF_ _snprintf +#else +# define GTEST_SNPRINTF_ snprintf +#endif + +// The maximum number a BiggestInt can represent. This definition +// works no matter BiggestInt is represented in one's complement or +// two's complement. +// +// We cannot rely on numeric_limits in STL, as __int64 and long long +// are not part of standard C++ and numeric_limits doesn't need to be +// defined for them. +const BiggestInt kMaxBiggestInt = + ~(static_cast<BiggestInt>(1) << (8*sizeof(BiggestInt) - 1)); + +// This template class serves as a compile-time function from size to +// type. It maps a size in bytes to a primitive type with that +// size. e.g. +// +// TypeWithSize<4>::UInt +// +// is typedef-ed to be unsigned int (unsigned integer made up of 4 +// bytes). +// +// Such functionality should belong to STL, but I cannot find it +// there. +// +// Google Test uses this class in the implementation of floating-point +// comparison. +// +// For now it only handles UInt (unsigned int) as that's all Google Test +// needs. Other types can be easily added in the future if need +// arises. 
+template <size_t size> +class TypeWithSize { + public: + // This prevents the user from using TypeWithSize<N> with incorrect + // values of N. + typedef void UInt; +}; + +// The specialization for size 4. +template <> +class TypeWithSize<4> { + public: + // unsigned int has size 4 in both gcc and MSVC. + // + // As base/basictypes.h doesn't compile on Windows, we cannot use + // uint32, uint64, and etc here. + typedef int Int; + typedef unsigned int UInt; +}; + +// The specialization for size 8. +template <> +class TypeWithSize<8> { + public: +#if GTEST_OS_WINDOWS + typedef __int64 Int; + typedef unsigned __int64 UInt; +#else + typedef long long Int; // NOLINT + typedef unsigned long long UInt; // NOLINT +#endif // GTEST_OS_WINDOWS +}; + +// Integer types of known sizes. +typedef TypeWithSize<4>::Int Int32; +typedef TypeWithSize<4>::UInt UInt32; +typedef TypeWithSize<8>::Int Int64; +typedef TypeWithSize<8>::UInt UInt64; +typedef TypeWithSize<8>::Int TimeInMillis; // Represents time in milliseconds. + +// Utilities for command line flags and environment variables. + +// Macro for referencing flags. +#define GTEST_FLAG(name) FLAGS_gtest_##name + +// Macros for declaring flags. +#define GTEST_DECLARE_bool_(name) GTEST_API_ extern bool GTEST_FLAG(name) +#define GTEST_DECLARE_int32_(name) \ + GTEST_API_ extern ::testing::internal::Int32 GTEST_FLAG(name) +#define GTEST_DECLARE_string_(name) \ + GTEST_API_ extern ::std::string GTEST_FLAG(name) + +// Macros for defining flags. 
+#define GTEST_DEFINE_bool_(name, default_val, doc) \ + GTEST_API_ bool GTEST_FLAG(name) = (default_val) +#define GTEST_DEFINE_int32_(name, default_val, doc) \ + GTEST_API_ ::testing::internal::Int32 GTEST_FLAG(name) = (default_val) +#define GTEST_DEFINE_string_(name, default_val, doc) \ + GTEST_API_ ::std::string GTEST_FLAG(name) = (default_val) + +// Thread annotations +#define GTEST_EXCLUSIVE_LOCK_REQUIRED_(locks) +#define GTEST_LOCK_EXCLUDED_(locks) + +// Parses 'str' for a 32-bit signed integer. If successful, writes the result +// to *value and returns true; otherwise leaves *value unchanged and returns +// false. +// TODO(chandlerc): Find a better way to refactor flag and environment parsing +// out of both gtest-port.cc and gtest.cc to avoid exporting this utility +// function. +bool ParseInt32(const Message& src_text, const char* str, Int32* value); + +// Parses a bool/Int32/string from the environment variable +// corresponding to the given Google Test flag. +bool BoolFromGTestEnv(const char* flag, bool default_val); +GTEST_API_ Int32 Int32FromGTestEnv(const char* flag, Int32 default_val); +const char* StringFromGTestEnv(const char* flag, const char* default_val); + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ + +#if GTEST_OS_LINUX +# include <stdlib.h> +# include <sys/types.h> +# include <sys/wait.h> +# include <unistd.h> +#endif // GTEST_OS_LINUX + +#if GTEST_HAS_EXCEPTIONS +# include <stdexcept> +#endif + +#include <ctype.h> +#include <float.h> +#include <string.h> +#include <iomanip> +#include <limits> +#include <set> + +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// The Google C++ Testing Framework (Google Test) +// +// This header file defines the Message class. +// +// IMPORTANT NOTE: Due to limitation of the C++ language, we have to +// leave some internal implementation details in this header file. +// They are clearly marked by comments like this: +// +// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +// +// Such code is NOT meant to be used by a user directly, and is subject +// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user +// program! + +#ifndef GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ +#define GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ + +#include <limits> + + +// Ensures that there is at least one operator<< in the global namespace. 
+// See Message& operator<<(...) below for why. +void operator<<(const testing::internal::Secret&, int); + +namespace testing { + +// The Message class works like an ostream repeater. +// +// Typical usage: +// +// 1. You stream a bunch of values to a Message object. +// It will remember the text in a stringstream. +// 2. Then you stream the Message object to an ostream. +// This causes the text in the Message to be streamed +// to the ostream. +// +// For example; +// +// testing::Message foo; +// foo << 1 << " != " << 2; +// std::cout << foo; +// +// will print "1 != 2". +// +// Message is not intended to be inherited from. In particular, its +// destructor is not virtual. +// +// Note that stringstream behaves differently in gcc and in MSVC. You +// can stream a NULL char pointer to it in the former, but not in the +// latter (it causes an access violation if you do). The Message +// class hides this difference by treating a NULL char pointer as +// "(null)". +class GTEST_API_ Message { + private: + // The type of basic IO manipulators (endl, ends, and flush) for + // narrow streams. + typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&); + + public: + // Constructs an empty Message. + Message(); + + // Copy constructor. + Message(const Message& msg) : ss_(new ::std::stringstream) { // NOLINT + *ss_ << msg.GetString(); + } + + // Constructs a Message from a C-string. + explicit Message(const char* str) : ss_(new ::std::stringstream) { + *ss_ << str; + } + +#if GTEST_OS_SYMBIAN + // Streams a value (either a pointer or not) to this object. + template <typename T> + inline Message& operator <<(const T& value) { + StreamHelper(typename internal::is_pointer<T>::type(), value); + return *this; + } +#else + // Streams a non-pointer value to this object. + template <typename T> + inline Message& operator <<(const T& val) { + // Some libraries overload << for STL containers. These + // overloads are defined in the global namespace instead of ::std. 
+ // + // C++'s symbol lookup rule (i.e. Koenig lookup) says that these + // overloads are visible in either the std namespace or the global + // namespace, but not other namespaces, including the testing + // namespace which Google Test's Message class is in. + // + // To allow STL containers (and other types that has a << operator + // defined in the global namespace) to be used in Google Test + // assertions, testing::Message must access the custom << operator + // from the global namespace. With this using declaration, + // overloads of << defined in the global namespace and those + // visible via Koenig lookup are both exposed in this function. + using ::operator <<; + *ss_ << val; + return *this; + } + + // Streams a pointer value to this object. + // + // This function is an overload of the previous one. When you + // stream a pointer to a Message, this definition will be used as it + // is more specialized. (The C++ Standard, section + // [temp.func.order].) If you stream a non-pointer, then the + // previous definition will be used. + // + // The reason for this overload is that streaming a NULL pointer to + // ostream is undefined behavior. Depending on the compiler, you + // may get "0", "(nil)", "(null)", or an access violation. To + // ensure consistent result across compilers, we always treat NULL + // as "(null)". + template <typename T> + inline Message& operator <<(T* const& pointer) { // NOLINT + if (pointer == NULL) { + *ss_ << "(null)"; + } else { + *ss_ << pointer; + } + return *this; + } +#endif // GTEST_OS_SYMBIAN + + // Since the basic IO manipulators are overloaded for both narrow + // and wide streams, we have to provide this specialized definition + // of operator <<, even though its body is the same as the + // templatized version above. Without this definition, streaming + // endl or other basic IO manipulators to Message will confuse the + // compiler. 
+ Message& operator <<(BasicNarrowIoManip val) { + *ss_ << val; + return *this; + } + + // Instead of 1/0, we want to see true/false for bool values. + Message& operator <<(bool b) { + return *this << (b ? "true" : "false"); + } + + // These two overloads allow streaming a wide C string to a Message + // using the UTF-8 encoding. + Message& operator <<(const wchar_t* wide_c_str); + Message& operator <<(wchar_t* wide_c_str); + +#if GTEST_HAS_STD_WSTRING + // Converts the given wide string to a narrow string using the UTF-8 + // encoding, and streams the result to this Message object. + Message& operator <<(const ::std::wstring& wstr); +#endif // GTEST_HAS_STD_WSTRING + +#if GTEST_HAS_GLOBAL_WSTRING + // Converts the given wide string to a narrow string using the UTF-8 + // encoding, and streams the result to this Message object. + Message& operator <<(const ::wstring& wstr); +#endif // GTEST_HAS_GLOBAL_WSTRING + + // Gets the text streamed to this object so far as an std::string. + // Each '\0' character in the buffer is replaced with "\\0". + // + // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. + std::string GetString() const; + + private: + +#if GTEST_OS_SYMBIAN + // These are needed as the Nokia Symbian Compiler cannot decide between + // const T& and const T* in a function template. The Nokia compiler _can_ + // decide between class template specializations for T and T*, so a + // tr1::type_traits-like is_pointer works, and we can overload on that. + template <typename T> + inline void StreamHelper(internal::true_type /*is_pointer*/, T* pointer) { + if (pointer == NULL) { + *ss_ << "(null)"; + } else { + *ss_ << pointer; + } + } + template <typename T> + inline void StreamHelper(internal::false_type /*is_pointer*/, + const T& value) { + // See the comments in Message& operator <<(const T&) above for why + // we need this using statement. 
+ using ::operator <<; + *ss_ << value; + } +#endif // GTEST_OS_SYMBIAN + + // We'll hold the text streamed to this object here. + const internal::scoped_ptr< ::std::stringstream> ss_; + + // We declare (but don't implement) this to prevent the compiler + // from implementing the assignment operator. + void operator=(const Message&); +}; + +// Streams a Message to an ostream. +inline std::ostream& operator <<(std::ostream& os, const Message& sb) { + return os << sb.GetString(); +} + +namespace internal { + +// Converts a streamable value to an std::string. A NULL pointer is +// converted to "(null)". When the input value is a ::string, +// ::std::string, ::wstring, or ::std::wstring object, each NUL +// character in it is replaced with "\\0". +template <typename T> +std::string StreamableToString(const T& streamable) { + return (Message() << streamable).GetString(); +} + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee) +// +// The Google C++ Testing Framework (Google Test) +// +// This header file declares the String class and functions used internally by +// Google Test. They are subject to change without notice. They should not be used +// by code external to Google Test. +// +// This header file is #included by <gtest/internal/gtest-internal.h>. +// It should not be #included by other files. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ + +#ifdef __BORLANDC__ +// string.h is not guaranteed to provide strcpy on C++ Builder. +# include <mem.h> +#endif + +#include <string.h> +#include <string> + + +namespace testing { +namespace internal { + +// String - a non-instantiable class holding static string utilities. +class GTEST_API_ String { + public: + // Static utility methods + + // Clones a 0-terminated C string, allocating memory using new. The + // caller is responsible for deleting the return value using + // delete[]. Returns the cloned string, or NULL if the input is + // NULL.
+ // + // This is different from strdup() in string.h, which allocates + // memory using malloc(). + static const char* CloneCString(const char* c_str); + +#if GTEST_OS_WINDOWS_MOBILE + // Windows CE does not have the 'ANSI' versions of Win32 APIs. To be + // able to pass strings to Win32 APIs on CE we need to convert them + // to 'Unicode', UTF-16. + + // Creates a UTF-16 wide string from the given ANSI string, allocating + // memory using new. The caller is responsible for deleting the return + // value using delete[]. Returns the wide string, or NULL if the + // input is NULL. + // + // The wide string is created using the ANSI codepage (CP_ACP) to + // match the behaviour of the ANSI versions of Win32 calls and the + // C runtime. + static LPCWSTR AnsiToUtf16(const char* c_str); + + // Creates an ANSI string from the given wide string, allocating + // memory using new. The caller is responsible for deleting the return + // value using delete[]. Returns the ANSI string, or NULL if the + // input is NULL. + // + // The returned string is created using the ANSI codepage (CP_ACP) to + // match the behaviour of the ANSI versions of Win32 calls and the + // C runtime. + static const char* Utf16ToAnsi(LPCWSTR utf16_str); +#endif + + // Compares two C strings. Returns true iff they have the same content. + // + // Unlike strcmp(), this function can handle NULL argument(s). A + // NULL C string is considered different to any non-NULL C string, + // including the empty string. + static bool CStringEquals(const char* lhs, const char* rhs); + + // Converts a wide C string to a std::string using the UTF-8 encoding. + // NULL will be converted to "(null)". If an error occurred during + // the conversion, "(failed to convert from wide string)" is + // returned. + static std::string ShowWideCString(const wchar_t* wide_c_str); + + // Compares two wide C strings. Returns true iff they have the same + // content. + // + // Unlike wcscmp(), this function can handle NULL argument(s).
A + // NULL C string is considered different to any non-NULL C string, + // including the empty string. + static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs); + + // Compares two C strings, ignoring case. Returns true iff they + // have the same content. + // + // Unlike strcasecmp(), this function can handle NULL argument(s). + // A NULL C string is considered different to any non-NULL C string, + // including the empty string. + static bool CaseInsensitiveCStringEquals(const char* lhs, + const char* rhs); + + // Compares two wide C strings, ignoring case. Returns true iff they + // have the same content. + // + // Unlike wcscasecmp(), this function can handle NULL argument(s). + // A NULL wide C string is considered different to any non-NULL wide C string, + // including the empty string. + // NB: The implementations on different platforms slightly differ. + // On Windows, this method uses _wcsicmp which compares according to LC_CTYPE + // environment variable. On GNU platform this method uses wcscasecmp + // which compares according to LC_CTYPE category of the current locale. + // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the + // current locale. + static bool CaseInsensitiveWideCStringEquals(const wchar_t* lhs, + const wchar_t* rhs); + + // Returns true iff the given string ends with the given suffix, ignoring + // case. Any string is considered to end with an empty suffix. + static bool EndsWithCaseInsensitive( + const std::string& str, const std::string& suffix); + + // Formats an int value as "%02d". + static std::string FormatIntWidth2(int value); // "%02d" for width == 2 + + // Formats an int value as "%X". + static std::string FormatHexInt(int value); + + // Formats a byte as "%02X". + static std::string FormatByte(unsigned char value); + + private: + String(); // Not meant to be instantiated. +}; // class String + +// Gets the content of the stringstream's buffer as an std::string.
Each '\0' +// character in the buffer is replaced with "\\0". +GTEST_API_ std::string StringStreamToString(::std::stringstream* stream); + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: keith.ray@gmail.com (Keith Ray) +// +// Google Test filepath utilities +// +// This header file declares classes and functions used internally by +// Google Test. 
They are subject to change without notice. +// +// This file is #included in <gtest/internal/gtest-internal.h>. +// Do not include this header file separately! + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ + + +namespace testing { +namespace internal { + +// FilePath - a class for file and directory pathname manipulation which +// handles platform-specific conventions (like the pathname separator). +// Used for helper functions for naming files in a directory for xml output. +// Except for Set methods, all methods are const or static, which provides an +// "immutable value object" -- useful for peace of mind. +// A FilePath with a value ending in a path separator ("like/this/") represents +// a directory, otherwise it is assumed to represent a file. In either case, +// it may or may not represent an actual file or directory in the file system. +// Names are NOT checked for syntax correctness -- no checking for illegal +// characters, malformed paths, etc. + +class GTEST_API_ FilePath { + public: + FilePath() : pathname_("") { } + FilePath(const FilePath& rhs) : pathname_(rhs.pathname_) { } + + explicit FilePath(const std::string& pathname) : pathname_(pathname) { + Normalize(); + } + + FilePath& operator=(const FilePath& rhs) { + Set(rhs); + return *this; + } + + void Set(const FilePath& rhs) { // Plain copy of rhs.pathname_; Normalize() is not called here. + pathname_ = rhs.pathname_; + } + + const std::string& string() const { return pathname_; } + const char* c_str() const { return pathname_.c_str(); } + + // Returns the current working directory, or "" if unsuccessful. + static FilePath GetCurrentDir(); + + // Given directory = "dir", base_name = "test", number = 0, + // extension = "xml", returns "dir/test.xml". If number is greater + // than zero (e.g., 12), returns "dir/test_12.xml". + // On Windows platform, uses \ as the separator rather than /.
+ static FilePath MakeFileName(const FilePath& directory, + const FilePath& base_name, + int number, + const char* extension); + + // Given directory = "dir", relative_path = "test.xml", + // returns "dir/test.xml". + // On Windows, uses \ as the separator rather than /. + static FilePath ConcatPaths(const FilePath& directory, + const FilePath& relative_path); + + // Returns a pathname for a file that does not currently exist. The pathname + // will be directory/base_name.extension or + // directory/base_name_<number>.extension if directory/base_name.extension + // already exists. The number will be incremented until a pathname is found + // that does not already exist. + // Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'. + // There could be a race condition if two or more processes are calling this + // function at the same time -- they could both pick the same filename. + static FilePath GenerateUniqueFileName(const FilePath& directory, + const FilePath& base_name, + const char* extension); + + // Returns true iff the path is "". + bool IsEmpty() const { return pathname_.empty(); } + + // If input name has a trailing separator character, removes it and returns + // the name, otherwise return the name string unmodified. + // On Windows platform, uses \ as the separator, other platforms use /. + FilePath RemoveTrailingPathSeparator() const; + + // Returns a copy of the FilePath with the directory part removed. + // Example: FilePath("path/to/file").RemoveDirectoryName() returns + // FilePath("file"). If there is no directory part ("just_a_file"), it returns + // the FilePath unmodified. If there is no file part ("just_a_dir/") it + // returns an empty FilePath (""). + // On Windows platform, '\' is the path separator, otherwise it is '/'. + FilePath RemoveDirectoryName() const; + + // RemoveFileName returns the directory path with the filename removed. + // Example: FilePath("path/to/file").RemoveFileName() returns "path/to/".
+ // If the FilePath is "a_file" or "/a_file", RemoveFileName returns + // FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does + // not have a file, like "just/a/dir/", it returns the FilePath unmodified. + // On Windows platform, '\' is the path separator, otherwise it is '/'. + FilePath RemoveFileName() const; + + // Returns a copy of the FilePath with the case-insensitive extension removed. + // Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns + // FilePath("dir/file"). If a case-insensitive extension is not + // found, returns a copy of the original FilePath. + FilePath RemoveExtension(const char* extension) const; + + // Creates directories so that path exists. Returns true if successful or if + // the directories already exist; returns false if unable to create + // directories for any reason. Will also return false if the FilePath does + // not represent a directory (that is, it doesn't end with a path separator). + bool CreateDirectoriesRecursively() const; + + // Create the directory so that path exists. Returns true if successful or + // if the directory already exists; returns false if unable to create the + // directory for any reason, including if the parent directory does not + // exist. Not named "CreateDirectory" because that's a macro on Windows. + bool CreateFolder() const; + + // Returns true if FilePath describes something in the file-system, + // either a file, directory, or whatever, and that something exists. + bool FileOrDirectoryExists() const; + + // Returns true if pathname describes a directory in the file-system + // that exists. + bool DirectoryExists() const; + + // Returns true if FilePath ends with a path separator, which indicates that + // it is intended to represent a directory. Returns false otherwise. + // This does NOT check that a directory (or file) actually exists. + bool IsDirectory() const; + + // Returns true if pathname describes a root directory.
(Windows has one + // root directory per disk drive.) + bool IsRootDirectory() const; + + // Returns true if pathname describes an absolute path. + bool IsAbsolutePath() const; + + private: + // Replaces multiple consecutive separators with a single separator. + // For example, "bar///foo" becomes "bar/foo". Does not eliminate other + // redundancies that might be in a pathname involving "." or "..". + // + // A pathname with multiple consecutive separators may occur either through + // user error or as a result of some scripts or APIs that generate a pathname + // with a trailing separator. On other platforms the same API or script + // may NOT generate a pathname with a trailing "/". Then elsewhere that + // pathname may have another "/" and pathname components added to it, + // without checking for the separator already being there. + // The script language and operating system may allow paths like "foo//bar" + // but some of the functions in FilePath will not handle that correctly. In + // particular, RemoveTrailingPathSeparator() only removes one separator, and + // it is called in CreateDirectoriesRecursively() assuming that it will change + // a pathname from directory syntax (trailing separator) to filename syntax. + // + // On Windows this method also replaces the alternate path separator '/' with + // the primary path separator '\\', so that for example "bar\\/\\foo" becomes + // "bar\\foo". + + void Normalize(); + + // Returns a pointer to the last occurrence of a valid path separator in + // the FilePath. On Windows, for example, both '/' and '\' are valid path + // separators. Returns NULL if no path separator was found. + const char* FindLastPathSeparator() const; + + std::string pathname_; +}; // class FilePath + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ +// This file was GENERATED by command: +// pump.py gtest-type-util.h.pump +// DO NOT EDIT BY HAND!!!
+ +// Copyright 2008 Google Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Type utilities needed for implementing typed and type-parameterized +// tests. This file is generated by a SCRIPT. DO NOT EDIT BY HAND! +// +// Currently we support at most 50 types in a list, and at most 50 +// type-parameterized tests in one type-parameterized test case. +// Please contact googletestframework@googlegroups.com if you need +// more. 
+ +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ + + +// #ifdef __GNUC__ is too general here. It is possible to use gcc without using +// libstdc++ (which is where cxxabi.h comes from). +# if GTEST_HAS_CXXABI_H_ +# include <cxxabi.h> +# elif defined(__HP_aCC) +# include <acxx_demangle.h> +# endif // GTEST_HAS_CXXABI_H_ + +namespace testing { +namespace internal { + +// GetTypeName<T>() returns a human-readable name of type T. +// NB: This function is also used in Google Mock, so don't move it inside of +// the typed-test-only section below. +template <typename T> +std::string GetTypeName() { +# if GTEST_HAS_RTTI + + const char* const name = typeid(T).name(); +# if GTEST_HAS_CXXABI_H_ || defined(__HP_aCC) + int status = 0; + // gcc's implementation of typeid(T).name() mangles the type name, + // so we have to demangle it. +# if GTEST_HAS_CXXABI_H_ + using abi::__cxa_demangle; +# endif // GTEST_HAS_CXXABI_H_ + char* const readable_name = __cxa_demangle(name, 0, 0, &status); + const std::string name_str(status == 0 ? readable_name : name); + free(readable_name); // Releases the __cxa_demangle buffer; a NULL from a failed demangle is harmless here. + return name_str; +# else + return name; +# endif // GTEST_HAS_CXXABI_H_ || __HP_aCC + +# else + + return "<type>"; + +# endif // GTEST_HAS_RTTI +} + +#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P + +// AssertTypeEq<T1, T2>::type is defined iff T1 and T2 are the same +// type. This can be used as a compile-time assertion to ensure that +// two types are equal. + +template <typename T1, typename T2> +struct AssertTypeEq; + +template <typename T> +struct AssertTypeEq<T, T> { + typedef bool type; +}; + +// A unique type used as the default value for the arguments of class +// template Types. This allows us to simulate variadic templates +// (e.g. Types<int>, Types<int, double>, etc.), which C++ doesn't +// support directly. +struct None {}; + +// The following family of struct and struct templates are used to +// represent type lists.
In particular, TypesN<T1, T2, ..., TN> +// represents a type list with N types (T1, T2, ..., and TN) in it. +// Except for Types0, every struct in the family has two member types: +// Head for the first type in the list, and Tail for the rest of the +// list. + +// The empty type list. +struct Types0 {}; + +// Type lists of length 1, 2, 3, and so on. + +template <typename T1> +struct Types1 { + typedef T1 Head; + typedef Types0 Tail; +}; +template <typename T1, typename T2> +struct Types2 { + typedef T1 Head; + typedef Types1<T2> Tail; +}; + +template <typename T1, typename T2, typename T3> +struct Types3 { + typedef T1 Head; + typedef Types2<T2, T3> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4> +struct Types4 { + typedef T1 Head; + typedef Types3<T2, T3, T4> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5> +struct Types5 { + typedef T1 Head; + typedef Types4<T2, T3, T4, T5> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6> +struct Types6 { + typedef T1 Head; + typedef Types5<T2, T3, T4, T5, T6> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7> +struct Types7 { + typedef T1 Head; + typedef Types6<T2, T3, T4, T5, T6, T7> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8> +struct Types8 { + typedef T1 Head; + typedef Types7<T2, T3, T4, T5, T6, T7, T8> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9> +struct Types9 { + typedef T1 Head; + typedef Types8<T2, T3, T4, T5, T6, T7, T8, T9> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10> +struct Types10 { + typedef T1 Head; + typedef Types9<T2, T3, T4, T5, T6, T7, T8, 
T9, T10> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11> +struct Types11 { + typedef T1 Head; + typedef Types10<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12> +struct Types12 { + typedef T1 Head; + typedef Types11<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13> +struct Types13 { + typedef T1 Head; + typedef Types12<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14> +struct Types14 { + typedef T1 Head; + typedef Types13<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15> +struct Types15 { + typedef T1 Head; + typedef Types14<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16> +struct Types16 { + typedef T1 Head; + typedef Types15<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename 
T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17> +struct Types17 { + typedef T1 Head; + typedef Types16<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18> +struct Types18 { + typedef T1 Head; + typedef Types17<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19> +struct Types19 { + typedef T1 Head; + typedef Types18<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20> +struct Types20 { + typedef T1 Head; + typedef Types19<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21> +struct Types21 { + typedef T1 Head; + typedef Types20<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, 
T13, T14, T15, + T16, T17, T18, T19, T20, T21> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22> +struct Types22 { + typedef T1 Head; + typedef Types21<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23> +struct Types23 { + typedef T1 Head; + typedef Types22<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24> +struct Types24 { + typedef T1 Head; + typedef Types23<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25> +struct Types25 { + typedef T1 Head; + typedef Types24<T2, T3, T4, T5, 
T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26> +struct Types26 { + typedef T1 Head; + typedef Types25<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27> +struct Types27 { + typedef T1 Head; + typedef Types26<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28> +struct Types28 { + typedef T1 Head; + typedef Types27<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + 
typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29> +struct Types29 { + typedef T1 Head; + typedef Types28<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30> +struct Types30 { + typedef T1 Head; + typedef Types29<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31> +struct Types31 { + typedef T1 Head; + typedef Types30<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + 
typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32> +struct Types32 { + typedef T1 Head; + typedef Types31<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33> +struct Types33 { + typedef T1 Head; + typedef Types32<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34> +struct Types34 { + typedef T1 Head; + typedef Types33<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, 
typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35> +struct Types35 { + typedef T1 Head; + typedef Types34<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36> +struct Types36 { + typedef T1 Head; + typedef Types35<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37> +struct Types37 { + typedef T1 Head; + typedef Types36<T2, T3, T4, T5, T6, T7, T8, T9, 
T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38> +struct Types38 { + typedef T1 Head; + typedef Types37<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39> +struct Types39 { + typedef T1 Head; + typedef Types38<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, 
typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40> +struct Types40 { + typedef T1 Head; + typedef Types39<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41> +struct Types41 { + typedef T1 Head; + typedef Types40<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename 
T38, typename T39, typename T40, + typename T41, typename T42> +struct Types42 { + typedef T1 Head; + typedef Types41<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43> +struct Types43 { + typedef T1 Head; + typedef Types42<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, + T43> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44> +struct Types44 { + typedef T1 Head; + typedef Types43<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, 
T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, + T44> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45> +struct Types45 { + typedef T1 Head; + typedef Types44<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, + T44, T45> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46> +struct Types46 { + typedef T1 Head; + typedef Types45<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, 
+ T44, T45, T46> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46, typename T47> +struct Types47 { + typedef T1 Head; + typedef Types46<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, + T44, T45, T46, T47> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46, typename T47, typename T48> +struct Types48 { + typedef T1 Head; + typedef Types47<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, + T44, T45, T46, T47, T48> Tail; +}; + 
+template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46, typename T47, typename T48, typename T49> +struct Types49 { + typedef T1 Head; + typedef Types48<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, + T44, T45, T46, T47, T48, T49> Tail; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46, typename T47, typename T48, typename T49, typename T50> +struct Types50 { + typedef T1 Head; + typedef Types49<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, + T44, 
T45, T46, T47, T48, T49, T50> Tail; +}; + + +} // namespace internal + +// We don't want to require the users to write TypesN<...> directly, +// as that would require them to count the length. Types<...> is much +// easier to write, but generates horrible messages when there is a +// compiler error, as gcc insists on printing out each template +// argument, even if it has the default value (this means Types<int> +// will appear as Types<int, None, None, ..., None> in the compiler +// errors). +// +// Our solution is to combine the best part of the two approaches: a +// user would write Types<T1, ..., TN>, and Google Test will translate +// that to TypesN<T1, ..., TN> internally to make error messages +// readable. The translation is done by the 'type' member of the +// Types template. +template <typename T1 = internal::None, typename T2 = internal::None, + typename T3 = internal::None, typename T4 = internal::None, + typename T5 = internal::None, typename T6 = internal::None, + typename T7 = internal::None, typename T8 = internal::None, + typename T9 = internal::None, typename T10 = internal::None, + typename T11 = internal::None, typename T12 = internal::None, + typename T13 = internal::None, typename T14 = internal::None, + typename T15 = internal::None, typename T16 = internal::None, + typename T17 = internal::None, typename T18 = internal::None, + typename T19 = internal::None, typename T20 = internal::None, + typename T21 = internal::None, typename T22 = internal::None, + typename T23 = internal::None, typename T24 = internal::None, + typename T25 = internal::None, typename T26 = internal::None, + typename T27 = internal::None, typename T28 = internal::None, + typename T29 = internal::None, typename T30 = internal::None, + typename T31 = internal::None, typename T32 = internal::None, + typename T33 = internal::None, typename T34 = internal::None, + typename T35 = internal::None, typename T36 = internal::None, + typename T37 = internal::None, typename T38 = 
internal::None, + typename T39 = internal::None, typename T40 = internal::None, + typename T41 = internal::None, typename T42 = internal::None, + typename T43 = internal::None, typename T44 = internal::None, + typename T45 = internal::None, typename T46 = internal::None, + typename T47 = internal::None, typename T48 = internal::None, + typename T49 = internal::None, typename T50 = internal::None> +struct Types { + typedef internal::Types50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, + T41, T42, T43, T44, T45, T46, T47, T48, T49, T50> type; +}; + +template <> +struct Types<internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None> { + typedef internal::Types0 type; +}; +template <typename T1> +struct Types<T1, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, 
internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None> { + typedef internal::Types1<T1> type; +}; +template <typename T1, typename T2> +struct Types<T1, T2, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None> { + typedef internal::Types2<T1, T2> type; +}; +template <typename T1, typename T2, typename T3> +struct Types<T1, T2, T3, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, 
internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None> { + typedef internal::Types3<T1, T2, T3> type; +}; +template <typename T1, typename T2, typename T3, typename T4> +struct Types<T1, T2, T3, T4, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None> { + typedef internal::Types4<T1, T2, T3, T4> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5> +struct Types<T1, T2, T3, T4, T5, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, 
internal::None, + internal::None, internal::None, internal::None> { + typedef internal::Types5<T1, T2, T3, T4, T5> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6> +struct Types<T1, T2, T3, T4, T5, T6, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None> { + typedef internal::Types6<T1, T2, T3, T4, T5, T6> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7> +struct Types<T1, T2, T3, T4, T5, T6, T7, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None> { + typedef internal::Types7<T1, T2, T3, T4, T5, T6, T7> type; +}; +template <typename T1, typename 
T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None> { + typedef internal::Types8<T1, T2, T3, T4, T5, T6, T7, T8> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None> { + typedef internal::Types9<T1, T2, T3, T4, T5, T6, T7, T8, T9> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10> +struct Types<T1, T2, T3, T4, 
T5, T6, T7, T8, T9, T10, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None> { + typedef internal::Types10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None> { + typedef internal::Types11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, internal::None, + internal::None, 
internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None> { + typedef internal::Types12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None> { + typedef internal::Types13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + internal::None, internal::None, internal::None, internal::None, + 
internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None> { + typedef internal::Types14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None> { + typedef internal::Types15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, internal::None, internal::None, internal::None, internal::None, + 
internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None> { + typedef internal::Types16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None> { + typedef internal::Types17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, internal::None, internal::None, 
internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None> { + typedef internal::Types18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None> { + typedef internal::Types19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, 
T15, + T16, T17, T18, T19, T20, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None> { + typedef internal::Types20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None> { + typedef internal::Types21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, 
typename T22> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None> { + typedef internal::Types22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None> { + typedef internal::Types23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, 
+ typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None> { + typedef internal::Types24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None> { + typedef internal::Types25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, 
typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None> { + typedef internal::Types26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None> { + typedef internal::Types27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, 
T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None> { + typedef internal::Types28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + 
internal::None, internal::None, internal::None, internal::None, + internal::None> { + typedef internal::Types29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None> { + typedef internal::Types30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, internal::None, 
internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None> { + typedef internal::Types31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None> { + typedef internal::Types32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, 
typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, T33, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None> { + typedef internal::Types33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, T33, T34, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None> { + typedef internal::Types34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33, T34> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, 
typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, T33, T34, T35, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None> { + typedef internal::Types35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33, T34, T35> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, T33, T34, T35, T36, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None> { + typedef internal::Types36<T1, 
T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33, T34, T35, T36> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, T33, T34, T35, T36, T37, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None> { + typedef internal::Types37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, 
T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, T33, T34, T35, T36, T37, T38, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None> { + typedef internal::Types38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, T33, T34, T35, T36, T37, T38, T39, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None> { + typedef internal::Types39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, 
typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None> { + typedef internal::Types40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, + T40> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None, internal::None> { + typedef internal::Types41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, 
T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, + T41> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, internal::None, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None> { + typedef internal::Types42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, + T41, T42> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, 
T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None, internal::None> { + typedef internal::Types43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, + T41, T42, T43> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, + internal::None, internal::None, internal::None, internal::None, + internal::None, internal::None> { + typedef internal::Types44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, + T41, T42, T43, T44> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, 
typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45, + internal::None, internal::None, internal::None, internal::None, + internal::None> { + typedef internal::Types45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, + T41, T42, T43, T44, T45> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45, + T46, internal::None, internal::None, internal::None, internal::None> { + typedef internal::Types46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, 
T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, + T41, T42, T43, T44, T45, T46> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46, typename T47> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45, + T46, T47, internal::None, internal::None, internal::None> { + typedef internal::Types47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, + T41, T42, T43, T44, T45, T46, T47> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, 
typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46, typename T47, typename T48> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45, + T46, T47, T48, internal::None, internal::None> { + typedef internal::Types48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, + T41, T42, T43, T44, T45, T46, T47, T48> type; +}; +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46, typename T47, typename T48, typename T49> +struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, + T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30, + T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45, + T46, T47, T48, T49, internal::None> { + typedef internal::Types49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, + T41, T42, T43, T44, T45, T46, T47, T48, T49> type; +}; + +namespace internal { + +# define 
GTEST_TEMPLATE_ template <typename T> class + +// The template "selector" struct TemplateSel<Tmpl> is used to +// represent Tmpl, which must be a class template with one type +// parameter, as a type. TemplateSel<Tmpl>::Bind<T>::type is defined +// as the type Tmpl<T>. This allows us to actually instantiate the +// template "selected" by TemplateSel<Tmpl>. +// +// This trick is necessary for simulating typedef for class templates, +// which C++ doesn't support directly. +template <GTEST_TEMPLATE_ Tmpl> +struct TemplateSel { + template <typename T> + struct Bind { + typedef Tmpl<T> type; + }; +}; + +# define GTEST_BIND_(TmplSel, T) \ + TmplSel::template Bind<T>::type + +// A unique struct template used as the default value for the +// arguments of class template Templates. This allows us to simulate +// variadic templates (e.g. Templates<int>, Templates<int, double>, +// and etc), which C++ doesn't support directly. +template <typename T> +struct NoneT {}; + +// The following family of struct and struct templates are used to +// represent template lists. In particular, TemplatesN<T1, T2, ..., +// TN> represents a list of N templates (T1, T2, ..., and TN). Except +// for Templates0, every struct in the family has two member types: +// Head for the selector of the first template in the list, and Tail +// for the rest of the list. + +// The empty template list. +struct Templates0 {}; + +// Template lists of length 1, 2, 3, and so on. 
+ +template <GTEST_TEMPLATE_ T1> +struct Templates1 { + typedef TemplateSel<T1> Head; + typedef Templates0 Tail; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2> +struct Templates2 { + typedef TemplateSel<T1> Head; + typedef Templates1<T2> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3> +struct Templates3 { + typedef TemplateSel<T1> Head; + typedef Templates2<T2, T3> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4> +struct Templates4 { + typedef TemplateSel<T1> Head; + typedef Templates3<T2, T3, T4> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5> +struct Templates5 { + typedef TemplateSel<T1> Head; + typedef Templates4<T2, T3, T4, T5> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6> +struct Templates6 { + typedef TemplateSel<T1> Head; + typedef Templates5<T2, T3, T4, T5, T6> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7> +struct Templates7 { + typedef TemplateSel<T1> Head; + typedef Templates6<T2, T3, T4, T5, T6, T7> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8> +struct Templates8 { + typedef TemplateSel<T1> Head; + typedef Templates7<T2, T3, T4, T5, T6, T7, T8> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9> +struct Templates9 { + typedef TemplateSel<T1> Head; + typedef Templates8<T2, T3, T4, T5, T6, T7, T8, T9> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, 
GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10> +struct Templates10 { + typedef TemplateSel<T1> Head; + typedef Templates9<T2, T3, T4, T5, T6, T7, T8, T9, T10> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11> +struct Templates11 { + typedef TemplateSel<T1> Head; + typedef Templates10<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12> +struct Templates12 { + typedef TemplateSel<T1> Head; + typedef Templates11<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13> +struct Templates13 { + typedef TemplateSel<T1> Head; + typedef Templates12<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14> +struct Templates14 { + typedef TemplateSel<T1> Head; + typedef Templates13<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, 
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15> +struct Templates15 { + typedef TemplateSel<T1> Head; + typedef Templates14<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16> +struct Templates16 { + typedef TemplateSel<T1> Head; + typedef Templates15<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17> +struct Templates17 { + typedef TemplateSel<T1> Head; + typedef Templates16<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18> +struct Templates18 { + typedef TemplateSel<T1> Head; + typedef Templates17<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18> Tail; +}; + 
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19> +struct Templates19 { + typedef TemplateSel<T1> Head; + typedef Templates18<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20> +struct Templates20 { + typedef TemplateSel<T1> Head; + typedef Templates19<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21> +struct Templates21 { + typedef TemplateSel<T1> Head; + typedef Templates20<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, 
GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22> +struct Templates22 { + typedef TemplateSel<T1> Head; + typedef Templates21<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23> +struct Templates23 { + typedef TemplateSel<T1> Head; + typedef Templates22<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24> +struct Templates24 { + typedef TemplateSel<T1> Head; + typedef Templates23<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + 
GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25> +struct Templates25 { + typedef TemplateSel<T1> Head; + typedef Templates24<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26> +struct Templates26 { + typedef TemplateSel<T1> Head; + typedef Templates25<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, 
GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27> +struct Templates27 { + typedef TemplateSel<T1> Head; + typedef Templates26<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28> +struct Templates28 { + typedef TemplateSel<T1> Head; + typedef Templates27<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29> +struct Templates29 { + typedef TemplateSel<T1> Head; + typedef Templates28<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, 
GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30> +struct Templates30 { + typedef TemplateSel<T1> Head; + typedef Templates29<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31> +struct Templates31 { + typedef TemplateSel<T1> Head; + typedef Templates30<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + 
GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32> +struct Templates32 { + typedef TemplateSel<T1> Head; + typedef Templates31<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33> +struct Templates33 { + typedef TemplateSel<T1> Head; + typedef Templates32<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, 
GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34> +struct Templates34 { + typedef TemplateSel<T1> Head; + typedef Templates33<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35> +struct Templates35 { + typedef TemplateSel<T1> Head; + typedef Templates34<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, 
GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36> +struct Templates36 { + typedef TemplateSel<T1> Head; + typedef Templates35<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37> +struct Templates37 { + typedef TemplateSel<T1> Head; + typedef Templates36<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + 
GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38> +struct Templates38 { + typedef TemplateSel<T1> Head; + typedef Templates37<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39> +struct Templates39 { + typedef TemplateSel<T1> Head; + typedef Templates38<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ 
T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40> +struct Templates40 { + typedef TemplateSel<T1> Head; + typedef Templates39<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41> +struct 
Templates41 { + typedef TemplateSel<T1> Head; + typedef Templates40<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42> +struct Templates42 { + typedef TemplateSel<T1> Head; + typedef Templates41<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, + T42> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + 
GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42, + GTEST_TEMPLATE_ T43> +struct Templates43 { + typedef TemplateSel<T1> Head; + typedef Templates42<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, + T43> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42, + GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44> +struct Templates44 { + typedef TemplateSel<T1> Head; + typedef Templates43<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, + T43, T44> Tail; +}; + +template <GTEST_TEMPLATE_ T1, 
GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42, + GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45> +struct Templates45 { + typedef TemplateSel<T1> Head; + typedef Templates44<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, + T43, T44, T45> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, 
GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42, + GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45, + GTEST_TEMPLATE_ T46> +struct Templates46 { + typedef TemplateSel<T1> Head; + typedef Templates45<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, + T43, T44, T45, T46> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42, + GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45, + GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47> +struct Templates47 { + typedef TemplateSel<T1> Head; + typedef Templates46<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, + T43, T44, T45, T46, T47> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, 
GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42, + GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45, + GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48> +struct Templates48 { + typedef TemplateSel<T1> Head; + typedef Templates47<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, + T43, T44, T45, T46, T47, T48> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + 
GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42, + GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45, + GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48, + GTEST_TEMPLATE_ T49> +struct Templates49 { + typedef TemplateSel<T1> Head; + typedef Templates48<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, + T43, T44, T45, T46, T47, T48, T49> Tail; +}; + +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42, + GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45, + GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48, + GTEST_TEMPLATE_ T49, GTEST_TEMPLATE_ T50> +struct Templates50 { + typedef TemplateSel<T1> Head; + typedef Templates49<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, 
T36, T37, T38, T39, T40, T41, T42, + T43, T44, T45, T46, T47, T48, T49, T50> Tail; +}; + + +// We don't want to require the users to write TemplatesN<...> directly, +// as that would require them to count the length. Templates<...> is much +// easier to write, but generates horrible messages when there is a +// compiler error, as gcc insists on printing out each template +// argument, even if it has the default value (this means Templates<list> +// will appear as Templates<list, NoneT, NoneT, ..., NoneT> in the compiler +// errors). +// +// Our solution is to combine the best part of the two approaches: a +// user would write Templates<T1, ..., TN>, and Google Test will translate +// that to TemplatesN<T1, ..., TN> internally to make error messages +// readable. The translation is done by the 'type' member of the +// Templates template. +template <GTEST_TEMPLATE_ T1 = NoneT, GTEST_TEMPLATE_ T2 = NoneT, + GTEST_TEMPLATE_ T3 = NoneT, GTEST_TEMPLATE_ T4 = NoneT, + GTEST_TEMPLATE_ T5 = NoneT, GTEST_TEMPLATE_ T6 = NoneT, + GTEST_TEMPLATE_ T7 = NoneT, GTEST_TEMPLATE_ T8 = NoneT, + GTEST_TEMPLATE_ T9 = NoneT, GTEST_TEMPLATE_ T10 = NoneT, + GTEST_TEMPLATE_ T11 = NoneT, GTEST_TEMPLATE_ T12 = NoneT, + GTEST_TEMPLATE_ T13 = NoneT, GTEST_TEMPLATE_ T14 = NoneT, + GTEST_TEMPLATE_ T15 = NoneT, GTEST_TEMPLATE_ T16 = NoneT, + GTEST_TEMPLATE_ T17 = NoneT, GTEST_TEMPLATE_ T18 = NoneT, + GTEST_TEMPLATE_ T19 = NoneT, GTEST_TEMPLATE_ T20 = NoneT, + GTEST_TEMPLATE_ T21 = NoneT, GTEST_TEMPLATE_ T22 = NoneT, + GTEST_TEMPLATE_ T23 = NoneT, GTEST_TEMPLATE_ T24 = NoneT, + GTEST_TEMPLATE_ T25 = NoneT, GTEST_TEMPLATE_ T26 = NoneT, + GTEST_TEMPLATE_ T27 = NoneT, GTEST_TEMPLATE_ T28 = NoneT, + GTEST_TEMPLATE_ T29 = NoneT, GTEST_TEMPLATE_ T30 = NoneT, + GTEST_TEMPLATE_ T31 = NoneT, GTEST_TEMPLATE_ T32 = NoneT, + GTEST_TEMPLATE_ T33 = NoneT, GTEST_TEMPLATE_ T34 = NoneT, + GTEST_TEMPLATE_ T35 = NoneT, GTEST_TEMPLATE_ T36 = NoneT, + GTEST_TEMPLATE_ T37 = NoneT, GTEST_TEMPLATE_ T38 = NoneT, + 
GTEST_TEMPLATE_ T39 = NoneT, GTEST_TEMPLATE_ T40 = NoneT, + GTEST_TEMPLATE_ T41 = NoneT, GTEST_TEMPLATE_ T42 = NoneT, + GTEST_TEMPLATE_ T43 = NoneT, GTEST_TEMPLATE_ T44 = NoneT, + GTEST_TEMPLATE_ T45 = NoneT, GTEST_TEMPLATE_ T46 = NoneT, + GTEST_TEMPLATE_ T47 = NoneT, GTEST_TEMPLATE_ T48 = NoneT, + GTEST_TEMPLATE_ T49 = NoneT, GTEST_TEMPLATE_ T50 = NoneT> +struct Templates { + typedef Templates50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, + T42, T43, T44, T45, T46, T47, T48, T49, T50> type; +}; + +template <> +struct Templates<NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT> { + typedef Templates0 type; +}; +template <GTEST_TEMPLATE_ T1> +struct Templates<T1, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT> { + typedef Templates1<T1> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2> +struct Templates<T1, T2, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT> { + typedef Templates2<T1, T2> type; +}; +template 
<GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3> +struct Templates<T1, T2, T3, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates3<T1, T2, T3> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4> +struct Templates<T1, T2, T3, T4, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates4<T1, T2, T3, T4> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5> +struct Templates<T1, T2, T3, T4, T5, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates5<T1, T2, T3, T4, T5> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6> +struct Templates<T1, T2, T3, T4, T5, T6, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { 
+ typedef Templates6<T1, T2, T3, T4, T5, T6> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7> +struct Templates<T1, T2, T3, T4, T5, T6, T7, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates7<T1, T2, T3, T4, T5, T6, T7> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates8<T1, T2, T3, T4, T5, T6, T7, T8> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates9<T1, T2, T3, T4, T5, T6, T7, T8, T9> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, 
GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, 
GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates15<T1, T2, T3, 
T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + 
GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT> { + typedef Templates18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT> { + typedef Templates19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, 
NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT> { + typedef Templates20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT> { + typedef Templates21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT> { + typedef 
Templates22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT> { + typedef Templates23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT> { + typedef Templates24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, 
T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT> { + typedef Templates25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT> { + typedef Templates26<T1, T2, 
T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT> { + typedef Templates27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, 
NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT> { + typedef Templates28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT> { + typedef Templates29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, 
GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, 
GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31, T32> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, 
T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31, T32, T33> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31, T32, T33, T34> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, 
GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31, T32, T33, T34, T35> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31, T32, T33, T34, T35, T36> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, 
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, NoneT, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31, T32, T33, T34, T35, T36, T37> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38> 
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, NoneT, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + 
GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, NoneT, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, 
GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, NoneT, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, + T41> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, NoneT, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, 
T25, T26, T27, + T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, + T42> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42, + GTEST_TEMPLATE_ T43> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, + T42, T43> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, 
GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42, + GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, + NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, + T42, T43, T44> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42, + GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ 
T44, GTEST_TEMPLATE_ T45> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, + T45, NoneT, NoneT, NoneT, NoneT, NoneT> { + typedef Templates45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, + T42, T43, T44, T45> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42, + GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45, + GTEST_TEMPLATE_ T46> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, + T45, T46, NoneT, NoneT, NoneT, NoneT> { + typedef Templates46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, 
T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, + T42, T43, T44, T45, T46> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42, + GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45, + GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, + T45, T46, T47, NoneT, NoneT, NoneT> { + typedef Templates47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, + T42, T43, T44, T45, T46, T47> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, 
GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42, + GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45, + GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, + T45, T46, T47, T48, NoneT, NoneT> { + typedef Templates48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, + T42, T43, T44, T45, T46, T47, T48> type; +}; +template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3, + GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6, + GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9, + GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12, + GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15, + GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18, + GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21, + GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24, + GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27, + GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30, + GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33, + GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, 
GTEST_TEMPLATE_ T36, + GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39, + GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42, + GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45, + GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48, + GTEST_TEMPLATE_ T49> +struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, + T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, + T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, + T45, T46, T47, T48, T49, NoneT> { + typedef Templates49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, + T42, T43, T44, T45, T46, T47, T48, T49> type; +}; + +// The TypeList template makes it possible to use either a single type +// or a Types<...> list in TYPED_TEST_CASE() and +// INSTANTIATE_TYPED_TEST_CASE_P(). 
+ +template <typename T> +struct TypeList { + typedef Types1<T> type; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46, typename T47, typename T48, typename T49, typename T50> +struct TypeList<Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, + T44, T45, T46, T47, T48, T49, T50> > { + typedef typename Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, + T41, T42, T43, T44, T45, T46, T47, T48, T49, T50>::type type; +}; + +#endif // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ + +// Due to C++ preprocessor weirdness, we need double indirection to +// concatenate two tokens when one of them is __LINE__. Writing +// +// foo ## __LINE__ +// +// will result in the token foo__LINE__, instead of foo followed by +// the current line number. 
For more details, see +// http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.6 +#define GTEST_CONCAT_TOKEN_(foo, bar) GTEST_CONCAT_TOKEN_IMPL_(foo, bar) +#define GTEST_CONCAT_TOKEN_IMPL_(foo, bar) foo ## bar + +class ProtocolMessage; +namespace proto2 { class Message; } + +namespace testing { + +// Forward declarations. + +class AssertionResult; // Result of an assertion. +class Message; // Represents a failure message. +class Test; // Represents a test. +class TestInfo; // Information about a test. +class TestPartResult; // Result of a test part. +class UnitTest; // A collection of test cases. + +template <typename T> +::std::string PrintToString(const T& value); + +namespace internal { + +struct TraceInfo; // Information about a trace point. +class ScopedTrace; // Implements scoped trace. +class TestInfoImpl; // Opaque implementation of TestInfo +class UnitTestImpl; // Opaque implementation of UnitTest + +// How many times InitGoogleTest() has been called. +GTEST_API_ extern int g_init_gtest_count; + +// The text used in failure messages to indicate the start of the +// stack trace. +GTEST_API_ extern const char kStackTraceMarker[]; + +// Two overloaded helpers for checking at compile time whether an +// expression is a null pointer literal (i.e. NULL or any 0-valued +// compile-time integral constant). Their return values have +// different sizes, so we can use sizeof() to test which version is +// picked by the compiler. These helpers have no implementations, as +// we only need their signatures. +// +// Given IsNullLiteralHelper(x), the compiler will pick the first +// version if x can be implicitly converted to Secret*, and pick the +// second version otherwise. Since Secret is a secret and incomplete +// type, the only expression a user can write that has type Secret* is +// a null pointer literal. Therefore, we know that x is a null +// pointer literal if and only if the first version is picked by the +// compiler. 
+char IsNullLiteralHelper(Secret* p); +char (&IsNullLiteralHelper(...))[2]; // NOLINT + +// A compile-time bool constant that is true if and only if x is a +// null pointer literal (i.e. NULL or any 0-valued compile-time +// integral constant). +#ifdef GTEST_ELLIPSIS_NEEDS_POD_ +// We lose support for NULL detection where the compiler doesn't like +// passing non-POD classes through ellipsis (...). +# define GTEST_IS_NULL_LITERAL_(x) false +#else +# define GTEST_IS_NULL_LITERAL_(x) \ + (sizeof(::testing::internal::IsNullLiteralHelper(x)) == 1) +#endif // GTEST_ELLIPSIS_NEEDS_POD_ + +// Appends the user-supplied message to the Google-Test-generated message. +GTEST_API_ std::string AppendUserMessage( + const std::string& gtest_msg, const Message& user_msg); + +#if GTEST_HAS_EXCEPTIONS + +// This exception is thrown by (and only by) a failed Google Test +// assertion when GTEST_FLAG(throw_on_failure) is true (if exceptions +// are enabled). We derive it from std::runtime_error, which is for +// errors presumably detectable only at run time. Since +// std::runtime_error inherits from std::exception, many testing +// frameworks know how to extract and print the message inside it. +class GTEST_API_ GoogleTestFailureException : public ::std::runtime_error { + public: + explicit GoogleTestFailureException(const TestPartResult& failure); +}; + +#endif // GTEST_HAS_EXCEPTIONS + +// A helper class for creating scoped traces in user programs. +class GTEST_API_ ScopedTrace { + public: + // The c'tor pushes the given source file location and message onto + // a trace stack maintained by Google Test. + ScopedTrace(const char* file, int line, const Message& message); + + // The d'tor pops the info pushed by the c'tor. + // + // Note that the d'tor is not virtual in order to be efficient. + // Don't inherit from ScopedTrace! 
+ ~ScopedTrace(); + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedTrace); +} GTEST_ATTRIBUTE_UNUSED_; // A ScopedTrace object does its job in its + // c'tor and d'tor. Therefore it doesn't + // need to be used otherwise. + +// Constructs and returns the message for an equality assertion +// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure. +// +// The first four parameters are the expressions used in the assertion +// and their values, as strings. For example, for ASSERT_EQ(foo, bar) +// where foo is 5 and bar is 6, we have: +// +// expected_expression: "foo" +// actual_expression: "bar" +// expected_value: "5" +// actual_value: "6" +// +// The ignoring_case parameter is true iff the assertion is a +// *_STRCASEEQ*. When it's true, the string " (ignoring case)" will +// be inserted into the message. +GTEST_API_ AssertionResult EqFailure(const char* expected_expression, + const char* actual_expression, + const std::string& expected_value, + const std::string& actual_value, + bool ignoring_case); + +// Constructs a failure message for Boolean assertions such as EXPECT_TRUE. +GTEST_API_ std::string GetBoolAssertionFailureMessage( + const AssertionResult& assertion_result, + const char* expression_text, + const char* actual_predicate_value, + const char* expected_predicate_value); + +// This template class represents an IEEE floating-point number +// (either single-precision or double-precision, depending on the +// template parameters). +// +// The purpose of this class is to do more sophisticated number +// comparison. (Due to round-off error, etc, it's very unlikely that +// two floating-points will be equal exactly. Hence a naive +// comparison by the == operation often doesn't work.) +// +// Format of IEEE floating-point: +// +// The most-significant bit being the leftmost, an IEEE +// floating-point looks like +// +// sign_bit exponent_bits fraction_bits +// +// Here, sign_bit is a single bit that designates the sign of the +// number. 
+// +// For float, there are 8 exponent bits and 23 fraction bits. +// +// For double, there are 11 exponent bits and 52 fraction bits. +// +// More details can be found at +// http://en.wikipedia.org/wiki/IEEE_floating-point_standard. +// +// Template parameter: +// +// RawType: the raw floating-point type (either float or double) +template <typename RawType> +class FloatingPoint { + public: + // Defines the unsigned integer type that has the same size as the + // floating point number. + typedef typename TypeWithSize<sizeof(RawType)>::UInt Bits; + + // Constants. + + // # of bits in a number. + static const size_t kBitCount = 8*sizeof(RawType); + + // # of fraction bits in a number. + static const size_t kFractionBitCount = + std::numeric_limits<RawType>::digits - 1; + + // # of exponent bits in a number. + static const size_t kExponentBitCount = kBitCount - 1 - kFractionBitCount; + + // The mask for the sign bit. + static const Bits kSignBitMask = static_cast<Bits>(1) << (kBitCount - 1); + + // The mask for the fraction bits. + static const Bits kFractionBitMask = + ~static_cast<Bits>(0) >> (kExponentBitCount + 1); + + // The mask for the exponent bits. + static const Bits kExponentBitMask = ~(kSignBitMask | kFractionBitMask); + + // How many ULP's (Units in the Last Place) we want to tolerate when + // comparing two numbers. The larger the value, the more error we + // allow. A 0 value means that two numbers must be exactly the same + // to be considered equal. + // + // The maximum error of a single floating-point operation is 0.5 + // units in the last place. On Intel CPU's, all floating-point + // calculations are done with 80-bit precision, while double has 64 + // bits. Therefore, 4 should be enough for ordinary use. 
+ // + // See the following article for more details on ULP: + // http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/ + static const size_t kMaxUlps = 4; + + // Constructs a FloatingPoint from a raw floating-point number. + // + // On an Intel CPU, passing a non-normalized NAN (Not a Number) + // around may change its bits, although the new value is guaranteed + // to be also a NAN. Therefore, don't expect this constructor to + // preserve the bits in x when x is a NAN. + explicit FloatingPoint(const RawType& x) { u_.value_ = x; } + + // Static methods + + // Reinterprets a bit pattern as a floating-point number. + // + // This function is needed to test the AlmostEquals() method. + static RawType ReinterpretBits(const Bits bits) { + FloatingPoint fp(0); + fp.u_.bits_ = bits; + return fp.u_.value_; + } + + // Returns the floating-point number that represent positive infinity. + static RawType Infinity() { + return ReinterpretBits(kExponentBitMask); + } + + // Returns the maximum representable finite floating-point number. + static RawType Max(); + + // Non-static methods + + // Returns the bits that represents this number. + const Bits &bits() const { return u_.bits_; } + + // Returns the exponent bits of this number. + Bits exponent_bits() const { return kExponentBitMask & u_.bits_; } + + // Returns the fraction bits of this number. + Bits fraction_bits() const { return kFractionBitMask & u_.bits_; } + + // Returns the sign bit of this number. + Bits sign_bit() const { return kSignBitMask & u_.bits_; } + + // Returns true iff this is NAN (not a number). + bool is_nan() const { + // It's a NAN if the exponent bits are all ones and the fraction + // bits are not entirely zeros. + return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0); + } + + // Returns true iff this number is at most kMaxUlps ULP's away from + // rhs. In particular, this function: + // + // - returns false if either number is (or both are) NAN. 
+ // - treats really large numbers as almost equal to infinity. + // - thinks +0.0 and -0.0 are 0 DLP's apart. + bool AlmostEquals(const FloatingPoint& rhs) const { + // The IEEE standard says that any comparison operation involving + // a NAN must return false. + if (is_nan() || rhs.is_nan()) return false; + + return DistanceBetweenSignAndMagnitudeNumbers(u_.bits_, rhs.u_.bits_) + <= kMaxUlps; + } + + private: + // The data type used to store the actual floating-point number. + union FloatingPointUnion { + RawType value_; // The raw floating-point number. + Bits bits_; // The bits that represent the number. + }; + + // Converts an integer from the sign-and-magnitude representation to + // the biased representation. More precisely, let N be 2 to the + // power of (kBitCount - 1), an integer x is represented by the + // unsigned number x + N. + // + // For instance, + // + // -N + 1 (the most negative number representable using + // sign-and-magnitude) is represented by 1; + // 0 is represented by N; and + // N - 1 (the biggest number representable using + // sign-and-magnitude) is represented by 2N - 1. + // + // Read http://en.wikipedia.org/wiki/Signed_number_representations + // for more details on signed number representations. + static Bits SignAndMagnitudeToBiased(const Bits &sam) { + if (kSignBitMask & sam) { + // sam represents a negative number. + return ~sam + 1; + } else { + // sam represents a positive number. + return kSignBitMask | sam; + } + } + + // Given two numbers in the sign-and-magnitude representation, + // returns the distance between them as an unsigned number. + static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits &sam1, + const Bits &sam2) { + const Bits biased1 = SignAndMagnitudeToBiased(sam1); + const Bits biased2 = SignAndMagnitudeToBiased(sam2); + return (biased1 >= biased2) ? 
(biased1 - biased2) : (biased2 - biased1); + } + + FloatingPointUnion u_; +}; + +// We cannot use std::numeric_limits<T>::max() as it clashes with the max() +// macro defined by <windows.h>. +template <> +inline float FloatingPoint<float>::Max() { return FLT_MAX; } +template <> +inline double FloatingPoint<double>::Max() { return DBL_MAX; } + +// Typedefs the instances of the FloatingPoint template class that we +// care to use. +typedef FloatingPoint<float> Float; +typedef FloatingPoint<double> Double; + +// In order to catch the mistake of putting tests that use different +// test fixture classes in the same test case, we need to assign +// unique IDs to fixture classes and compare them. The TypeId type is +// used to hold such IDs. The user should treat TypeId as an opaque +// type: the only operation allowed on TypeId values is to compare +// them for equality using the == operator. +typedef const void* TypeId; + +template <typename T> +class TypeIdHelper { + public: + // dummy_ must not have a const type. Otherwise an overly eager + // compiler (e.g. MSVC 7.1 & 8.0) may try to merge + // TypeIdHelper<T>::dummy_ for different Ts as an "optimization". + static bool dummy_; +}; + +template <typename T> +bool TypeIdHelper<T>::dummy_ = false; + +// GetTypeId<T>() returns the ID of type T. Different values will be +// returned for different types. Calling the function twice with the +// same type argument is guaranteed to return the same ID. +template <typename T> +TypeId GetTypeId() { + // The compiler is required to allocate a different + // TypeIdHelper<T>::dummy_ variable for each T used to instantiate + // the template. Therefore, the address of dummy_ is guaranteed to + // be unique. + return &(TypeIdHelper<T>::dummy_); +} + +// Returns the type ID of ::testing::Test. 
// Always call this instead
// of GetTypeId< ::testing::Test>() to get the type ID of
// ::testing::Test, as the latter may give the wrong result due to a
// suspected linker bug when compiling Google Test as a Mac OS X
// framework.
GTEST_API_ TypeId GetTestTypeId();

// Defines the abstract factory interface that creates instances
// of a Test object.
class TestFactoryBase {
 public:
  virtual ~TestFactoryBase() {}

  // Creates a test instance to run. The instance is both created and destroyed
  // within TestInfoImpl::Run()
  virtual Test* CreateTest() = 0;

 protected:
  // Protected so that only derived factories can be constructed.
  TestFactoryBase() {}

 private:
  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestFactoryBase);
};

// This class provides an implementation of the TestFactoryBase interface.
// It is used in TEST and TEST_F macros.
template <class TestClass>
class TestFactoryImpl : public TestFactoryBase {
 public:
  // Returns a new, default-constructed TestClass allocated with 'new'.
  virtual Test* CreateTest() { return new TestClass; }
};

#if GTEST_OS_WINDOWS

// Predicate-formatters for implementing the HRESULT checking macros
// {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}
// We pass a long instead of HRESULT to avoid causing an
// include dependency for the HRESULT type.
GTEST_API_ AssertionResult IsHRESULTSuccess(const char* expr,
                                            long hr);  // NOLINT
GTEST_API_ AssertionResult IsHRESULTFailure(const char* expr,
                                            long hr);  // NOLINT

#endif  // GTEST_OS_WINDOWS

// Types of SetUpTestCase() and TearDownTestCase() functions.
typedef void (*SetUpTestCaseFunc)();
typedef void (*TearDownTestCaseFunc)();

// Creates a new TestInfo object and registers it with Google Test;
// returns the created object.
//
// Arguments:
//
//   test_case_name:   name of the test case
//   name:             name of the test
//   type_param:       the name of the test's type parameter, or NULL if
//                     this is not a typed or a type-parameterized test.
//   value_param:      text representation of the test's value parameter,
//                     or NULL if this is not a type-parameterized test.
+// fixture_class_id: ID of the test fixture class +// set_up_tc: pointer to the function that sets up the test case +// tear_down_tc: pointer to the function that tears down the test case +// factory: pointer to the factory that creates a test object. +// The newly created TestInfo instance will assume +// ownership of the factory object. +GTEST_API_ TestInfo* MakeAndRegisterTestInfo( + const char* test_case_name, + const char* name, + const char* type_param, + const char* value_param, + TypeId fixture_class_id, + SetUpTestCaseFunc set_up_tc, + TearDownTestCaseFunc tear_down_tc, + TestFactoryBase* factory); + +// If *pstr starts with the given prefix, modifies *pstr to be right +// past the prefix and returns true; otherwise leaves *pstr unchanged +// and returns false. None of pstr, *pstr, and prefix can be NULL. +GTEST_API_ bool SkipPrefix(const char* prefix, const char** pstr); + +#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P + +// State of the definition of a type-parameterized test case. +class GTEST_API_ TypedTestCasePState { + public: + TypedTestCasePState() : registered_(false) {} + + // Adds the given test name to defined_test_names_ and return true + // if the test case hasn't been registered; otherwise aborts the + // program. + bool AddTestName(const char* file, int line, const char* case_name, + const char* test_name) { + if (registered_) { + fprintf(stderr, "%s Test %s must be defined before " + "REGISTER_TYPED_TEST_CASE_P(%s, ...).\n", + FormatFileLocation(file, line).c_str(), test_name, case_name); + fflush(stderr); + posix::Abort(); + } + defined_test_names_.insert(test_name); + return true; + } + + // Verifies that registered_tests match the test names in + // defined_test_names_; returns registered_tests if successful, or + // aborts the program otherwise. 
+ const char* VerifyRegisteredTestNames( + const char* file, int line, const char* registered_tests); + + private: + bool registered_; + ::std::set<const char*> defined_test_names_; +}; + +// Skips to the first non-space char after the first comma in 'str'; +// returns NULL if no comma is found in 'str'. +inline const char* SkipComma(const char* str) { + const char* comma = strchr(str, ','); + if (comma == NULL) { + return NULL; + } + while (IsSpace(*(++comma))) {} + return comma; +} + +// Returns the prefix of 'str' before the first comma in it; returns +// the entire string if it contains no comma. +inline std::string GetPrefixUntilComma(const char* str) { + const char* comma = strchr(str, ','); + return comma == NULL ? str : std::string(str, comma); +} + +// TypeParameterizedTest<Fixture, TestSel, Types>::Register() +// registers a list of type-parameterized tests with Google Test. The +// return value is insignificant - we just need to return something +// such that we can call this function in a namespace scope. +// +// Implementation note: The GTEST_TEMPLATE_ macro declares a template +// template parameter. It's defined in gtest-type-util.h. +template <GTEST_TEMPLATE_ Fixture, class TestSel, typename Types> +class TypeParameterizedTest { + public: + // 'index' is the index of the test in the type list 'Types' + // specified in INSTANTIATE_TYPED_TEST_CASE_P(Prefix, TestCase, + // Types). Valid values for 'index' are [0, N - 1] where N is the + // length of Types. + static bool Register(const char* prefix, const char* case_name, + const char* test_names, int index) { + typedef typename Types::Head Type; + typedef Fixture<Type> FixtureClass; + typedef typename GTEST_BIND_(TestSel, Type) TestClass; + + // First, registers the first type-parameterized test in the type + // list. + MakeAndRegisterTestInfo( + (std::string(prefix) + (prefix[0] == '\0' ? 
"" : "/") + case_name + "/" + + StreamableToString(index)).c_str(), + GetPrefixUntilComma(test_names).c_str(), + GetTypeName<Type>().c_str(), + NULL, // No value parameter. + GetTypeId<FixtureClass>(), + TestClass::SetUpTestCase, + TestClass::TearDownTestCase, + new TestFactoryImpl<TestClass>); + + // Next, recurses (at compile time) with the tail of the type list. + return TypeParameterizedTest<Fixture, TestSel, typename Types::Tail> + ::Register(prefix, case_name, test_names, index + 1); + } +}; + +// The base case for the compile time recursion. +template <GTEST_TEMPLATE_ Fixture, class TestSel> +class TypeParameterizedTest<Fixture, TestSel, Types0> { + public: + static bool Register(const char* /*prefix*/, const char* /*case_name*/, + const char* /*test_names*/, int /*index*/) { + return true; + } +}; + +// TypeParameterizedTestCase<Fixture, Tests, Types>::Register() +// registers *all combinations* of 'Tests' and 'Types' with Google +// Test. The return value is insignificant - we just need to return +// something such that we can call this function in a namespace scope. +template <GTEST_TEMPLATE_ Fixture, typename Tests, typename Types> +class TypeParameterizedTestCase { + public: + static bool Register(const char* prefix, const char* case_name, + const char* test_names) { + typedef typename Tests::Head Head; + + // First, register the first test in 'Test' for each type in 'Types'. + TypeParameterizedTest<Fixture, Head, Types>::Register( + prefix, case_name, test_names, 0); + + // Next, recurses (at compile time) with the tail of the test list. + return TypeParameterizedTestCase<Fixture, typename Tests::Tail, Types> + ::Register(prefix, case_name, SkipComma(test_names)); + } +}; + +// The base case for the compile time recursion. 
template <GTEST_TEMPLATE_ Fixture, typename Types>
class TypeParameterizedTestCase<Fixture, Templates0, Types> {
 public:
  // Selected when the list of tests is Templates0: nothing is registered
  // and the call simply returns true.
  static bool Register(const char* /*prefix*/, const char* /*case_name*/,
                       const char* /*test_names*/) {
    return true;
  }
};

#endif  // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P

// Returns the current OS stack trace as an std::string.
//
// The maximum number of stack frames to be included is specified by
// the gtest_stack_trace_depth flag.  The skip_count parameter
// specifies the number of top frames to be skipped, which doesn't
// count against the number of frames to be included.
//
// For example, if Foo() calls Bar(), which in turn calls
// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
GTEST_API_ std::string GetCurrentOsStackTraceExceptTop(
    UnitTest* unit_test, int skip_count);

// Helpers for suppressing warnings on unreachable code or constant
// condition.

// Always returns true.
GTEST_API_ bool AlwaysTrue();

// Always returns false.  Implemented as the negation of AlwaysTrue().
inline bool AlwaysFalse() { return !AlwaysTrue(); }

// Helper for suppressing false warning from Clang on a const char*
// variable declared in a conditional expression always being NULL in
// the else branch.
struct GTEST_API_ ConstCharPtr {
  // Stores 'str' in 'value'; the pointed-to characters are not copied.
  ConstCharPtr(const char* str) : value(str) {}
  // Converts to true regardless of 'value'.
  operator bool() const { return true; }
  const char* value;
};

// A simple Linear Congruential Generator for generating random
// numbers with a uniform distribution.  Unlike rand() and srand(), it
// doesn't use global state (and therefore can't interfere with user
// code).  Unlike rand_r(), it's portable.  An LCG isn't very random,
// but it's good enough for our purposes.
class GTEST_API_ Random {
 public:
  // Upper limit accepted for the 'range' argument of Generate(): 2^31.
  static const UInt32 kMaxRange = 1u << 31;

  // Starts the generator with 'seed' as its state.
  explicit Random(UInt32 seed) : state_(seed) {}

  // Replaces the generator state with 'seed'.
  void Reseed(UInt32 seed) { state_ = seed; }

  // Generates a random number from [0, range).  Crashes if 'range' is
  // 0 or greater than kMaxRange.
  UInt32 Generate(UInt32 range);

 private:
  // Current generator state; set by the constructor and by Reseed().
  UInt32 state_;
  GTEST_DISALLOW_COPY_AND_ASSIGN_(Random);
};

// Defining a variable of type CompileAssertTypesEqual<T1, T2> will cause a
// compiler error iff T1 and T2 are different types.
// (Only the <T, T> specialization below is defined; the primary template
// is declared but left incomplete.)
template <typename T1, typename T2>
struct CompileAssertTypesEqual;

template <typename T>
struct CompileAssertTypesEqual<T, T> {
};

// Removes the reference from a type if it is a reference type,
// otherwise leaves it unchanged.  This is the same as
// tr1::remove_reference, which is not widely available yet.
template <typename T>
struct RemoveReference { typedef T type; };  // NOLINT
template <typename T>
struct RemoveReference<T&> { typedef T type; };  // NOLINT

// A handy wrapper around RemoveReference that works when the argument
// T depends on template parameters.
#define GTEST_REMOVE_REFERENCE_(T) \
    typename ::testing::internal::RemoveReference<T>::type

// Removes const from a type if it is a const type, otherwise leaves
// it unchanged.  This is the same as tr1::remove_const, which is not
// widely available yet.
template <typename T>
struct RemoveConst { typedef T type; };  // NOLINT
template <typename T>
struct RemoveConst<const T> { typedef T type; };  // NOLINT

// MSVC 8.0, Sun C++, and IBM XL C++ have a bug which causes the above
// definition to fail to remove the const in 'const int[3]' and 'const
// char[3][4]'.  The following specialization works around the bug.
template <typename T, size_t N>
struct RemoveConst<const T[N]> {
  // Strips const from the element type, then rebuilds the array type.
  typedef typename RemoveConst<T>::type type[N];
};

#if defined(_MSC_VER) && _MSC_VER < 1400
// This is the only specialization that allows VC++ 7.1 to remove const in
// 'const int[3]' and 'const int[3][4]'.  However, it causes trouble with GCC
// and thus needs to be conditionally compiled.
template <typename T, size_t N>
struct RemoveConst<T[N]> {
  typedef typename RemoveConst<T>::type type[N];
};
#endif

// A handy wrapper around RemoveConst that works when the argument
// T depends on template parameters.
#define GTEST_REMOVE_CONST_(T) \
    typename ::testing::internal::RemoveConst<T>::type

// Turns const U&, U&, const U, and U all into U.
// (The reference is removed first, then const is removed from the result.)
#define GTEST_REMOVE_REFERENCE_AND_CONST_(T) \
    GTEST_REMOVE_CONST_(GTEST_REMOVE_REFERENCE_(T))

// Adds reference to a type if it is not a reference type,
// otherwise leaves it unchanged.  This is the same as
// tr1::add_reference, which is not widely available yet.
template <typename T>
struct AddReference { typedef T& type; };  // NOLINT
template <typename T>
struct AddReference<T&> { typedef T& type; };  // NOLINT

// A handy wrapper around AddReference that works when the argument T
// depends on template parameters.
#define GTEST_ADD_REFERENCE_(T) \
    typename ::testing::internal::AddReference<T>::type

// Adds a reference to const on top of T as necessary.  For example,
// it transforms
//
//   char         ==> const char&
//   const char   ==> const char&
//   char&        ==> const char&
//   const char&  ==> const char&
//
// The argument T must depend on some template parameters.
#define GTEST_REFERENCE_TO_CONST_(T) \
    GTEST_ADD_REFERENCE_(const GTEST_REMOVE_REFERENCE_(T))

// ImplicitlyConvertible<From, To>::value is a compile-time bool
// constant that's true iff type From can be implicitly converted to
// type To.
template <typename From, typename To>
class ImplicitlyConvertible {
 private:
  // We need the following helper functions only for their types.
  // They have no implementations.

  // MakeFrom() is an expression whose type is From.  We cannot simply
  // use From(), as the type From may not have a public default
  // constructor.
  static From MakeFrom();

  // These two functions are overloaded.  Given an expression
  // Helper(x), the compiler will pick the first version if x can be
  // implicitly converted to type To; otherwise it will pick the
  // second version.
  //
  // The first version returns a value of size 1, and the second
  // version returns a value of size 2.  Therefore, by checking the
  // size of Helper(x), which can be done at compile time, we can tell
  // which version of Helper() is used, and hence whether x can be
  // implicitly converted to type To.
  static char Helper(To);
  static char (&Helper(...))[2];  // NOLINT

  // We have to put the 'public' section after the 'private' section,
  // or MSVC refuses to compile the code.
 public:
  // MSVC warns about implicitly converting from double to int for
  // possible loss of data, so we need to temporarily disable the
  // warning.
#ifdef _MSC_VER
# pragma warning(push)          // Saves the current warning state.
# pragma warning(disable:4244)  // Temporarily disables warning 4244.

  static const bool value =
      sizeof(Helper(ImplicitlyConvertible::MakeFrom())) == 1;
# pragma warning(pop)           // Restores the warning state.
#elif defined(__BORLANDC__)
  // C++Builder cannot use member overload resolution during template
  // instantiation.  The simplest workaround is to use its C++0x type traits
  // functions (C++Builder 2009 and above only).
  static const bool value = __is_convertible(From, To);
#else
  // Same expression as the MSVC branch, without the warning pragmas.
  static const bool value =
      sizeof(Helper(ImplicitlyConvertible::MakeFrom())) == 1;
#endif  // _MSC_VER
};
// Out-of-class definition of the static data member declared above.
template <typename From, typename To>
const bool ImplicitlyConvertible<From, To>::value;

// IsAProtocolMessage<T>::value is a compile-time bool constant that's
// true iff T is type ProtocolMessage, proto2::Message, or a subclass
// of those.
template <typename T>
struct IsAProtocolMessage
    : public bool_constant<
  ImplicitlyConvertible<const T*, const ::ProtocolMessage*>::value ||
  ImplicitlyConvertible<const T*, const ::proto2::Message*>::value> {
};

// When the compiler sees expression IsContainerTest<C>(0), if C is an
// STL-style container class, the first overload of IsContainerTest
// will be viable (since both C::iterator* and C::const_iterator* are
// valid types and NULL can be implicitly converted to them).  It will
// be picked over the second overload as 'int' is a perfect match for
// the type of argument 0.  If C::iterator or C::const_iterator is not
// a valid type, the first overload is not viable, and the second
// overload will be picked.  Therefore, we can determine whether C is
// a container class by checking the type of IsContainerTest<C>(0).
// The value of the expression is insignificant.
//
// Note that we look for both C::iterator and C::const_iterator.  The
// reason is that C++ injects the name of a class as a member of the
// class itself (e.g. you can refer to class iterator as either
// 'iterator' or 'iterator::iterator').  If we look for C::iterator
// only, for example, we would mistakenly think that a class named
// iterator is an STL container.
//
// Also note that the simpler approach of overloading
// IsContainerTest(typename C::const_iterator*) and
// IsContainerTest(...) doesn't work with Visual Age C++ and Sun C++.
+typedef int IsContainer; +template <class C> +IsContainer IsContainerTest(int /* dummy */, + typename C::iterator* /* it */ = NULL, + typename C::const_iterator* /* const_it */ = NULL) { + return 0; +} + +typedef char IsNotContainer; +template <class C> +IsNotContainer IsContainerTest(long /* dummy */) { return '\0'; } + +// EnableIf<condition>::type is void when 'Cond' is true, and +// undefined when 'Cond' is false. To use SFINAE to make a function +// overload only apply when a particular expression is true, add +// "typename EnableIf<expression>::type* = 0" as the last parameter. +template<bool> struct EnableIf; +template<> struct EnableIf<true> { typedef void type; }; // NOLINT + +// Utilities for native arrays. + +// ArrayEq() compares two k-dimensional native arrays using the +// elements' operator==, where k can be any integer >= 0. When k is +// 0, ArrayEq() degenerates into comparing a single pair of values. + +template <typename T, typename U> +bool ArrayEq(const T* lhs, size_t size, const U* rhs); + +// This generic version is used when k is 0. +template <typename T, typename U> +inline bool ArrayEq(const T& lhs, const U& rhs) { return lhs == rhs; } + +// This overload is used when k >= 1. +template <typename T, typename U, size_t N> +inline bool ArrayEq(const T(&lhs)[N], const U(&rhs)[N]) { + return internal::ArrayEq(lhs, N, rhs); +} + +// This helper reduces code bloat. If we instead put its logic inside +// the previous ArrayEq() function, arrays with different sizes would +// lead to different copies of the template code. +template <typename T, typename U> +bool ArrayEq(const T* lhs, size_t size, const U* rhs) { + for (size_t i = 0; i != size; i++) { + if (!internal::ArrayEq(lhs[i], rhs[i])) + return false; + } + return true; +} + +// Finds the first element in the iterator range [begin, end) that +// equals elem. Element may be a native array type itself. 
+template <typename Iter, typename Element> +Iter ArrayAwareFind(Iter begin, Iter end, const Element& elem) { + for (Iter it = begin; it != end; ++it) { + if (internal::ArrayEq(*it, elem)) + return it; + } + return end; +} + +// CopyArray() copies a k-dimensional native array using the elements' +// operator=, where k can be any integer >= 0. When k is 0, +// CopyArray() degenerates into copying a single value. + +template <typename T, typename U> +void CopyArray(const T* from, size_t size, U* to); + +// This generic version is used when k is 0. +template <typename T, typename U> +inline void CopyArray(const T& from, U* to) { *to = from; } + +// This overload is used when k >= 1. +template <typename T, typename U, size_t N> +inline void CopyArray(const T(&from)[N], U(*to)[N]) { + internal::CopyArray(from, N, *to); +} + +// This helper reduces code bloat. If we instead put its logic inside +// the previous CopyArray() function, arrays with different sizes +// would lead to different copies of the template code. +template <typename T, typename U> +void CopyArray(const T* from, size_t size, U* to) { + for (size_t i = 0; i != size; i++) { + internal::CopyArray(from[i], to + i); + } +} + +// The relation between an NativeArray object (see below) and the +// native array it represents. +enum RelationToSource { + kReference, // The NativeArray references the native array. + kCopy // The NativeArray makes a copy of the native array and + // owns the copy. +}; + +// Adapts a native array to a read-only STL-style container. Instead +// of the complete STL container concept, this adaptor only implements +// members useful for Google Mock's container matchers. New members +// should be added as needed. To simplify the implementation, we only +// support Element being a raw type (i.e. having no top-level const or +// reference modifier). It's the client's responsibility to satisfy +// this requirement. 
Element can be an array type itself (hence +// multi-dimensional arrays are supported). +template <typename Element> +class NativeArray { + public: + // STL-style container typedefs. + typedef Element value_type; + typedef Element* iterator; + typedef const Element* const_iterator; + + // Constructs from a native array. + NativeArray(const Element* array, size_t count, RelationToSource relation) { + Init(array, count, relation); + } + + // Copy constructor. + NativeArray(const NativeArray& rhs) { + Init(rhs.array_, rhs.size_, rhs.relation_to_source_); + } + + ~NativeArray() { + // Ensures that the user doesn't instantiate NativeArray with a + // const or reference type. + static_cast<void>(StaticAssertTypeEqHelper<Element, + GTEST_REMOVE_REFERENCE_AND_CONST_(Element)>()); + if (relation_to_source_ == kCopy) + delete[] array_; + } + + // STL-style container methods. + size_t size() const { return size_; } + const_iterator begin() const { return array_; } + const_iterator end() const { return array_ + size_; } + bool operator==(const NativeArray& rhs) const { + return size() == rhs.size() && + ArrayEq(begin(), size(), rhs.begin()); + } + + private: + // Initializes this object; makes a copy of the input array if + // 'relation' is kCopy. 
+ void Init(const Element* array, size_t a_size, RelationToSource relation) { + if (relation == kReference) { + array_ = array; + } else { + Element* const copy = new Element[a_size]; + CopyArray(array, a_size, copy); + array_ = copy; + } + size_ = a_size; + relation_to_source_ = relation; + } + + const Element* array_; + size_t size_; + RelationToSource relation_to_source_; + + GTEST_DISALLOW_ASSIGN_(NativeArray); +}; + +} // namespace internal +} // namespace testing + +#define GTEST_MESSAGE_AT_(file, line, message, result_type) \ + ::testing::internal::AssertHelper(result_type, file, line, message) \ + = ::testing::Message() + +#define GTEST_MESSAGE_(message, result_type) \ + GTEST_MESSAGE_AT_(__FILE__, __LINE__, message, result_type) + +#define GTEST_FATAL_FAILURE_(message) \ + return GTEST_MESSAGE_(message, ::testing::TestPartResult::kFatalFailure) + +#define GTEST_NONFATAL_FAILURE_(message) \ + GTEST_MESSAGE_(message, ::testing::TestPartResult::kNonFatalFailure) + +#define GTEST_SUCCESS_(message) \ + GTEST_MESSAGE_(message, ::testing::TestPartResult::kSuccess) + +// Suppresses MSVC warnings 4072 (unreachable code) for the code following +// statement if it returns or throws (or doesn't return or throw in some +// situations). +#define GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) \ + if (::testing::internal::AlwaysTrue()) { statement; } + +#define GTEST_TEST_THROW_(statement, expected_exception, fail) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::ConstCharPtr gtest_msg = "") { \ + bool gtest_caught_expected = false; \ + try { \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + } \ + catch (expected_exception const&) { \ + gtest_caught_expected = true; \ + } \ + catch (...) 
{ \ + gtest_msg.value = \ + "Expected: " #statement " throws an exception of type " \ + #expected_exception ".\n Actual: it throws a different type."; \ + goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \ + } \ + if (!gtest_caught_expected) { \ + gtest_msg.value = \ + "Expected: " #statement " throws an exception of type " \ + #expected_exception ".\n Actual: it throws nothing."; \ + goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \ + } \ + } else \ + GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__): \ + fail(gtest_msg.value) + +#define GTEST_TEST_NO_THROW_(statement, fail) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::AlwaysTrue()) { \ + try { \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + } \ + catch (...) { \ + goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \ + } \ + } else \ + GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__): \ + fail("Expected: " #statement " doesn't throw an exception.\n" \ + " Actual: it throws.") + +#define GTEST_TEST_ANY_THROW_(statement, fail) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::AlwaysTrue()) { \ + bool gtest_caught_any = false; \ + try { \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + } \ + catch (...) { \ + gtest_caught_any = true; \ + } \ + if (!gtest_caught_any) { \ + goto GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__); \ + } \ + } else \ + GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__): \ + fail("Expected: " #statement " throws an exception.\n" \ + " Actual: it doesn't.") + + +// Implements Boolean test assertions such as EXPECT_TRUE. expression can be +// either a boolean expression or an AssertionResult. text is a textual +// represenation of expression as it was passed into the EXPECT_TRUE. 
// Wraps 'expression' in an AssertionResult; when that result tests false,
// 'fail' is invoked with the message produced by
// GetBoolAssertionFailureMessage() from 'text' and the stringified
// 'actual' and 'expected' arguments.
#define GTEST_TEST_BOOLEAN_(expression, text, actual, expected, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (const ::testing::AssertionResult gtest_ar_ = \
      ::testing::AssertionResult(expression)) \
    ; \
  else \
    fail(::testing::internal::GetBoolAssertionFailureMessage(\
        gtest_ar_, text, #actual, #expected).c_str())

// Runs 'statement' with a HasNewFatalFailureHelper in scope and invokes
// 'fail' if the helper reports a new fatal failure afterwards.
#define GTEST_TEST_NO_FATAL_FAILURE_(statement, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (::testing::internal::AlwaysTrue()) { \
    ::testing::internal::HasNewFatalFailureHelper gtest_fatal_failure_checker; \
    GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
    if (gtest_fatal_failure_checker.has_new_fatal_failure()) { \
      goto GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__); \
    } \
  } else \
    GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__): \
      fail("Expected: " #statement " doesn't generate new fatal " \
           "failures in the current thread.\n" \
           " Actual: it does.")

// Expands to the name of the class that implements the given test.
// The name is formed as <test_case_name>_<test_name>_Test.
#define GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
  test_case_name##_##test_name##_Test

// Helper macro for defining tests.
// The expansion:
//   1. declares a class named by GTEST_TEST_CLASS_NAME_ that derives from
//      parent_class and overrides TestBody();
//   2. defines that class's static test_info_ member by calling
//      MakeAndRegisterTestInfo() with the stringified names, parent_id,
//      parent_class's SetUpTestCase/TearDownTestCase and a new
//      TestFactoryImpl for the class;
//   3. ends with the signature of TestBody(), so the braces that follow
//      the macro invocation become the body of the test.
#define GTEST_TEST_(test_case_name, test_name, parent_class, parent_id)\
class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : public parent_class {\
 public:\
  GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {}\
 private:\
  virtual void TestBody();\
  static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;\
  GTEST_DISALLOW_COPY_AND_ASSIGN_(\
      GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\
};\
\
::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_case_name, test_name)\
  ::test_info_ =\
    ::testing::internal::MakeAndRegisterTestInfo(\
        #test_case_name, #test_name, NULL, NULL, \
        (parent_id), \
        parent_class::SetUpTestCase, \
        parent_class::TearDownTestCase, \
        new ::testing::internal::TestFactoryImpl<\
            GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\
void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody()

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// The Google C++ Testing Framework (Google Test) +// +// This header file defines the public API for death tests. It is +// #included by gtest.h so a user doesn't need to include this +// directly. + +#ifndef GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_ +#define GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_ + +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
 IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee) +// +// The Google C++ Testing Framework (Google Test) +// +// This header file defines internal utilities needed for implementing +// death tests. They are subject to change without notice. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_ + + +#include <stdio.h> + +namespace testing { +namespace internal { + +GTEST_DECLARE_string_(internal_run_death_test); + +// Names of the flags (needed for parsing Google Test flags). +// Each string is the bare flag name, i.e. without the "--gtest_" prefix that +// appears when the flags are written out in full (--gtest_death_test_style). +const char kDeathTestStyleFlag[] = "death_test_style"; +const char kDeathTestUseFork[] = "death_test_use_fork"; +const char kInternalRunDeathTestFlag[] = "internal_run_death_test"; + +#if GTEST_HAS_DEATH_TEST + +// DeathTest is a class that hides much of the complexity of the +// GTEST_DEATH_TEST_ macro. It is abstract; its static Create method +// returns a concrete class that depends on the prevailing death test +// style, as defined by the --gtest_death_test_style and/or +// --gtest_internal_run_death_test flags.
+ +// In describing the results of death tests, these terms are used with +// the corresponding definitions: +// +// exit status: The integer exit information in the format specified +// by wait(2) +// exit code: The integer code passed to exit(3), _exit(2), or +// returned from main() +class GTEST_API_ DeathTest { + public: + // Create returns false if there was an error determining the + // appropriate action to take for the current death test; for example, + // if the gtest_death_test_style flag is set to an invalid value. + // The LastMessage method will return a more detailed message in that + // case. Otherwise, the DeathTest pointer pointed to by the "test" + // argument is set. If the death test should be skipped, the pointer + // is set to NULL; otherwise, it is set to the address of a new concrete + // DeathTest object that controls the execution of the current test. + static bool Create(const char* statement, const RE* regex, + const char* file, int line, DeathTest** test); + DeathTest(); + virtual ~DeathTest() { } + + // A helper class that aborts a death test when it's deleted. + // Its destructor calls Abort(TEST_ENCOUNTERED_RETURN_STATEMENT) on the + // DeathTest pointer it was constructed with. + class ReturnSentinel { + public: + explicit ReturnSentinel(DeathTest* test) : test_(test) { } + ~ReturnSentinel() { test_->Abort(TEST_ENCOUNTERED_RETURN_STATEMENT); } + private: + DeathTest* const test_; + GTEST_DISALLOW_COPY_AND_ASSIGN_(ReturnSentinel); + } GTEST_ATTRIBUTE_UNUSED_; + + // An enumeration of possible roles that may be taken when a death + // test is encountered. EXECUTE means that the death test logic should + // be executed immediately. OVERSEE means that the program should prepare + // the appropriate environment for a child process to execute the death + // test, then wait for it to complete. + enum TestRole { OVERSEE_TEST, EXECUTE_TEST }; + + // An enumeration of the three reasons that a test might be aborted. + enum AbortReason { + TEST_ENCOUNTERED_RETURN_STATEMENT, + TEST_THREW_EXCEPTION, + TEST_DID_NOT_DIE + }; + + // Assumes one of the above roles. 
+ virtual TestRole AssumeRole() = 0; + + // Waits for the death test to finish and returns its status. + virtual int Wait() = 0; + + // Returns true if the death test passed; that is, the test process + // exited during the test, its exit status matches a user-supplied + // predicate, and its stderr output matches a user-supplied regular + // expression. + // The user-supplied predicate may be a macro expression rather + // than a function pointer or functor, or else Wait and Passed could + // be combined. + virtual bool Passed(bool exit_status_ok) = 0; + + // Signals that the death test did not die as expected. + // The reason argument is one of the AbortReason enumerators. + virtual void Abort(AbortReason reason) = 0; + + // Returns a human-readable outcome message regarding the outcome of + // the last death test. + static const char* LastMessage(); + + // NOTE(review): presumably stores the message later returned by + // LastMessage(); the definition is not part of this header -- confirm. + static void set_last_death_test_message(const std::string& message); + + private: + // A string containing a description of the outcome of the last death test. + static std::string last_death_test_message_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(DeathTest); +}; + +// Factory interface for death tests. May be mocked out for testing. +// Create() takes the same parameter list as DeathTest::Create(). +class DeathTestFactory { + public: + virtual ~DeathTestFactory() { } + virtual bool Create(const char* statement, const RE* regex, + const char* file, int line, DeathTest** test) = 0; +}; + +// A concrete DeathTestFactory implementation for normal use. +class DefaultDeathTestFactory : public DeathTestFactory { + public: + virtual bool Create(const char* statement, const RE* regex, + const char* file, int line, DeathTest** test); +}; + +// Returns true if exit_status describes a process that was terminated +// by a signal, or exited normally with a nonzero exit code. +GTEST_API_ bool ExitedUnsuccessfully(int exit_status); + +// Traps C++ exceptions escaping statement and reports them as test +// failures. Note that trapping SEH exceptions is not implemented here.
+# if GTEST_HAS_EXCEPTIONS +# define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \ + try { \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + } catch (const ::std::exception& gtest_exception) { \ + fprintf(\ + stderr, \ + "\n%s: Caught std::exception-derived exception escaping the " \ + "death test statement. Exception message: %s\n", \ + ::testing::internal::FormatFileLocation(__FILE__, __LINE__).c_str(), \ + gtest_exception.what()); \ + fflush(stderr); \ + death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \ + } catch (...) { \ + death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \ + } + +# else +# define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) + +# endif + +// This macro is for implementing ASSERT_DEATH*, EXPECT_DEATH*, +// ASSERT_EXIT*, and EXPECT_EXIT*. +// DeathTest::Create() is passed the addresses of the local gtest_regex and +// gtest_dt. If Create() returns false, or Passed() returns false in the +// OVERSEE_TEST role, control jumps to the label in the else branch, which +// invokes fail with DeathTest::LastMessage(). +# define GTEST_DEATH_TEST_(statement, predicate, regex, fail) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::AlwaysTrue()) { \ + const ::testing::internal::RE& gtest_regex = (regex); \ + ::testing::internal::DeathTest* gtest_dt; \ + if (!::testing::internal::DeathTest::Create(#statement, &gtest_regex, \ + __FILE__, __LINE__, &gtest_dt)) { \ + goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \ + } \ + if (gtest_dt != NULL) { \ + ::testing::internal::scoped_ptr< ::testing::internal::DeathTest> \ + gtest_dt_ptr(gtest_dt); \ + switch (gtest_dt->AssumeRole()) { \ + case ::testing::internal::DeathTest::OVERSEE_TEST: \ + if (!gtest_dt->Passed(predicate(gtest_dt->Wait()))) { \ + goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \ + } \ + break; \ + case ::testing::internal::DeathTest::EXECUTE_TEST: { \ + ::testing::internal::DeathTest::ReturnSentinel \ + gtest_sentinel(gtest_dt); \ + GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, gtest_dt); \ + gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE); \ + break; \ + } \ + default: \ + break; \ 
+ } \ + } \ + } else \ + GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__): \ + fail(::testing::internal::DeathTest::LastMessage()) +// The symbol "fail" here expands to something into which a message +// can be streamed. + +// This macro is for implementing ASSERT/EXPECT_DEBUG_DEATH when compiled in +// NDEBUG mode. In this case we need the statements to be executed, the regex is +// ignored, and the macro must accept a streamed message even though the message +// is never printed. +# define GTEST_EXECUTE_STATEMENT_(statement, regex) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::AlwaysTrue()) { \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + } else \ + ::testing::Message() + +// A class representing the parsed contents of the +// --gtest_internal_run_death_test flag, as it existed when +// RUN_ALL_TESTS was called. +class InternalRunDeathTestFlag { + public: + InternalRunDeathTestFlag(const std::string& a_file, + int a_line, + int an_index, + int a_write_fd) + : file_(a_file), line_(a_line), index_(an_index), + write_fd_(a_write_fd) {} + + // Closes write_fd_ via posix::Close() when it is non-negative. + ~InternalRunDeathTestFlag() { + if (write_fd_ >= 0) + posix::Close(write_fd_); + } + + const std::string& file() const { return file_; } + int line() const { return line_; } + int index() const { return index_; } + int write_fd() const { return write_fd_; } + + private: + std::string file_; + int line_; + int index_; + int write_fd_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(InternalRunDeathTestFlag); +}; + +// Returns a newly created InternalRunDeathTestFlag object with fields +// initialized from the GTEST_FLAG(internal_run_death_test) flag if +// the flag is specified; otherwise returns NULL. +InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag(); + +#else // GTEST_HAS_DEATH_TEST + +// This macro is used for implementing macros such as +// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where +// death tests are not supported.
 Those macros must compile on such systems +// iff EXPECT_DEATH and ASSERT_DEATH compile with the same parameters on +// systems that support death tests. This allows one to write such a macro +// on a system that does not support death tests and be sure that it will +// compile on a death-test supporting system. +// +// Parameters: +// statement - A statement that a macro such as EXPECT_DEATH would test +// for program termination. This macro has to make sure this +// statement is compiled but not executed, to ensure that +// EXPECT_DEATH_IF_SUPPORTED compiles with a certain +// parameter iff EXPECT_DEATH compiles with it. +// regex - A regex that a macro such as EXPECT_DEATH would use to test +// the output of statement. This parameter has to be +// compiled but not evaluated by this macro, to ensure that +// this macro only accepts expressions that a macro such as +// EXPECT_DEATH would accept. +// terminator - Must be an empty statement for EXPECT_DEATH_IF_SUPPORTED +// and a return statement for ASSERT_DEATH_IF_SUPPORTED. +// This ensures that ASSERT_DEATH_IF_SUPPORTED will not +// compile inside functions where ASSERT_DEATH doesn't +// compile. +// +// The branch that has an always false condition is used to ensure that +// statement and regex are compiled (and thus syntactically correct) but +// never executed. The unreachable code macro protects the terminator +// statement from generating an 'unreachable code' warning in case +// statement unconditionally returns or throws. The Message constructor at +// the end allows the syntax of streaming additional messages into the +// macro, for compilational compatibility with EXPECT_DEATH/ASSERT_DEATH. +// +// The first branch, guarded by AlwaysTrue(), logs a warning saying that +// death tests are not supported and naming the statement that could not be +// verified. 
+# define GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, terminator) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::AlwaysTrue()) { \ + GTEST_LOG_(WARNING) \ + << "Death tests are not supported on this platform.\n" \ + << "Statement '" #statement "' cannot be verified."; \ + } else if (::testing::internal::AlwaysFalse()) { \ + ::testing::internal::RE::PartialMatch(".*", (regex)); \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + terminator; \ + } else \ + ::testing::Message() + +#endif // GTEST_HAS_DEATH_TEST + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_ + +namespace testing { + +// This flag controls the style of death tests. Valid values are "threadsafe", +// meaning that the death test child process will re-execute the test binary +// from the start, running only a single death test, or "fast", +// meaning that the child process will execute the test logic immediately +// after forking. +GTEST_DECLARE_string_(death_test_style); + +#if GTEST_HAS_DEATH_TEST + +namespace internal { + +// Returns a Boolean value indicating whether the caller is currently +// executing in the context of the death test child process. Tools such as +// Valgrind heap checkers may need this to modify their behavior in death +// tests. IMPORTANT: This is an internal utility. Using it may break the +// implementation of death tests. User code MUST NOT use it. +GTEST_API_ bool InDeathTestChild(); + +} // namespace internal + +// The following macros are useful for writing death tests. + +// Here's what happens when an ASSERT_DEATH* or EXPECT_DEATH* is +// executed: +// +// 1. It generates a warning if there is more than one active +// thread. This is because it's safe to fork() or clone() only +// when there is a single thread. +// +// 2.
The parent process clone()s a sub-process and runs the death +// test in it; the sub-process exits with code 0 at the end of the +// death test, if it hasn't exited already. +// +// 3. The parent process waits for the sub-process to terminate. +// +// 4. The parent process checks the exit code and error message of +// the sub-process. +// +// Examples: +// +// ASSERT_DEATH(server.SendMessage(56, "Hello"), "Invalid port number"); +// for (int i = 0; i < 5; i++) { +// EXPECT_DEATH(server.ProcessRequest(i), +// "Invalid request .* in ProcessRequest()") +// << "Failed to die on request " << i; +// } +// +// ASSERT_EXIT(server.ExitNow(), ::testing::ExitedWithCode(0), "Exiting"); +// +// bool KilledBySIGHUP(int exit_code) { +// return WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGHUP; +// } +// +// ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!"); +// +// On the regular expressions used in death tests: +// +// On POSIX-compliant systems (*nix), we use the <regex.h> library, +// which uses the POSIX extended regex syntax. +// +// On other platforms (e.g. Windows), we only support a simple regex +// syntax implemented as part of Google Test. This limited +// implementation should be enough most of the time when writing +// death tests; though it lacks many features you can find in PCRE +// or POSIX extended regex syntax. For example, we don't support +// union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and +// repetition count ("x{5,7}"), among others. +// +// Below is the syntax that we do support. We chose it to be a +// subset of both PCRE and POSIX extended regex, so it's easy to +// learn wherever you come from. In the following: 'A' denotes a +// literal character, period (.), or a single \\ escape sequence; +// 'x' and 'y' denote regular expressions; 'm' and 'n' are for +// natural numbers. 
+// +// c matches any literal character c +// \\d matches any decimal digit +// \\D matches any character that's not a decimal digit +// \\f matches \f +// \\n matches \n +// \\r matches \r +// \\s matches any ASCII whitespace, including \n +// \\S matches any character that's not a whitespace +// \\t matches \t +// \\v matches \v +// \\w matches any letter, _, or decimal digit +// \\W matches any character that \\w doesn't match +// \\c matches any literal character c, which must be a punctuation +// . matches any single character except \n +// A? matches 0 or 1 occurrences of A +// A* matches 0 or many occurrences of A +// A+ matches 1 or many occurrences of A +// ^ matches the beginning of a string (not that of each line) +// $ matches the end of a string (not that of each line) +// xy matches x followed by y +// +// If you accidentally use PCRE or POSIX extended regex features +// not implemented by us, you will get a run-time failure. In that +// case, please try to rewrite your regular expression within the +// above syntax. +// +// This implementation is *not* meant to be as highly tuned or robust +// as a compiled regex library, but should perform well enough for a +// death test, which already incurs significant overhead by launching +// a child process. +// +// Known caveats: +// +// A "threadsafe" style death test obtains the path to the test +// program from argv[0] and re-executes it in the sub-process. For +// simplicity, the current implementation doesn't search the PATH +// when launching the sub-process. This means that the user must +// invoke the test program via a path that contains at least one +// path separator (e.g. path/to/foo_test and +// /absolute/path/to/bar_test are fine, but foo_test is not). This +// is rarely a problem as people usually don't put the test binary +// directory in PATH. +// +// TODO(wan@google.com): make thread-safe death tests search the PATH. 
+ +// Asserts that a given statement causes the program to exit, with an +// integer exit status that satisfies predicate, and emitting error output +// that matches regex. +// Expands to GTEST_DEATH_TEST_ with GTEST_FATAL_FAILURE_ as the fail handler. +# define ASSERT_EXIT(statement, predicate, regex) \ + GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_FATAL_FAILURE_) + +// Like ASSERT_EXIT, but continues on to successive tests in the +// test case, if any: +// (it passes GTEST_NONFATAL_FAILURE_ as the fail handler instead). +# define EXPECT_EXIT(statement, predicate, regex) \ + GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_NONFATAL_FAILURE_) + +// Asserts that a given statement causes the program to exit, either by +// explicitly exiting with a nonzero exit code or being killed by a +// signal, and emitting error output that matches regex. +// Implemented as ASSERT_EXIT with the ExitedUnsuccessfully predicate. +# define ASSERT_DEATH(statement, regex) \ + ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex) + +// Like ASSERT_DEATH, but continues on to successive tests in the +// test case, if any: +# define EXPECT_DEATH(statement, regex) \ + EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex) + +// Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*: + +// Tests that an exit code describes a normal exit with a given exit code. +// Instances are callable: operator() takes the exit status to examine. +class GTEST_API_ ExitedWithCode { + public: + explicit ExitedWithCode(int exit_code); + bool operator()(int exit_status) const; + private: + // No implementation - assignment is unsupported. + void operator=(const ExitedWithCode& other); + + const int exit_code_; +}; + +# if !GTEST_OS_WINDOWS +// Tests that an exit code describes an exit due to termination by a +// given signal. +// Only declared when GTEST_OS_WINDOWS is not set. +class GTEST_API_ KilledBySignal { + public: + explicit KilledBySignal(int signum); + bool operator()(int exit_status) const; + private: + const int signum_; +}; +# endif // !GTEST_OS_WINDOWS + +// EXPECT_DEBUG_DEATH asserts that the given statements die in debug mode.
+// The death testing framework causes this to have interesting semantics, +// since the sideeffects of the call are only visible in opt mode, and not +// in debug mode. +// +// In practice, this can be used to test functions that utilize the +// LOG(DFATAL) macro using the following style: +// +// int DieInDebugOr12(int* sideeffect) { +// if (sideeffect) { +// *sideeffect = 12; +// } +// LOG(DFATAL) << "death"; +// return 12; +// } +// +// TEST(TestCase, TestDieOr12WorksInDbgAndOpt) { +// int sideeffect = 0; +// // Only asserts in dbg. +// EXPECT_DEBUG_DEATH(DieInDebugOr12(&sideeffect), "death"); +// +// #ifdef NDEBUG +// // opt-mode has sideeffect visible. +// EXPECT_EQ(12, sideeffect); +// #else +// // dbg-mode no visible sideeffect. +// EXPECT_EQ(0, sideeffect); +// #endif +// } +// +// This will assert that DieInDebugOr12() crashes in debug +// mode, usually due to a DCHECK or LOG(DFATAL), but returns the +// appropriate fallback value (12 in this case) in opt mode. If you +// need to test that a function has appropriate side-effects in opt +// mode, include assertions against the side-effects. A general +// pattern for this is: +// +// EXPECT_DEBUG_DEATH({ +// // Side-effects here will have an effect after this statement in +// // opt mode, but none in debug mode.
+// EXPECT_EQ(12, DieInDebugOr12(&sideeffect)); +// }, "death"); +// +// When NDEBUG is defined, both *_DEBUG_DEATH macros expand to +// GTEST_EXECUTE_STATEMENT_; otherwise they forward to EXPECT_DEATH and +// ASSERT_DEATH respectively. +// +# ifdef NDEBUG + +# define EXPECT_DEBUG_DEATH(statement, regex) \ + GTEST_EXECUTE_STATEMENT_(statement, regex) + +# define ASSERT_DEBUG_DEATH(statement, regex) \ + GTEST_EXECUTE_STATEMENT_(statement, regex) + +# else + +# define EXPECT_DEBUG_DEATH(statement, regex) \ + EXPECT_DEATH(statement, regex) + +# define ASSERT_DEBUG_DEATH(statement, regex) \ + ASSERT_DEATH(statement, regex) + +# endif // NDEBUG for EXPECT_DEBUG_DEATH +#endif // GTEST_HAS_DEATH_TEST + +// EXPECT_DEATH_IF_SUPPORTED(statement, regex) and +// ASSERT_DEATH_IF_SUPPORTED(statement, regex) expand to real death tests if +// death tests are supported; otherwise they just issue a warning. This is +// useful when you are combining death test assertions with normal test +// assertions in one test. +// In the unsupported case the EXPECT form passes an empty terminator to +// GTEST_UNSUPPORTED_DEATH_TEST_ and the ASSERT form passes "return". +#if GTEST_HAS_DEATH_TEST +# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \ + EXPECT_DEATH(statement, regex) +# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \ + ASSERT_DEATH(statement, regex) +#else +# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \ + GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, ) +# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \ + GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, return) +#endif + +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_ +// This file was GENERATED by command: +// pump.py gtest-param-test.h.pump +// DO NOT EDIT BY HAND!!! + +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: vladl@google.com (Vlad Losev) +// +// Macros and functions for implementing parameterized tests +// in Google C++ Testing Framework (Google Test) +// +// This file is generated by a SCRIPT. DO NOT EDIT BY HAND! +// +#ifndef GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ +#define GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ + + +// Value-parameterized tests allow you to test your code with different +// parameters without writing multiple copies of the same test. +// +// Here is how you use value-parameterized tests: + +#if 0 + +// To write value-parameterized tests, first you should define a fixture +// class. 
 It is usually derived from testing::TestWithParam<T> (see below for +// another inheritance scheme that's sometimes useful in more complicated +// class hierarchies), where T is the type of your parameter values. +// TestWithParam<T> is itself derived from testing::Test. T can be any +// copyable type. If it's a raw pointer, you are responsible for managing the +// lifespan of the pointed values. + +class FooTest : public ::testing::TestWithParam<const char*> { + // You can implement all the usual class fixture members here. +}; + +// Then, use the TEST_P macro to define as many parameterized tests +// for this fixture as you want. The _P suffix is for "parameterized" +// or "pattern", whichever you prefer to think. + +TEST_P(FooTest, DoesBlah) { + // Inside a test, access the test parameter with the GetParam() method + // of the TestWithParam<T> class: + EXPECT_TRUE(foo.Blah(GetParam())); + ... +} + +TEST_P(FooTest, HasBlahBlah) { + ... +} + +// Finally, you can use INSTANTIATE_TEST_CASE_P to instantiate the test +// case with any set of parameters you want. Google Test defines a number +// of functions for generating test parameters. They return what we call +// (surprise!) parameter generators. Here is a summary of them, which +// are all in the testing namespace: +// +// +// Range(begin, end [, step]) - Yields values {begin, begin+step, +// begin+step+step, ...}. The values do not +// include end. step defaults to 1. +// Values(v1, v2, ..., vN) - Yields values {v1, v2, ..., vN}. +// ValuesIn(container) - Yields values from a C-style array, an STL +// ValuesIn(begin,end) container, or an iterator range [begin, end). +// Bool() - Yields sequence {false, true}. +// Combine(g1, g2, ..., gN) - Yields all combinations (the Cartesian product +// for the math savvy) of the values generated +// by the N generators. +// +// For more details, see comments at the definitions of these functions below +// in this file.
+// +// The following statement will instantiate tests from the FooTest test case +// each with parameter values "meeny", "miny", and "moe". + +INSTANTIATE_TEST_CASE_P(InstantiationName, + FooTest, + Values("meeny", "miny", "moe")); + +// To distinguish different instances of the pattern, (yes, you +// can instantiate it more than once) the first argument to the +// INSTANTIATE_TEST_CASE_P macro is a prefix that will be added to the +// actual test case name. Remember to pick unique prefixes for different +// instantiations. The tests from the instantiation above will have +// these names: +// +// * InstantiationName/FooTest.DoesBlah/0 for "meeny" +// * InstantiationName/FooTest.DoesBlah/1 for "miny" +// * InstantiationName/FooTest.DoesBlah/2 for "moe" +// * InstantiationName/FooTest.HasBlahBlah/0 for "meeny" +// * InstantiationName/FooTest.HasBlahBlah/1 for "miny" +// * InstantiationName/FooTest.HasBlahBlah/2 for "moe" +// +// You can use these names in --gtest_filter. +// +// This statement will instantiate all tests from FooTest again, each +// with parameter values "cat" and "dog": + +const char* pets[] = {"cat", "dog"}; +INSTANTIATE_TEST_CASE_P(AnotherInstantiationName, FooTest, ValuesIn(pets)); + +// The tests from the instantiation above will have these names: +// +// * AnotherInstantiationName/FooTest.DoesBlah/0 for "cat" +// * AnotherInstantiationName/FooTest.DoesBlah/1 for "dog" +// * AnotherInstantiationName/FooTest.HasBlahBlah/0 for "cat" +// * AnotherInstantiationName/FooTest.HasBlahBlah/1 for "dog" +// +// Please note that INSTANTIATE_TEST_CASE_P will instantiate all tests +// in the given test case, whether their definitions come before or +// AFTER the INSTANTIATE_TEST_CASE_P statement. +// +// Please also note that generator expressions (including parameters to the +// generators) are evaluated in InitGoogleTest(), after main() has started.
+// This allows the user on one hand, to adjust generator parameters in order +// to dynamically determine a set of tests to run and on the other hand, +// give the user a chance to inspect the generated tests with Google Test +// reflection API before RUN_ALL_TESTS() is executed. +// +// You can see samples/sample7_unittest.cc and samples/sample8_unittest.cc +// for more examples. +// +// In the future, we plan to publish the API for defining new parameter +// generators. But for now this interface remains part of the internal +// implementation and is subject to change. +// +// +// A parameterized test fixture must be derived from testing::Test and from +// testing::WithParamInterface<T>, where T is the type of the parameter +// values. Inheriting from TestWithParam<T> satisfies that requirement because +// TestWithParam<T> inherits from both Test and WithParamInterface. In more +// complicated hierarchies, however, it is occasionally useful to inherit +// separately from Test and WithParamInterface. For example: + +class BaseTest : public ::testing::Test { + // You can inherit all the usual members for a non-parameterized test + // fixture here. +}; + +class DerivedTest : public BaseTest, public ::testing::WithParamInterface<int> { + // The usual test fixture members go here too. +}; + +TEST_F(BaseTest, HasFoo) { + // This is an ordinary non-parameterized test. +} + +TEST_P(DerivedTest, DoesBlah) { + // GetParam works just the same here as if you inherit from TestWithParam. + EXPECT_TRUE(foo.Blah(GetParam())); +} + +#endif // 0 + + +#if !GTEST_OS_SYMBIAN +# include <utility> +#endif + +// scripts/fuse_gtest.py depends on gtest's own header being #included +// *unconditionally*. Therefore these #includes cannot be moved +// inside #if GTEST_HAS_PARAM_TEST. +// Copyright 2008 Google Inc. +// All Rights Reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: vladl@google.com (Vlad Losev) + +// Type and function utilities for implementing parameterized tests. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_ + +#include <iterator> +#include <utility> +#include <vector> + +// scripts/fuse_gtest.py depends on gtest's own header being #included +// *unconditionally*. Therefore these #includes cannot be moved +// inside #if GTEST_HAS_PARAM_TEST. 
+// Copyright 2003 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Dan Egnor (egnor@google.com) +// +// A "smart" pointer type with reference tracking. Every pointer to a +// particular object is kept on a circular linked list. When the last pointer +// to an object is destroyed or reassigned, the object is deleted. +// +// Used properly, this deletes the object when the last reference goes away. 
+// There are several caveats: +// - Like all reference counting schemes, cycles lead to leaks. +// - Each smart pointer is actually two pointers (8 bytes instead of 4). +// - Every time a pointer is assigned, the entire list of pointers to that +// object is traversed. This class is therefore NOT SUITABLE when there +// will often be more than two or three pointers to a particular object. +// - References are only tracked as long as linked_ptr<> objects are copied. +// If a linked_ptr<> is converted to a raw pointer and back, BAD THINGS +// will happen (double deletion). +// +// A good use of this class is storing object references in STL containers. +// You can safely put linked_ptr<> in a vector<>. +// Other uses may not be as good. +// +// Note: If you use an incomplete type with linked_ptr<>, the class +// *containing* linked_ptr<> must have a constructor and destructor (even +// if they do nothing!). +// +// Bill Gibbons suggested we use something like this. +// +// Thread Safety: +// Unlike other linked_ptr implementations, in this implementation +// a linked_ptr object is thread-safe in the sense that: +// - it's safe to copy linked_ptr objects concurrently, +// - it's safe to copy *from* a linked_ptr and read its underlying +// raw pointer (e.g. via get()) concurrently, and +// - it's safe to write to two linked_ptrs that point to the same +// shared object concurrently. +// TODO(wan@google.com): rename this to safe_linked_ptr to avoid +// confusion with normal linked_ptr. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_ + +#include <stdlib.h> +#include <assert.h> + + +namespace testing { +namespace internal { + +// Protects copying of all linked_ptr objects. +GTEST_API_ GTEST_DECLARE_STATIC_MUTEX_(g_linked_ptr_mutex); + +// This is used internally by all instances of linked_ptr<>. 
It needs to be +// a non-template class because different types of linked_ptr<> can refer to +// the same object (linked_ptr<Superclass>(obj) vs linked_ptr<Subclass>(obj)). +// So, it needs to be possible for different types of linked_ptr to participate +// in the same circular linked list, so we need a single class type here. +// +// DO NOT USE THIS CLASS DIRECTLY YOURSELF. Use linked_ptr<T>. +class linked_ptr_internal { + public: + // Create a new circle that includes only this instance. + void join_new() { + next_ = this; + } + + // Many linked_ptr operations may change p.link_ for some linked_ptr + // variable p in the same circle as this object. Therefore we need + // to prevent two such operations from occurring concurrently. + // + // Note that different types of linked_ptr objects can coexist in a + // circle (e.g. linked_ptr<Base>, linked_ptr<Derived1>, and + // linked_ptr<Derived2>). Therefore we must use a single mutex to + // protect all linked_ptr objects. This can create serious + // contention in production code, but is acceptable in a testing + // framework. + + // Join an existing circle. + void join(linked_ptr_internal const* ptr) + GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) { + MutexLock lock(&g_linked_ptr_mutex); + + linked_ptr_internal const* p = ptr; + while (p->next_ != ptr) p = p->next_; + p->next_ = this; + next_ = ptr; + } + + // Leave whatever circle we're part of. Returns true if we were the + // last member of the circle. Once this is done, you can join() another. + bool depart() + GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) { + MutexLock lock(&g_linked_ptr_mutex); + + if (next_ == this) return true; + linked_ptr_internal const* p = next_; + while (p->next_ != this) p = p->next_; + p->next_ = next_; + return false; + } + + private: + mutable linked_ptr_internal const* next_; +}; + +template <typename T> +class linked_ptr { + public: + typedef T element_type; + + // Take over ownership of a raw pointer. 
This should happen as soon as + // possible after the object is created. + explicit linked_ptr(T* ptr = NULL) { capture(ptr); } + ~linked_ptr() { depart(); } + + // Copy an existing linked_ptr<>, adding ourselves to the list of references. + template <typename U> linked_ptr(linked_ptr<U> const& ptr) { copy(&ptr); } + linked_ptr(linked_ptr const& ptr) { // NOLINT + assert(&ptr != this); + copy(&ptr); + } + + // Assignment releases the old value and acquires the new. + template <typename U> linked_ptr& operator=(linked_ptr<U> const& ptr) { + depart(); + copy(&ptr); + return *this; + } + + linked_ptr& operator=(linked_ptr const& ptr) { + if (&ptr != this) { + depart(); + copy(&ptr); + } + return *this; + } + + // Smart pointer members. + void reset(T* ptr = NULL) { + depart(); + capture(ptr); + } + T* get() const { return value_; } + T* operator->() const { return value_; } + T& operator*() const { return *value_; } + + bool operator==(T* p) const { return value_ == p; } + bool operator!=(T* p) const { return value_ != p; } + template <typename U> + bool operator==(linked_ptr<U> const& ptr) const { + return value_ == ptr.get(); + } + template <typename U> + bool operator!=(linked_ptr<U> const& ptr) const { + return value_ != ptr.get(); + } + + private: + template <typename U> + friend class linked_ptr; + + T* value_; + linked_ptr_internal link_; + + void depart() { + if (link_.depart()) delete value_; + } + + void capture(T* ptr) { + value_ = ptr; + link_.join_new(); + } + + template <typename U> void copy(linked_ptr<U> const* ptr) { + value_ = ptr->get(); + if (value_) + link_.join(&ptr->link_); + else + link_.join_new(); + } +}; + +template<typename T> inline +bool operator==(T* ptr, const linked_ptr<T>& x) { + return ptr == x.get(); +} + +template<typename T> inline +bool operator!=(T* ptr, const linked_ptr<T>& x) { + return ptr != x.get(); +} + +// A function to convert T* into linked_ptr<T> +// Doing e.g. 
make_linked_ptr(new FooBarBaz<type>(arg)) is a shorter notation +// for linked_ptr<FooBarBaz<type> >(new FooBarBaz<type>(arg)) +template <typename T> +linked_ptr<T> make_linked_ptr(T* ptr) { + return linked_ptr<T>(ptr); +} + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_ +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Author: wan@google.com (Zhanyong Wan) + +// Google Test - The Google C++ Testing Framework +// +// This file implements a universal value printer that can print a +// value of any type T: +// +// void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr); +// +// A user can teach this function how to print a class type T by +// defining either operator<<() or PrintTo() in the namespace that +// defines T. More specifically, the FIRST defined function in the +// following list will be used (assuming T is defined in namespace +// foo): +// +// 1. foo::PrintTo(const T&, ostream*) +// 2. operator<<(ostream&, const T&) defined in either foo or the +// global namespace. +// +// If none of the above is defined, it will print the debug string of +// the value if it is a protocol buffer, or print the raw bytes in the +// value otherwise. +// +// To aid debugging: when T is a reference type, the address of the +// value is also printed; when T is a (const) char pointer, both the +// pointer value and the NUL-terminated string it points to are +// printed. +// +// We also provide some convenient wrappers: +// +// // Prints a value to a string. For a (const or not) char +// // pointer, the NUL-terminated string (but not the pointer) is +// // printed. +// std::string ::testing::PrintToString(const T& value); +// +// // Prints a value tersely: for a reference type, the referenced +// // value (but not the address) is printed; for a (const or not) char +// // pointer, the NUL-terminated string (but not the pointer) is +// // printed. +// void ::testing::internal::UniversalTersePrint(const T& value, ostream*); +// +// // Prints value using the type inferred by the compiler. The difference +// // from UniversalTersePrint() is that this function prints both the +// // pointer and the NUL-terminated string for a (const or not) char pointer. 
+// void ::testing::internal::UniversalPrint(const T& value, ostream*); +// +// // Prints the fields of a tuple tersely to a string vector, one +// // element for each field. Tuple support must be enabled in +// // gtest-port.h. +// std::vector<string> UniversalTersePrintTupleFieldsToStrings( +// const Tuple& value); +// +// Known limitation: +// +// The print primitives print the elements of an STL-style container +// using the compiler-inferred type of *iter where iter is a +// const_iterator of the container. When const_iterator is an input +// iterator but not a forward iterator, this inferred type may not +// match value_type, and the print output may be incorrect. In +// practice, this is rarely a problem as for most containers +// const_iterator is a forward iterator. We'll fix this if there's an +// actual need for it. Note that this fix cannot rely on value_type +// being defined as many user-defined container types don't have +// value_type. + +#ifndef GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_ +#define GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_ + +#include <ostream> // NOLINT +#include <sstream> +#include <string> +#include <utility> +#include <vector> + +namespace testing { + +// Definitions in the 'internal' and 'internal2' name spaces are +// subject to change without notice. DO NOT USE THEM IN USER CODE! +namespace internal2 { + +// Prints the given number of bytes in the given object to the given +// ostream. +GTEST_API_ void PrintBytesInObjectTo(const unsigned char* obj_bytes, + size_t count, + ::std::ostream* os); + +// For selecting which printer to use when a given type has neither << +// nor PrintTo(). +enum TypeKind { + kProtobuf, // a protobuf type + kConvertibleToInteger, // a type implicitly convertible to BiggestInt + // (e.g. 
a named or unnamed enum type) + kOtherType // anything else +}; + +// TypeWithoutFormatter<T, kTypeKind>::PrintValue(value, os) is called +// by the universal printer to print a value of type T when neither +// operator<< nor PrintTo() is defined for T, where kTypeKind is the +// "kind" of T as defined by enum TypeKind. +template <typename T, TypeKind kTypeKind> +class TypeWithoutFormatter { + public: + // This default version is called when kTypeKind is kOtherType. + static void PrintValue(const T& value, ::std::ostream* os) { + PrintBytesInObjectTo(reinterpret_cast<const unsigned char*>(&value), + sizeof(value), os); + } +}; + +// We print a protobuf using its ShortDebugString() when the string +// doesn't exceed this many characters; otherwise we print it using +// DebugString() for better readability. +const size_t kProtobufOneLinerMaxLength = 50; + +template <typename T> +class TypeWithoutFormatter<T, kProtobuf> { + public: + static void PrintValue(const T& value, ::std::ostream* os) { + const ::testing::internal::string short_str = value.ShortDebugString(); + const ::testing::internal::string pretty_str = + short_str.length() <= kProtobufOneLinerMaxLength ? + short_str : ("\n" + value.DebugString()); + *os << ("<" + pretty_str + ">"); + } +}; + +template <typename T> +class TypeWithoutFormatter<T, kConvertibleToInteger> { + public: + // Since T has no << operator or PrintTo() but can be implicitly + // converted to BiggestInt, we print it as a BiggestInt. + // + // Most likely T is an enum type (either named or unnamed), in which + // case printing it as an integer is the desired behavior. In case + // T is not an enum, printing it as an integer is the best we can do + // given that it has no user-defined printer. + static void PrintValue(const T& value, ::std::ostream* os) { + const internal::BiggestInt kBigInt = value; + *os << kBigInt; + } +}; + +// Prints the given value to the given ostream. 
If the value is a +// protocol message, its debug string is printed; if it's an enum or +// of a type implicitly convertible to BiggestInt, it's printed as an +// integer; otherwise the bytes in the value are printed. This is +// what UniversalPrinter<T>::Print() does when it knows nothing about +// type T and T has neither << operator nor PrintTo(). +// +// A user can override this behavior for a class type Foo by defining +// a << operator in the namespace where Foo is defined. +// +// We put this operator in namespace 'internal2' instead of 'internal' +// to simplify the implementation, as much code in 'internal' needs to +// use << in STL, which would conflict with our own << were it defined +// in 'internal'. +// +// Note that this operator<< takes a generic std::basic_ostream<Char, +// CharTraits> type instead of the more restricted std::ostream. If +// we define it to take an std::ostream instead, we'll get an +// "ambiguous overloads" compiler error when trying to print a type +// Foo that supports streaming to std::basic_ostream<Char, +// CharTraits>, as the compiler cannot tell whether +// operator<<(std::ostream&, const T&) or +// operator<<(std::basic_stream<Char, CharTraits>, const Foo&) is more +// specific. +template <typename Char, typename CharTraits, typename T> +::std::basic_ostream<Char, CharTraits>& operator<<( + ::std::basic_ostream<Char, CharTraits>& os, const T& x) { + TypeWithoutFormatter<T, + (internal::IsAProtocolMessage<T>::value ? kProtobuf : + internal::ImplicitlyConvertible<const T&, internal::BiggestInt>::value ? + kConvertibleToInteger : kOtherType)>::PrintValue(x, &os); + return os; +} + +} // namespace internal2 +} // namespace testing + +// This namespace MUST NOT BE NESTED IN ::testing, or the name look-up +// magic needed for implementing UniversalPrinter won't work. +namespace testing_internal { + +// Used to print a value that is not an STL-style container when the +// user doesn't define PrintTo() for it. 
+template <typename T> +void DefaultPrintNonContainerTo(const T& value, ::std::ostream* os) { + // With the following statement, during unqualified name lookup, + // testing::internal2::operator<< appears as if it was declared in + // the nearest enclosing namespace that contains both + // ::testing_internal and ::testing::internal2, i.e. the global + // namespace. For more details, refer to the C++ Standard section + // 7.3.4-1 [namespace.udir]. This allows us to fall back onto + // testing::internal2::operator<< in case T doesn't come with a << + // operator. + // + // We cannot write 'using ::testing::internal2::operator<<;', which + // gcc 3.3 fails to compile due to a compiler bug. + using namespace ::testing::internal2; // NOLINT + + // Assuming T is defined in namespace foo, in the next statement, + // the compiler will consider all of: + // + // 1. foo::operator<< (thanks to Koenig look-up), + // 2. ::operator<< (as the current namespace is enclosed in ::), + // 3. testing::internal2::operator<< (thanks to the using statement above). + // + // The operator<< whose type matches T best will be picked. + // + // We deliberately allow #2 to be a candidate, as sometimes it's + // impossible to define #1 (e.g. when foo is ::std, defining + // anything in it is undefined behavior unless you are a compiler + // vendor.). + *os << value; +} + +} // namespace testing_internal + +namespace testing { +namespace internal { + +// UniversalPrinter<T>::Print(value, ostream_ptr) prints the given +// value to the given ostream. The caller must ensure that +// 'ostream_ptr' is not NULL, or the behavior is undefined. +// +// We define UniversalPrinter as a class template (as opposed to a +// function template), as we need to partially specialize it for +// reference types, which cannot be done with function templates. 
+template <typename T> +class UniversalPrinter; + +template <typename T> +void UniversalPrint(const T& value, ::std::ostream* os); + +// Used to print an STL-style container when the user doesn't define +// a PrintTo() for it. +template <typename C> +void DefaultPrintTo(IsContainer /* dummy */, + false_type /* is not a pointer */, + const C& container, ::std::ostream* os) { + const size_t kMaxCount = 32; // The maximum number of elements to print. + *os << '{'; + size_t count = 0; + for (typename C::const_iterator it = container.begin(); + it != container.end(); ++it, ++count) { + if (count > 0) { + *os << ','; + if (count == kMaxCount) { // Enough has been printed. + *os << " ..."; + break; + } + } + *os << ' '; + // We cannot call PrintTo(*it, os) here as PrintTo() doesn't + // handle *it being a native array. + internal::UniversalPrint(*it, os); + } + + if (count > 0) { + *os << ' '; + } + *os << '}'; +} + +// Used to print a pointer that is neither a char pointer nor a member +// pointer, when the user doesn't define PrintTo() for it. (A member +// variable pointer or member function pointer doesn't really point to +// a location in the address space. Their representation is +// implementation-defined. Therefore they will be printed as raw +// bytes.) +template <typename T> +void DefaultPrintTo(IsNotContainer /* dummy */, + true_type /* is a pointer */, + T* p, ::std::ostream* os) { + if (p == NULL) { + *os << "NULL"; + } else { + // C++ doesn't allow casting from a function pointer to any object + // pointer. + // + // IsTrue() silences warnings: "Condition is always true", + // "unreachable code". + if (IsTrue(ImplicitlyConvertible<T*, const void*>::value)) { + // T is not a function type. We just call << to print p, + // relying on ADL to pick up user-defined << for their pointer + // types, if any. + *os << p; + } else { + // T is a function type, so '*os << p' doesn't do what we want + // (it just prints p as bool). 
We want to print p as a const + // void*. However, we cannot cast it to const void* directly, + // even using reinterpret_cast, as earlier versions of gcc + // (e.g. 3.4.5) cannot compile the cast when p is a function + // pointer. Casting to UInt64 first solves the problem. + *os << reinterpret_cast<const void*>( + reinterpret_cast<internal::UInt64>(p)); + } + } +} + +// Used to print a non-container, non-pointer value when the user +// doesn't define PrintTo() for it. +template <typename T> +void DefaultPrintTo(IsNotContainer /* dummy */, + false_type /* is not a pointer */, + const T& value, ::std::ostream* os) { + ::testing_internal::DefaultPrintNonContainerTo(value, os); +} + +// Prints the given value using the << operator if it has one; +// otherwise prints the bytes in it. This is what +// UniversalPrinter<T>::Print() does when PrintTo() is not specialized +// or overloaded for type T. +// +// A user can override this behavior for a class type Foo by defining +// an overload of PrintTo() in the namespace where Foo is defined. We +// give the user this option as sometimes defining a << operator for +// Foo is not desirable (e.g. the coding style may prevent doing it, +// or there is already a << operator but it doesn't do what the user +// wants). +template <typename T> +void PrintTo(const T& value, ::std::ostream* os) { + // DefaultPrintTo() is overloaded. The type of its first two + // arguments determine which version will be picked. If T is an + // STL-style container, the version for container will be called; if + // T is a pointer, the pointer version will be called; otherwise the + // generic version will be called. + // + // Note that we check for container types here, prior to we check + // for protocol message types in our operator<<. The rationale is: + // + // For protocol messages, we want to give people a chance to + // override Google Mock's format by defining a PrintTo() or + // operator<<. 
For STL containers, other formats can be + // incompatible with Google Mock's format for the container + // elements; therefore we check for container types here to ensure + // that our format is used. + // + // The second argument of DefaultPrintTo() is needed to bypass a bug + // in Symbian's C++ compiler that prevents it from picking the right + // overload between: + // + // PrintTo(const T& x, ...); + // PrintTo(T* x, ...); + DefaultPrintTo(IsContainerTest<T>(0), is_pointer<T>(), value, os); +} + +// The following list of PrintTo() overloads tells +// UniversalPrinter<T>::Print() how to print standard types (built-in +// types, strings, plain arrays, and pointers). + +// Overloads for various char types. +GTEST_API_ void PrintTo(unsigned char c, ::std::ostream* os); +GTEST_API_ void PrintTo(signed char c, ::std::ostream* os); +inline void PrintTo(char c, ::std::ostream* os) { + // When printing a plain char, we always treat it as unsigned. This + // way, the output won't be affected by whether the compiler thinks + // char is signed or not. + PrintTo(static_cast<unsigned char>(c), os); +} + +// Overloads for other simple built-in types. +inline void PrintTo(bool x, ::std::ostream* os) { + *os << (x ? "true" : "false"); +} + +// Overload for wchar_t type. +// Prints a wchar_t as a symbol if it is printable or as its internal +// code otherwise and also as its decimal code (except for L'\0'). +// The L'\0' char is printed as "L'\\0'". The decimal code is printed +// as signed integer when wchar_t is implemented by the compiler +// as a signed type and is printed as an unsigned integer when wchar_t +// is implemented as an unsigned type. +GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream* os); + +// Overloads for C strings. 
+GTEST_API_ void PrintTo(const char* s, ::std::ostream* os); +inline void PrintTo(char* s, ::std::ostream* os) { + PrintTo(ImplicitCast_<const char*>(s), os); +} + +// signed/unsigned char is often used for representing binary data, so +// we print pointers to it as void* to be safe. +inline void PrintTo(const signed char* s, ::std::ostream* os) { + PrintTo(ImplicitCast_<const void*>(s), os); +} +inline void PrintTo(signed char* s, ::std::ostream* os) { + PrintTo(ImplicitCast_<const void*>(s), os); +} +inline void PrintTo(const unsigned char* s, ::std::ostream* os) { + PrintTo(ImplicitCast_<const void*>(s), os); +} +inline void PrintTo(unsigned char* s, ::std::ostream* os) { + PrintTo(ImplicitCast_<const void*>(s), os); +} + +// MSVC can be configured to define wchar_t as a typedef of unsigned +// short. It defines _NATIVE_WCHAR_T_DEFINED when wchar_t is a native +// type. When wchar_t is a typedef, defining an overload for const +// wchar_t* would cause unsigned short* be printed as a wide string, +// possibly causing invalid memory accesses. +#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) +// Overloads for wide C strings +GTEST_API_ void PrintTo(const wchar_t* s, ::std::ostream* os); +inline void PrintTo(wchar_t* s, ::std::ostream* os) { + PrintTo(ImplicitCast_<const wchar_t*>(s), os); +} +#endif + +// Overload for C arrays. Multi-dimensional arrays are printed +// properly. + +// Prints the given number of elements in an array, without printing +// the curly braces. +template <typename T> +void PrintRawArrayTo(const T a[], size_t count, ::std::ostream* os) { + UniversalPrint(a[0], os); + for (size_t i = 1; i != count; i++) { + *os << ", "; + UniversalPrint(a[i], os); + } +} + +// Overloads for ::string and ::std::string. 
+#if GTEST_HAS_GLOBAL_STRING +GTEST_API_ void PrintStringTo(const ::string&s, ::std::ostream* os); +inline void PrintTo(const ::string& s, ::std::ostream* os) { + PrintStringTo(s, os); +} +#endif // GTEST_HAS_GLOBAL_STRING + +GTEST_API_ void PrintStringTo(const ::std::string&s, ::std::ostream* os); +inline void PrintTo(const ::std::string& s, ::std::ostream* os) { + PrintStringTo(s, os); +} + +// Overloads for ::wstring and ::std::wstring. +#if GTEST_HAS_GLOBAL_WSTRING +GTEST_API_ void PrintWideStringTo(const ::wstring&s, ::std::ostream* os); +inline void PrintTo(const ::wstring& s, ::std::ostream* os) { + PrintWideStringTo(s, os); +} +#endif // GTEST_HAS_GLOBAL_WSTRING + +#if GTEST_HAS_STD_WSTRING +GTEST_API_ void PrintWideStringTo(const ::std::wstring&s, ::std::ostream* os); +inline void PrintTo(const ::std::wstring& s, ::std::ostream* os) { + PrintWideStringTo(s, os); +} +#endif // GTEST_HAS_STD_WSTRING + +#if GTEST_HAS_TR1_TUPLE +// Overload for ::std::tr1::tuple. Needed for printing function arguments, +// which are packed as tuples. + +// Helper function for printing a tuple. T must be instantiated with +// a tuple type. +template <typename T> +void PrintTupleTo(const T& t, ::std::ostream* os); + +// Overloaded PrintTo() for tuples of various arities. We support +// tuples of up-to 10 fields. The following implementation works +// regardless of whether tr1::tuple is implemented using the +// non-standard variadic template feature or not. 
+ +inline void PrintTo(const ::std::tr1::tuple<>& t, ::std::ostream* os) { + PrintTupleTo(t, os); +} + +template <typename T1> +void PrintTo(const ::std::tr1::tuple<T1>& t, ::std::ostream* os) { + PrintTupleTo(t, os); +} + +template <typename T1, typename T2> +void PrintTo(const ::std::tr1::tuple<T1, T2>& t, ::std::ostream* os) { + PrintTupleTo(t, os); +} + +template <typename T1, typename T2, typename T3> +void PrintTo(const ::std::tr1::tuple<T1, T2, T3>& t, ::std::ostream* os) { + PrintTupleTo(t, os); +} + +template <typename T1, typename T2, typename T3, typename T4> +void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4>& t, ::std::ostream* os) { + PrintTupleTo(t, os); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5> +void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5>& t, + ::std::ostream* os) { + PrintTupleTo(t, os); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6> +void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6>& t, + ::std::ostream* os) { + PrintTupleTo(t, os); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7> +void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7>& t, + ::std::ostream* os) { + PrintTupleTo(t, os); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8> +void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8>& t, + ::std::ostream* os) { + PrintTupleTo(t, os); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9> +void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9>& t, + ::std::ostream* os) { + PrintTupleTo(t, os); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10> +void PrintTo( + const 
::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>& t, + ::std::ostream* os) { + PrintTupleTo(t, os); +} +#endif // GTEST_HAS_TR1_TUPLE + +// Overload for std::pair. +template <typename T1, typename T2> +void PrintTo(const ::std::pair<T1, T2>& value, ::std::ostream* os) { + *os << '('; + // We cannot use UniversalPrint(value.first, os) here, as T1 may be + // a reference type. The same for printing value.second. + UniversalPrinter<T1>::Print(value.first, os); + *os << ", "; + UniversalPrinter<T2>::Print(value.second, os); + *os << ')'; +} + +// Implements printing a non-reference type T by letting the compiler +// pick the right overload of PrintTo() for T. +template <typename T> +class UniversalPrinter { + public: + // MSVC warns about adding const to a function type, so we want to + // disable the warning. +#ifdef _MSC_VER +# pragma warning(push) // Saves the current warning state. +# pragma warning(disable:4180) // Temporarily disables warning 4180. +#endif // _MSC_VER + + // Note: we deliberately don't call this PrintTo(), as that name + // conflicts with ::testing::internal::PrintTo in the body of the + // function. + static void Print(const T& value, ::std::ostream* os) { + // By default, ::testing::internal::PrintTo() is used for printing + // the value. + // + // Thanks to Koenig look-up, if T is a class and has its own + // PrintTo() function defined in its namespace, that function will + // be visible here. Since it is more specific than the generic ones + // in ::testing::internal, it will be picked by the compiler in the + // following statement - exactly what we want. + PrintTo(value, os); + } + +#ifdef _MSC_VER +# pragma warning(pop) // Restores the warning state. +#endif // _MSC_VER +}; + +// UniversalPrintArray(begin, len, os) prints an array of 'len' +// elements, starting at address 'begin'. 
+template <typename T> +void UniversalPrintArray(const T* begin, size_t len, ::std::ostream* os) { + if (len == 0) { + *os << "{}"; + } else { + *os << "{ "; + const size_t kThreshold = 18; + const size_t kChunkSize = 8; + // If the array has more than kThreshold elements, we'll have to + // omit some details by printing only the first and the last + // kChunkSize elements. + // TODO(wan@google.com): let the user control the threshold using a flag. + if (len <= kThreshold) { + PrintRawArrayTo(begin, len, os); + } else { + PrintRawArrayTo(begin, kChunkSize, os); + *os << ", ..., "; + PrintRawArrayTo(begin + len - kChunkSize, kChunkSize, os); + } + *os << " }"; + } +} +// This overload prints a (const) char array compactly. +GTEST_API_ void UniversalPrintArray( + const char* begin, size_t len, ::std::ostream* os); + +// This overload prints a (const) wchar_t array compactly. +GTEST_API_ void UniversalPrintArray( + const wchar_t* begin, size_t len, ::std::ostream* os); + +// Implements printing an array type T[N]. +template <typename T, size_t N> +class UniversalPrinter<T[N]> { + public: + // Prints the given array, omitting some elements when there are too + // many. + static void Print(const T (&a)[N], ::std::ostream* os) { + UniversalPrintArray(a, N, os); + } +}; + +// Implements printing a reference type T&: the address of the referenced +// object is printed first (as "@<address> "), followed by its value. +template <typename T> +class UniversalPrinter<T&> { + public: + // MSVC warns about adding const to a function type, so we want to + // disable the warning. +#ifdef _MSC_VER +# pragma warning(push) // Saves the current warning state. +# pragma warning(disable:4180) // Temporarily disables warning 4180. +#endif // _MSC_VER + + static void Print(const T& value, ::std::ostream* os) { + // Prints the address of the value. We use reinterpret_cast here + // as static_cast doesn't compile when T is a function type. + *os << "@" << reinterpret_cast<const void*>(&value) << " "; + + // Then prints the value itself.
+ UniversalPrint(value, os); + } + +#ifdef _MSC_VER +# pragma warning(pop) // Restores the warning state. +#endif // _MSC_VER +}; + +// Prints a value tersely: for a reference type, the referenced value +// (but not the address) is printed; for a (const) char pointer, the +// NUL-terminated string (but not the pointer) is printed. + +template <typename T> +class UniversalTersePrinter { + public: + static void Print(const T& value, ::std::ostream* os) { + UniversalPrint(value, os); + } +}; +template <typename T> +class UniversalTersePrinter<T&> { + public: + static void Print(const T& value, ::std::ostream* os) { + UniversalPrint(value, os); + } +}; +template <typename T, size_t N> +class UniversalTersePrinter<T[N]> { + public: + static void Print(const T (&value)[N], ::std::ostream* os) { + UniversalPrinter<T[N]>::Print(value, os); + } +}; +// Terse printing of a C string: a null pointer is printed as "NULL"; +// otherwise the characters are printed via UniversalPrint(string(str), os). +template <> +class UniversalTersePrinter<const char*> { + public: + static void Print(const char* str, ::std::ostream* os) { + if (str == NULL) { + *os << "NULL"; + } else { + UniversalPrint(string(str), os); + } + } +}; +template <> +class UniversalTersePrinter<char*> { + public: + static void Print(char* str, ::std::ostream* os) { + UniversalTersePrinter<const char*>::Print(str, os); + } +}; + +#if GTEST_HAS_STD_WSTRING +template <> +class UniversalTersePrinter<const wchar_t*> { + public: + static void Print(const wchar_t* str, ::std::ostream* os) { + if (str == NULL) { + *os << "NULL"; + } else { + UniversalPrint(::std::wstring(str), os); + } + } +}; +#endif + +template <> +class UniversalTersePrinter<wchar_t*> { + public: + static void Print(wchar_t* str, ::std::ostream* os) { + UniversalTersePrinter<const wchar_t*>::Print(str, os); + } +}; + +// Tersely prints value by forwarding to UniversalTersePrinter<T>::Print() +// for the type T deduced by the compiler. +template <typename T> +void UniversalTersePrint(const T& value, ::std::ostream* os) { + UniversalTersePrinter<T>::Print(value, os); +} + +// Prints a value using the type inferred by the compiler.
The +// difference between this and UniversalTersePrint() is that for a +// (const) char pointer, this prints both the pointer and the +// NUL-terminated string. +template <typename T> +void UniversalPrint(const T& value, ::std::ostream* os) { + // A workaround for the bug in VC++ 7.1 that prevents us from instantiating + // UniversalPrinter with T directly. + typedef T T1; + UniversalPrinter<T1>::Print(value, os); +} + +#if GTEST_HAS_TR1_TUPLE +typedef ::std::vector<string> Strings; + +// This helper template allows PrintTo() for tuples and +// UniversalTersePrintTupleFieldsToStrings() to be defined by +// induction on the number of tuple fields. The idea is that +// TuplePrefixPrinter<N>::PrintPrefixTo(t, os) prints the first N +// fields in tuple t, and can be defined in terms of +// TuplePrefixPrinter<N - 1>. + +// The inductive case. +template <size_t N> +struct TuplePrefixPrinter { + // Prints the first N fields of a tuple: the first N - 1 fields, then + // ", ", then field number N - 1 (0-based). + template <typename Tuple> + static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) { + TuplePrefixPrinter<N - 1>::PrintPrefixTo(t, os); + *os << ", "; + UniversalPrinter<typename ::std::tr1::tuple_element<N - 1, Tuple>::type> + ::Print(::std::tr1::get<N - 1>(t), os); + } + + // Tersely prints the first N fields of a tuple to a string vector, + // one element for each field. + template <typename Tuple> + static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) { + TuplePrefixPrinter<N - 1>::TersePrintPrefixToStrings(t, strings); + ::std::stringstream ss; + UniversalTersePrint(::std::tr1::get<N - 1>(t), &ss); + strings->push_back(ss.str()); + } +}; + +// Base cases.
+template <> +struct TuplePrefixPrinter<0> { + template <typename Tuple> + static void PrintPrefixTo(const Tuple&, ::std::ostream*) {} + + template <typename Tuple> + static void TersePrintPrefixToStrings(const Tuple&, Strings*) {} +}; +// We have to specialize the entire TuplePrefixPrinter<> class +// template here, even though the definition of +// TersePrintPrefixToStrings() is the same as the generic version, as +// Embarcadero (formerly CodeGear, formerly Borland) C++ doesn't +// support specializing a method template of a class template. +// The one-field PrintPrefixTo() prints its field without the ", " +// separator that the generic version writes before a field. +template <> +struct TuplePrefixPrinter<1> { + template <typename Tuple> + static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) { + UniversalPrinter<typename ::std::tr1::tuple_element<0, Tuple>::type>:: + Print(::std::tr1::get<0>(t), os); + } + + template <typename Tuple> + static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) { + ::std::stringstream ss; + UniversalTersePrint(::std::tr1::get<0>(t), &ss); + strings->push_back(ss.str()); + } +}; + +// Helper function for printing a tuple. T must be instantiated with +// a tuple type. The fields are printed between "(" and ")". +template <typename T> +void PrintTupleTo(const T& t, ::std::ostream* os) { + *os << "("; + TuplePrefixPrinter< ::std::tr1::tuple_size<T>::value>:: + PrintPrefixTo(t, os); + *os << ")"; +} + +// Prints the fields of a tuple tersely to a string vector, one +// element for each field. See the comment before +// UniversalTersePrint() for how we define "tersely".
+template <typename Tuple> +Strings UniversalTersePrintTupleFieldsToStrings(const Tuple& value) { + Strings result; + TuplePrefixPrinter< ::std::tr1::tuple_size<Tuple>::value>:: + TersePrintPrefixToStrings(value, &result); + return result; +} +#endif // GTEST_HAS_TR1_TUPLE + +} // namespace internal + +// Returns the terse printed form of value (as produced by +// internal::UniversalTersePrinter<T>) as a string. +template <typename T> +::std::string PrintToString(const T& value) { + ::std::stringstream ss; + internal::UniversalTersePrinter<T>::Print(value, &ss); + return ss.str(); +} + +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_ + +#if GTEST_HAS_PARAM_TEST + +namespace testing { +namespace internal { + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// Outputs a message explaining invalid registration of different +// fixture class for the same test case. This may happen when +// TEST_P macro is used to define two tests with the same name +// but in different namespaces. +GTEST_API_ void ReportInvalidTestCaseType(const char* test_case_name, + const char* file, int line); + +template <typename> class ParamGeneratorInterface; +template <typename> class ParamGenerator; + +// Interface for iterating over elements provided by an implementation +// of ParamGeneratorInterface<T>. +template <typename T> +class ParamIteratorInterface { + public: + virtual ~ParamIteratorInterface() {} + // A pointer to the base generator instance. + // Used only for the purposes of iterator comparison + // to make sure that two iterators belong to the same generator. + virtual const ParamGeneratorInterface<T>* BaseGenerator() const = 0; + // Advances iterator to point to the next element + // provided by the generator. The caller is responsible + // for not calling Advance() on an iterator equal to + // BaseGenerator()->End(). + virtual void Advance() = 0; + // Clones the iterator object. Used for implementing copy semantics + // of ParamIterator<T>.
+ virtual ParamIteratorInterface* Clone() const = 0; + // Dereferences the current iterator and provides (read-only) access + // to the pointed value. It is the caller's responsibility not to call + // Current() on an iterator equal to BaseGenerator()->End(). + // Used for implementing ParamIterator<T>::operator*(). + virtual const T* Current() const = 0; + // Determines whether the given iterator and other point to the same + // element in the sequence generated by the generator. + // Used for implementing ParamIterator<T>::operator==(). + virtual bool Equals(const ParamIteratorInterface& other) const = 0; +}; + +// Class iterating over elements provided by an implementation of +// ParamGeneratorInterface<T>. It wraps ParamIteratorInterface<T> +// and implements the const forward iterator concept. +template <typename T> +class ParamIterator { + public: + typedef T value_type; + typedef const T& reference; + typedef ptrdiff_t difference_type; + + // ParamIterator assumes ownership of the impl_ pointer. + ParamIterator(const ParamIterator& other) : impl_(other.impl_->Clone()) {} + ParamIterator& operator=(const ParamIterator& other) { + if (this != &other) + impl_.reset(other.impl_->Clone()); + return *this; + } + + const T& operator*() const { return *impl_->Current(); } + const T* operator->() const { return impl_->Current(); } + // Prefix version of operator++. + ParamIterator& operator++() { + impl_->Advance(); + return *this; + } + // Postfix version of operator++.
+ ParamIterator operator++(int /*unused*/) { + ParamIteratorInterface<T>* clone = impl_->Clone(); + impl_->Advance(); + return ParamIterator(clone); + } + bool operator==(const ParamIterator& other) const { + return impl_.get() == other.impl_.get() || impl_->Equals(*other.impl_); + } + bool operator!=(const ParamIterator& other) const { + return !(*this == other); + } + + private: + friend class ParamGenerator<T>; + explicit ParamIterator(ParamIteratorInterface<T>* impl) : impl_(impl) {} + scoped_ptr<ParamIteratorInterface<T> > impl_; +}; + +// ParamGeneratorInterface<T> is the binary interface to access generators +// defined in other translation units. +template <typename T> +class ParamGeneratorInterface { + public: + typedef T ParamType; + + virtual ~ParamGeneratorInterface() {} + + // Generator interface definition. ParamGenerator<T> wraps the iterators + // returned by Begin() and End() in ParamIterator<T> objects, which take + // ownership of them. + virtual ParamIteratorInterface<T>* Begin() const = 0; + virtual ParamIteratorInterface<T>* End() const = 0; +}; + +// Wraps ParamGeneratorInterface<T> and provides general generator syntax +// compatible with the STL Container concept. +// This class implements copy initialization semantics and the contained +// ParamGeneratorInterface<T> instance is shared among all copies +// of the original object. This is possible because that instance is immutable. +template<typename T> +class ParamGenerator { + public: + typedef ParamIterator<T> iterator; + + explicit ParamGenerator(ParamGeneratorInterface<T>* impl) : impl_(impl) {} + ParamGenerator(const ParamGenerator& other) : impl_(other.impl_) {} + + ParamGenerator& operator=(const ParamGenerator& other) { + impl_ = other.impl_; + return *this; + } + + iterator begin() const { return iterator(impl_->Begin()); } + iterator end() const { return iterator(impl_->End()); } + + private: + linked_ptr<const ParamGeneratorInterface<T> > impl_; +}; + +// Generates values from a range of two comparable values.
Can be used to +// generate sequences of user-defined types that implement operator+() and +// operator<(). +// This class is used in the Range() function. +template <typename T, typename IncrementT> +class RangeGenerator : public ParamGeneratorInterface<T> { + public: + RangeGenerator(T begin, T end, IncrementT step) + : begin_(begin), end_(end), + step_(step), end_index_(CalculateEndIndex(begin, end, step)) {} + virtual ~RangeGenerator() {} + + virtual ParamIteratorInterface<T>* Begin() const { + return new Iterator(this, begin_, 0, step_); + } + virtual ParamIteratorInterface<T>* End() const { + return new Iterator(this, end_, end_index_, step_); + } + + private: + class Iterator : public ParamIteratorInterface<T> { + public: + Iterator(const ParamGeneratorInterface<T>* base, T value, int index, + IncrementT step) + : base_(base), value_(value), index_(index), step_(step) {} + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface<T>* BaseGenerator() const { + return base_; + } + virtual void Advance() { + value_ = value_ + step_; + index_++; + } + virtual ParamIteratorInterface<T>* Clone() const { + return new Iterator(*this); + } + virtual const T* Current() const { return &value_; } + virtual bool Equals(const ParamIteratorInterface<T>& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const int other_index = + CheckedDowncastToActualType<const Iterator>(&other)->index_; + return index_ == other_index; + } + + private: + Iterator(const Iterator& other) + : ParamIteratorInterface<T>(), + base_(other.base_), value_(other.value_), index_(other.index_), + step_(other.step_) {} + + // No implementation - assignment is unsupported.
+ void operator=(const Iterator& other); + + const ParamGeneratorInterface<T>* const base_; + T value_; + int index_; + const IncrementT step_; + }; // class RangeGenerator::Iterator + + // Returns the number of elements in the sequence, i.e. how many times + // step can be added to begin while the value stays less than end. + // NOTE(review): the loop only terminates if adding step eventually + // makes the value reach end. + static int CalculateEndIndex(const T& begin, + const T& end, + const IncrementT& step) { + int end_index = 0; + for (T i = begin; i < end; i = i + step) + end_index++; + return end_index; + } + + // No implementation - assignment is unsupported. + void operator=(const RangeGenerator& other); + + const T begin_; + const T end_; + const IncrementT step_; + // The index for the end() iterator. All the elements in the generated + // sequence are indexed (0-based) to aid iterator comparison. + const int end_index_; +}; // class RangeGenerator + + +// Generates values from a pair of STL-style iterators. Used in the +// ValuesIn() function. The elements are copied from the source range +// since the source can be located on the stack, and the generator +// is likely to persist beyond that stack frame. +template <typename T> +class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> { + public: + template <typename ForwardIterator> + ValuesInIteratorRangeGenerator(ForwardIterator begin, ForwardIterator end) + : container_(begin, end) {} + virtual ~ValuesInIteratorRangeGenerator() {} + + virtual ParamIteratorInterface<T>* Begin() const { + return new Iterator(this, container_.begin()); + } + virtual ParamIteratorInterface<T>* End() const { + return new Iterator(this, container_.end()); + } + + private: + typedef typename ::std::vector<T> ContainerType; + + class Iterator : public ParamIteratorInterface<T> { + public: + Iterator(const ParamGeneratorInterface<T>* base, + typename ContainerType::const_iterator iterator) + : base_(base), iterator_(iterator) {} + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface<T>* BaseGenerator() const { + return base_; + } + virtual void Advance() { + ++iterator_; + value_.reset(); + } + virtual ParamIteratorInterface<T>* Clone()
const { + return new Iterator(*this); + } + // We need to use cached value referenced by iterator_ because *iterator_ + // can return a temporary object (and of type other than T), so just + // having "return &*iterator_;" doesn't work. + // value_ is updated here and not in Advance() because Advance() + // can advance iterator_ beyond the end of the range, and we cannot + // detect that fact. The client code, on the other hand, is + // responsible for not calling Current() on an out-of-range iterator. + virtual const T* Current() const { + if (value_.get() == NULL) + value_.reset(new T(*iterator_)); + return value_.get(); + } + virtual bool Equals(const ParamIteratorInterface<T>& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + return iterator_ == + CheckedDowncastToActualType<const Iterator>(&other)->iterator_; + } + + private: + Iterator(const Iterator& other) + // The explicit constructor call suppresses a false warning + // emitted by gcc when supplied with the -Wextra option. + : ParamIteratorInterface<T>(), + base_(other.base_), + iterator_(other.iterator_) {} + + const ParamGeneratorInterface<T>* const base_; + typename ContainerType::const_iterator iterator_; + // A cached value of *iterator_. We keep it here to allow access by + // pointer in the wrapping iterator's operator->(). + // value_ needs to be mutable to be accessed in Current(). + // Use of scoped_ptr helps manage cached value's lifetime, + // which is bound by the lifespan of the iterator itself. + mutable scoped_ptr<const T> value_; + }; // class ValuesInIteratorRangeGenerator::Iterator + + // No implementation - assignment is unsupported.
+ void operator=(const ValuesInIteratorRangeGenerator& other); + + const ContainerType container_; +}; // class ValuesInIteratorRangeGenerator + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// Stores a parameter value and later creates tests parameterized with that +// value. +template <class TestClass> +class ParameterizedTestFactory : public TestFactoryBase { + public: + typedef typename TestClass::ParamType ParamType; + explicit ParameterizedTestFactory(ParamType parameter) : + parameter_(parameter) {} + virtual Test* CreateTest() { + // Hand the test class a pointer to the stored parameter before + // constructing the test object. + TestClass::SetParam(&parameter_); + return new TestClass(); + } + + private: + const ParamType parameter_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestFactory); +}; + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// TestMetaFactoryBase is a base class for meta-factories that create +// test factories for passing into MakeAndRegisterTestInfo function. +template <class ParamType> +class TestMetaFactoryBase { + public: + virtual ~TestMetaFactoryBase() {} + + virtual TestFactoryBase* CreateTestFactory(ParamType parameter) = 0; +}; + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// TestMetaFactory creates test factories for passing into +// MakeAndRegisterTestInfo function. Since MakeAndRegisterTestInfo receives +// ownership of test factory pointer, same factory object cannot be passed +// into that method twice. But ParameterizedTestCaseInfo is going to call +// it for each Test/Parameter value combination. Thus it needs meta factory +// creator class.
+template <class TestCase> +class TestMetaFactory + : public TestMetaFactoryBase<typename TestCase::ParamType> { + public: + typedef typename TestCase::ParamType ParamType; + + TestMetaFactory() {} + + virtual TestFactoryBase* CreateTestFactory(ParamType parameter) { + return new ParameterizedTestFactory<TestCase>(parameter); + } + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestMetaFactory); +}; + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// ParameterizedTestCaseInfoBase is a generic interface +// to ParameterizedTestCaseInfo classes. ParameterizedTestCaseInfoBase +// accumulates test information provided by TEST_P macro invocations +// and generators provided by INSTANTIATE_TEST_CASE_P macro invocations +// and uses that information to register all resulting test instances +// in RegisterTests method. The ParameterizedTestCaseRegistry class holds +// a collection of pointers to the ParameterizedTestCaseInfo objects +// and calls RegisterTests() on each of them when asked. +class ParameterizedTestCaseInfoBase { + public: + virtual ~ParameterizedTestCaseInfoBase() {} + + // Base part of test case name for display purposes. + virtual const string& GetTestCaseName() const = 0; + // Test case id to verify identity. + virtual TypeId GetTestCaseTypeId() const = 0; + // UnitTest class invokes this method to register tests in this + // test case right before running them in RUN_ALL_TESTS macro. + // This method should not be called more than once on any single + // instance of a ParameterizedTestCaseInfoBase derived class. + virtual void RegisterTests() = 0; + + protected: + ParameterizedTestCaseInfoBase() {} + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfoBase); +}; + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+// +// ParameterizedTestCaseInfo accumulates tests obtained from TEST_P +// macro invocations for a particular test case and generators +// obtained from INSTANTIATE_TEST_CASE_P macro invocations for that +// test case. It registers tests with all values generated by all +// generators when asked. +template <class TestCase> +class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase { + public: + // ParamType and GeneratorCreationFunc are private types but are required + // for declarations of public methods AddTestPattern() and + // AddTestCaseInstantiation(). + typedef typename TestCase::ParamType ParamType; + // A function that returns an instance of appropriate generator type. + typedef ParamGenerator<ParamType>(GeneratorCreationFunc)(); + + explicit ParameterizedTestCaseInfo(const char* name) + : test_case_name_(name) {} + + // Test case base name for display purposes. + virtual const string& GetTestCaseName() const { return test_case_name_; } + // Test case id to verify identity. + virtual TypeId GetTestCaseTypeId() const { return GetTypeId<TestCase>(); } + // TEST_P macro uses AddTestPattern() to record information + // about a single test in a TestInfo structure. + // test_case_name is the base name of the test case (without invocation + // prefix). test_base_name is the name of an individual test without + // parameter index. For the test SequenceA/FooTest.DoBar/1 FooTest is + // test case base name and DoBar is test base name. + void AddTestPattern(const char* test_case_name, + const char* test_base_name, + TestMetaFactoryBase<ParamType>* meta_factory) { + tests_.push_back(linked_ptr<TestInfo>(new TestInfo(test_case_name, + test_base_name, + meta_factory))); + } + // INSTANTIATE_TEST_CASE_P macro uses AddTestCaseInstantiation() to record + // information about a generator.
+ int AddTestCaseInstantiation(const string& instantiation_name, + GeneratorCreationFunc* func, + const char* /* file */, + int /* line */) { + instantiations_.push_back(::std::make_pair(instantiation_name, func)); + return 0; // Return value used only to run this method in namespace scope. + } + // UnitTest class invokes this method to register tests in this test case + // right before running tests in RUN_ALL_TESTS macro. + // This method should not be called more than once on any single + // instance of a ParameterizedTestCaseInfoBase derived class. + // UnitTest has a guard to prevent calling this method more than once. + // For every recorded test and every instantiation, one test is registered + // per generated parameter value. The test case name passed on is + // "<instantiation name>/<test case base name>" (just the base name when + // the instantiation name is empty) and the test name is + // "<test base name>/<parameter index>". + virtual void RegisterTests() { + for (typename TestInfoContainer::iterator test_it = tests_.begin(); + test_it != tests_.end(); ++test_it) { + linked_ptr<TestInfo> test_info = *test_it; + for (typename InstantiationContainer::iterator gen_it = + instantiations_.begin(); gen_it != instantiations_.end(); + ++gen_it) { + const string& instantiation_name = gen_it->first; + ParamGenerator<ParamType> generator((*gen_it->second)()); + + string test_case_name; + if ( !instantiation_name.empty() ) + test_case_name = instantiation_name + "/"; + test_case_name += test_info->test_case_base_name; + + int i = 0; + for (typename ParamGenerator<ParamType>::iterator param_it = + generator.begin(); + param_it != generator.end(); ++param_it, ++i) { + Message test_name_stream; + test_name_stream << test_info->test_base_name << "/" << i; + MakeAndRegisterTestInfo( + test_case_name.c_str(), + test_name_stream.GetString().c_str(), + NULL, // No type parameter. + PrintToString(*param_it).c_str(), + GetTestCaseTypeId(), + TestCase::SetUpTestCase, + TestCase::TearDownTestCase, + test_info->test_meta_factory->CreateTestFactory(*param_it)); + } // for param_it + } // for gen_it + } // for test_it + } // RegisterTests + + private: + // The TestInfo structure keeps information about a single test registered + // with TEST_P macro.
+ struct TestInfo { + TestInfo(const char* a_test_case_base_name, + const char* a_test_base_name, + TestMetaFactoryBase<ParamType>* a_test_meta_factory) : + test_case_base_name(a_test_case_base_name), + test_base_name(a_test_base_name), + test_meta_factory(a_test_meta_factory) {} + + const string test_case_base_name; + const string test_base_name; + const scoped_ptr<TestMetaFactoryBase<ParamType> > test_meta_factory; + }; + typedef ::std::vector<linked_ptr<TestInfo> > TestInfoContainer; + // Keeps pairs of <Instantiation name, Sequence generator creation function> + // received from INSTANTIATE_TEST_CASE_P macros. + typedef ::std::vector<std::pair<string, GeneratorCreationFunc*> > + InstantiationContainer; + + const string test_case_name_; + TestInfoContainer tests_; + InstantiationContainer instantiations_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfo); +}; // class ParameterizedTestCaseInfo + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// ParameterizedTestCaseRegistry owns a list of ParameterizedTestCaseInfoBase +// objects that are looked up by test case name. TEST_P and INSTANTIATE_TEST_CASE_P +// macros use it to locate their corresponding ParameterizedTestCaseInfo +// descriptors. +class ParameterizedTestCaseRegistry { + public: + ParameterizedTestCaseRegistry() {} + ~ParameterizedTestCaseRegistry() { + for (TestCaseInfoContainer::iterator it = test_case_infos_.begin(); + it != test_case_infos_.end(); ++it) { + delete *it; + } + } + + // Looks up or creates and returns a structure containing information about + // tests and instantiations of a particular test case.
+ template <class TestCase> + ParameterizedTestCaseInfo<TestCase>* GetTestCasePatternHolder( + const char* test_case_name, + const char* file, + int line) { + ParameterizedTestCaseInfo<TestCase>* typed_test_info = NULL; + for (TestCaseInfoContainer::iterator it = test_case_infos_.begin(); + it != test_case_infos_.end(); ++it) { + if ((*it)->GetTestCaseName() == test_case_name) { + if ((*it)->GetTestCaseTypeId() != GetTypeId<TestCase>()) { + // Complain about incorrect usage of Google Test facilities + // and terminate the program since we cannot guarantee correct + // test case setup and tear-down in this case. + ReportInvalidTestCaseType(test_case_name, file, line); + posix::Abort(); + } else { + // At this point we are sure that the object we found is of the same + // type we are looking for, so we downcast it to that type + // without further checks. + typed_test_info = CheckedDowncastToActualType< + ParameterizedTestCaseInfo<TestCase> >(*it); + } + break; + } + } + if (typed_test_info == NULL) { + typed_test_info = new ParameterizedTestCaseInfo<TestCase>(test_case_name); + test_case_infos_.push_back(typed_test_info); + } + return typed_test_info; + } + // Calls RegisterTests() on every test case info held by this registry. + void RegisterTests() { + for (TestCaseInfoContainer::iterator it = test_case_infos_.begin(); + it != test_case_infos_.end(); ++it) { + (*it)->RegisterTests(); + } + } + + private: + typedef ::std::vector<ParameterizedTestCaseInfoBase*> TestCaseInfoContainer; + + TestCaseInfoContainer test_case_infos_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseRegistry); +}; + +} // namespace internal +} // namespace testing + +#endif // GTEST_HAS_PARAM_TEST + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_ +// This file was GENERATED by command: +// pump.py gtest-param-util-generated.h.pump +// DO NOT EDIT BY HAND!!! + +// Copyright 2008 Google Inc. +// All Rights Reserved.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: vladl@google.com (Vlad Losev) + +// Type and function utilities for implementing parameterized tests. +// This file is generated by a SCRIPT. DO NOT EDIT BY HAND! +// +// Currently Google Test supports at most 50 arguments in Values, +// and at most 10 arguments in Combine. Please contact +// googletestframework@googlegroups.com if you need more. 
+// Please note that the number of arguments to Combine is limited +// by the maximum arity of the implementation of tr1::tuple which is +// currently set at 10. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ + +// scripts/fuse_gtest.py depends on gtest's own header being #included +// *unconditionally*. Therefore these #includes cannot be moved +// inside #if GTEST_HAS_PARAM_TEST. + +#if GTEST_HAS_PARAM_TEST + +namespace testing { + +// Forward declarations of ValuesIn(), which is implemented in +// include/gtest/gtest-param-test.h. +template <typename ForwardIterator> +internal::ParamGenerator< + typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type> +ValuesIn(ForwardIterator begin, ForwardIterator end); + +template <typename T, size_t N> +internal::ParamGenerator<T> ValuesIn(const T (&array)[N]); + +template <class Container> +internal::ParamGenerator<typename Container::value_type> ValuesIn( + const Container& container); + +namespace internal { + +// Used in the Values() function to provide polymorphic capabilities. +template <typename T1> +class ValueArray1 { + public: + explicit ValueArray1(T1 v1) : v1_(v1) {} + + template <typename T> + operator ParamGenerator<T>() const { return ValuesIn(&v1_, &v1_ + 1); } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray1& other); + + const T1 v1_; +}; + +template <typename T1, typename T2> +class ValueArray2 { + public: + ValueArray2(T1 v1, T2 v2) : v1_(v1), v2_(v2) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray2& other); + + const T1 v1_; + const T2 v2_; +}; + +template <typename T1, typename T2, typename T3> +class ValueArray3 { + public: + ValueArray3(T1 v1, T2 v2, T3 v3) : v1_(v1), v2_(v2), v3_(v3) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray3& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; +}; + +template <typename T1, typename T2, typename T3, typename T4> +class ValueArray4 { + public: + ValueArray4(T1 v1, T2 v2, T3 v3, T4 v4) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray4& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5> +class ValueArray5 { + public: + ValueArray5(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4), v5_(v5) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray5& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6> +class ValueArray6 { + public: + ValueArray6(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6) : v1_(v1), v2_(v2), + v3_(v3), v4_(v4), v5_(v5), v6_(v6) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray6& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7> +class ValueArray7 { + public: + ValueArray7(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7) : v1_(v1), + v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray7& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8> +class ValueArray8 { + public: + ValueArray8(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray8& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9> +class ValueArray9 { + public: + ValueArray9(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, + T9 v9) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray9& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10> +class ValueArray10 { + public: + ValueArray10(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray10& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11> +class ValueArray11 { + public: + ValueArray11(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), + v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray11& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12> +class ValueArray12 { + public: + ValueArray12(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), + v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray12& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13> +class ValueArray13 { + public: + ValueArray13(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), + v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), + v12_(v12), v13_(v13) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray13& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14> +class ValueArray14 { + public: + ValueArray14(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray14& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15> +class ValueArray15 { + public: + ValueArray15(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) : v1_(v1), v2_(v2), + v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray15& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16> +class ValueArray16 { + public: + ValueArray16(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16) : v1_(v1), + v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), + v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), + v16_(v16) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray16& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17> +class ValueArray17 { + public: + ValueArray17(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, + T17 v17) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray17& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18> +class ValueArray18 { + public: + ValueArray18(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray18& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19> +class ValueArray19 { + public: + ValueArray19(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), + v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), + v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray19& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20> +class ValueArray20 { + public: + ValueArray20(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), + v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), + v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), + v19_(v19), v20_(v20) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray20& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21> +class ValueArray21 { + public: + ValueArray21(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), + v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), + v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), + v18_(v18), v19_(v19), v20_(v20), v21_(v21) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray21& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22> +class ValueArray22 { + public: + ValueArray22(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray22& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23> +class ValueArray23 { + public: + ValueArray23(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23) : v1_(v1), v2_(v2), + v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray23& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24> +class ValueArray24 { + public: + ValueArray24(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24) : v1_(v1), + v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), + v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), + v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), + v22_(v22), v23_(v23), v24_(v24) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_)}; + return ValuesIn(array); + } + + private: + // No implementation - 
assignment is unsupported. + void operator=(const ValueArray24& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25> +class ValueArray25 { + public: + ValueArray25(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, + T25 v25) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + 
static_cast<T>(v24_), static_cast<T>(v25_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray25& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26> +class ValueArray26 { + public: + ValueArray26(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), 
static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray26& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27> +class ValueArray27 { + public: + ValueArray27(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), + v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), + v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), + v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), + v26_(v26), v27_(v27) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), 
static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray27& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28> +class ValueArray28 { + public: + ValueArray28(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), + v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), + v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), + v19_(v19), v20_(v20), 
v21_(v21), v22_(v22), v23_(v23), v24_(v24), + v25_(v25), v26_(v26), v27_(v27), v28_(v28) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray28& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29> +class ValueArray29 { + public: + ValueArray29(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 
v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), + v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), + v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), + v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), + v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray29& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30> +class ValueArray30 { + public: + ValueArray30(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), + v29_(v29), v30_(v30) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), 
static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray30& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31> +class ValueArray31 { + public: + ValueArray31(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31) : v1_(v1), v2_(v2), + v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), 
v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), + v29_(v29), v30_(v30), v31_(v31) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray31& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32> +class ValueArray32 { + public: + ValueArray32(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32) : v1_(v1), + v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), + v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), + v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), + v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), + v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + 
static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray32& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33> +class ValueArray33 { + public: + ValueArray33(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 
v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, + T33 v33) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), + v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), + v33_(v33) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray33& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34> +class ValueArray34 { + public: + ValueArray34(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), + v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), + v33_(v33), v34_(v34) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), 
static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_), static_cast<T>(v34_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray34& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35> +class ValueArray35 { + public: + ValueArray35(T1 v1, T2 
v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), + v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), + v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), + v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), + v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), + v32_(v32), v33_(v33), v34_(v34), v35_(v35) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray35& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36> +class ValueArray36 { + public: + ValueArray36(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), + v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), + v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), + v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), + v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), + v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36) {} + + template <typename T> + operator ParamGenerator<T>() 
const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_), + static_cast<T>(v36_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray36& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename 
T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37> +class ValueArray37 { + public: + ValueArray37(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), + v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), + v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), + v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), + v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), + v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), + v36_(v36), v37_(v37) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_), + static_cast<T>(v36_), static_cast<T>(v37_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray37& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38> +class ValueArray38 { + public: + ValueArray38(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), + v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), + 
v35_(v35), v36_(v36), v37_(v37), v38_(v38) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_), + static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray38& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39> +class ValueArray39 { + public: + ValueArray39(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39) : v1_(v1), v2_(v2), + v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), + v29_(v29), v30_(v30), 
v31_(v31), v32_(v32), v33_(v33), v34_(v34), + v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_), + static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_), + static_cast<T>(v39_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray39& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40> +class ValueArray40 { + public: + ValueArray40(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40) : v1_(v1), + v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), + v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), + v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), + v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), 
+ v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), + v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), + v40_(v40) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_), + static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_), + static_cast<T>(v39_), static_cast<T>(v40_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray40& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41> +class ValueArray41 { + public: + ValueArray41(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, + T41 v41) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), 
v24_(v24), v25_(v25), v26_(v26), + v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), + v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), + v39_(v39), v40_(v40), v41_(v41) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_), + static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_), + static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray41& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42> +class ValueArray42 { + public: + ValueArray42(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), 
v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), + v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), + v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), + v39_(v39), v40_(v40), v41_(v41), v42_(v42) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_), + static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_), + static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_), + static_cast<T>(v42_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray42& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43> +class ValueArray43 { + public: + ValueArray43(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), + v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), + v14_(v14), v15_(v15), 
v16_(v16), v17_(v17), v18_(v18), v19_(v19), + v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), + v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), + v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), + v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_), + static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_), + static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_), + static_cast<T>(v42_), static_cast<T>(v43_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray43& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; + const T43 v43_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44> +class ValueArray44 { + public: + ValueArray44(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43, T44 v44) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), + v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), + 
v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), + v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), + v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), + v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36), + v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42), + v43_(v43), v44_(v44) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_), + static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_), + static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_), + static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray44& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; + const T43 v43_; + const T44 v44_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45> +class ValueArray45 { + public: + ValueArray45(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43, T44 v44, T45 v45) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), + v5_(v5), v6_(v6), v7_(v7), v8_(v8), 
v9_(v9), v10_(v10), v11_(v11), + v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), + v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), + v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), + v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), + v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), + v42_(v42), v43_(v43), v44_(v44), v45_(v45) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_), + static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_), + static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_), + static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_), + static_cast<T>(v45_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray45& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; + const T43 v43_; + const T44 v44_; + const T45 v45_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46> +class ValueArray46 { + public: + ValueArray46(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43, T44 v44, T45 v45, T46 v46) : v1_(v1), v2_(v2), v3_(v3), + 
v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), + v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), + v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40), + v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), v46_(v46) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_), + static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_), + static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_), + static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_), + static_cast<T>(v45_), static_cast<T>(v46_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray46& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; + const T43 v43_; + const T44 v44_; + const T45 v45_; + const T46 v46_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46, typename T47> +class ValueArray47 { + public: + ValueArray47(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 
v47) : v1_(v1), v2_(v2), + v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), + v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), + v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40), + v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), v46_(v46), + v47_(v47) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_), + static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_), + static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_), + static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_), + static_cast<T>(v45_), static_cast<T>(v46_), static_cast<T>(v47_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray47& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; + const T43 v43_; + const T44 v44_; + const T45 v45_; + const T46 v46_; + const T47 v47_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46, typename T47, typename T48> +class ValueArray48 { + public: + ValueArray48(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43, 
T44 v44, T45 v45, T46 v46, T47 v47, T48 v48) : v1_(v1), + v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), + v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), + v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), + v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), + v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), + v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), + v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), + v46_(v46), v47_(v47), v48_(v48) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_), + static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_), + static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_), + static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_), + static_cast<T>(v45_), static_cast<T>(v46_), static_cast<T>(v47_), + static_cast<T>(v48_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray48& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; + const T43 v43_; + const T44 v44_; + const T45 v45_; + const T46 v46_; + const T47 v47_; + const T48 v48_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46, typename T47, typename T48, typename T49> +class ValueArray49 { + public: + ValueArray49(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 
v40, T41 v41, + T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48, + T49 v49) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), + v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), + v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), + v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44), + v45_(v45), v46_(v46), v47_(v47), v48_(v48), v49_(v49) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_), + static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_), + static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_), + static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_), + static_cast<T>(v45_), static_cast<T>(v46_), static_cast<T>(v47_), + static_cast<T>(v48_), static_cast<T>(v49_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray49& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; + const T43 v43_; + const T44 v44_; + const T45 v45_; + const T46 v46_; + const T47 v47_; + const T48 v48_; + const T49 v49_; +}; + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46, typename T47, typename T48, typename T49, typename T50> +class ValueArray50 { + public: + ValueArray50(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, 
T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48, T49 v49, + T50 v50) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), + v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), + v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), + v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44), + v45_(v45), v46_(v46), v47_(v47), v48_(v48), v49_(v49), v50_(v50) {} + + template <typename T> + operator ParamGenerator<T>() const { + const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_), + static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_), + static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_), + static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_), + static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_), + static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_), + static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_), + static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_), + static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_), + static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_), + static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_), + static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_), + static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_), + static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_), + static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_), + static_cast<T>(v45_), static_cast<T>(v46_), static_cast<T>(v47_), + static_cast<T>(v48_), static_cast<T>(v49_), static_cast<T>(v50_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray50& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; + const T43 v43_; + const T44 v44_; + const T45 v45_; + const T46 v46_; + const T47 v47_; + const T48 v48_; + const T49 v49_; + const T50 v50_; +}; + +# if GTEST_HAS_COMBINE +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// Generates values from the Cartesian product of values produced +// by the argument generators. 
+// +template <typename T1, typename T2> +class CartesianProductGenerator2 + : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2> > { + public: + typedef ::std::tr1::tuple<T1, T2> ParamType; + + CartesianProductGenerator2(const ParamGenerator<T1>& g1, + const ParamGenerator<T2>& g2) + : g1_(g1), g2_(g2) {} + virtual ~CartesianProductGenerator2() {} + + virtual ParamIteratorInterface<ParamType>* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin()); + } + virtual ParamIteratorInterface<ParamType>* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end()); + } + + private: + class Iterator : public ParamIteratorInterface<ParamType> { + public: + Iterator(const ParamGeneratorInterface<ParamType>* base, + const ParamGenerator<T1>& g1, + const typename ParamGenerator<T1>::iterator& current1, + const ParamGenerator<T2>& g2, + const typename ParamGenerator<T2>::iterator& current2) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. + virtual void Advance() { + assert(!AtEnd()); + ++current2_; + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface<ParamType>* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return ¤t_value_; } + virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. 
+ GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType<const Iterator>(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). + return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_ = ParamType(*current1_, *current2_); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_; + } + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface<ParamType>* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. + const typename ParamGenerator<T1>::iterator begin1_; + const typename ParamGenerator<T1>::iterator end1_; + typename ParamGenerator<T1>::iterator current1_; + const typename ParamGenerator<T2>::iterator begin2_; + const typename ParamGenerator<T2>::iterator end2_; + typename ParamGenerator<T2>::iterator current2_; + ParamType current_value_; + }; // class CartesianProductGenerator2::Iterator + + // No implementation - assignment is unsupported. 
+ void operator=(const CartesianProductGenerator2& other); + + const ParamGenerator<T1> g1_; + const ParamGenerator<T2> g2_; +}; // class CartesianProductGenerator2 + + +template <typename T1, typename T2, typename T3> +class CartesianProductGenerator3 + : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3> > { + public: + typedef ::std::tr1::tuple<T1, T2, T3> ParamType; + + CartesianProductGenerator3(const ParamGenerator<T1>& g1, + const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3) + : g1_(g1), g2_(g2), g3_(g3) {} + virtual ~CartesianProductGenerator3() {} + + virtual ParamIteratorInterface<ParamType>* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin()); + } + virtual ParamIteratorInterface<ParamType>* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end()); + } + + private: + class Iterator : public ParamIteratorInterface<ParamType> { + public: + Iterator(const ParamGeneratorInterface<ParamType>* base, + const ParamGenerator<T1>& g1, + const typename ParamGenerator<T1>::iterator& current1, + const ParamGenerator<T2>& g2, + const typename ParamGenerator<T2>::iterator& current2, + const ParamGenerator<T3>& g3, + const typename ParamGenerator<T3>::iterator& current3) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. 
+ virtual void Advance() { + assert(!AtEnd()); + ++current3_; + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface<ParamType>* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return ¤t_value_; } + virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType<const Iterator>(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). + return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_ = ParamType(*current1_, *current2_, *current3_); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_; + } + + // No implementation - assignment is unsupported. 
+ void operator=(const Iterator& other); + + const ParamGeneratorInterface<ParamType>* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. + const typename ParamGenerator<T1>::iterator begin1_; + const typename ParamGenerator<T1>::iterator end1_; + typename ParamGenerator<T1>::iterator current1_; + const typename ParamGenerator<T2>::iterator begin2_; + const typename ParamGenerator<T2>::iterator end2_; + typename ParamGenerator<T2>::iterator current2_; + const typename ParamGenerator<T3>::iterator begin3_; + const typename ParamGenerator<T3>::iterator end3_; + typename ParamGenerator<T3>::iterator current3_; + ParamType current_value_; + }; // class CartesianProductGenerator3::Iterator + + // No implementation - assignment is unsupported. + void operator=(const CartesianProductGenerator3& other); + + const ParamGenerator<T1> g1_; + const ParamGenerator<T2> g2_; + const ParamGenerator<T3> g3_; +}; // class CartesianProductGenerator3 + + +template <typename T1, typename T2, typename T3, typename T4> +class CartesianProductGenerator4 + : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4> > { + public: + typedef ::std::tr1::tuple<T1, T2, T3, T4> ParamType; + + CartesianProductGenerator4(const ParamGenerator<T1>& g1, + const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3, + const ParamGenerator<T4>& g4) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4) {} + virtual ~CartesianProductGenerator4() {} + + virtual ParamIteratorInterface<ParamType>* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin()); + } + virtual ParamIteratorInterface<ParamType>* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end()); + } + + private: + class Iterator : public ParamIteratorInterface<ParamType> { + public: + Iterator(const ParamGeneratorInterface<ParamType>* base, + const 
ParamGenerator<T1>& g1, + const typename ParamGenerator<T1>::iterator& current1, + const ParamGenerator<T2>& g2, + const typename ParamGenerator<T2>::iterator& current2, + const ParamGenerator<T3>& g3, + const typename ParamGenerator<T3>::iterator& current3, + const ParamGenerator<T4>& g4, + const typename ParamGenerator<T4>::iterator& current4) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. + virtual void Advance() { + assert(!AtEnd()); + ++current4_; + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface<ParamType>* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return ¤t_value_; } + virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType<const Iterator>(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). 
+ return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_ = ParamType(*current1_, *current2_, *current3_, + *current4_); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_; + } + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface<ParamType>* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. 
+ const typename ParamGenerator<T1>::iterator begin1_; + const typename ParamGenerator<T1>::iterator end1_; + typename ParamGenerator<T1>::iterator current1_; + const typename ParamGenerator<T2>::iterator begin2_; + const typename ParamGenerator<T2>::iterator end2_; + typename ParamGenerator<T2>::iterator current2_; + const typename ParamGenerator<T3>::iterator begin3_; + const typename ParamGenerator<T3>::iterator end3_; + typename ParamGenerator<T3>::iterator current3_; + const typename ParamGenerator<T4>::iterator begin4_; + const typename ParamGenerator<T4>::iterator end4_; + typename ParamGenerator<T4>::iterator current4_; + ParamType current_value_; + }; // class CartesianProductGenerator4::Iterator + + // No implementation - assignment is unsupported. + void operator=(const CartesianProductGenerator4& other); + + const ParamGenerator<T1> g1_; + const ParamGenerator<T2> g2_; + const ParamGenerator<T3> g3_; + const ParamGenerator<T4> g4_; +}; // class CartesianProductGenerator4 + + +template <typename T1, typename T2, typename T3, typename T4, typename T5> +class CartesianProductGenerator5 + : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5> > { + public: + typedef ::std::tr1::tuple<T1, T2, T3, T4, T5> ParamType; + + CartesianProductGenerator5(const ParamGenerator<T1>& g1, + const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3, + const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5) {} + virtual ~CartesianProductGenerator5() {} + + virtual ParamIteratorInterface<ParamType>* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin()); + } + virtual ParamIteratorInterface<ParamType>* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end(), g5_, g5_.end()); + } + + private: + class Iterator : public ParamIteratorInterface<ParamType> { + public: + 
Iterator(const ParamGeneratorInterface<ParamType>* base, + const ParamGenerator<T1>& g1, + const typename ParamGenerator<T1>::iterator& current1, + const ParamGenerator<T2>& g2, + const typename ParamGenerator<T2>::iterator& current2, + const ParamGenerator<T3>& g3, + const typename ParamGenerator<T3>::iterator& current3, + const ParamGenerator<T4>& g4, + const typename ParamGenerator<T4>::iterator& current4, + const ParamGenerator<T5>& g5, + const typename ParamGenerator<T5>::iterator& current5) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4), + begin5_(g5.begin()), end5_(g5.end()), current5_(current5) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. + virtual void Advance() { + assert(!AtEnd()); + ++current5_; + if (current5_ == end5_) { + current5_ = begin5_; + ++current4_; + } + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface<ParamType>* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return ¤t_value_; } + virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." 
<< std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType<const Iterator>(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). + return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_ && + current5_ == typed_other->current5_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_), + begin5_(other.begin5_), + end5_(other.end5_), + current5_(other.current5_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_ = ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_ || + current5_ == end5_; + } + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface<ParamType>* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. 
+ const typename ParamGenerator<T1>::iterator begin1_; + const typename ParamGenerator<T1>::iterator end1_; + typename ParamGenerator<T1>::iterator current1_; + const typename ParamGenerator<T2>::iterator begin2_; + const typename ParamGenerator<T2>::iterator end2_; + typename ParamGenerator<T2>::iterator current2_; + const typename ParamGenerator<T3>::iterator begin3_; + const typename ParamGenerator<T3>::iterator end3_; + typename ParamGenerator<T3>::iterator current3_; + const typename ParamGenerator<T4>::iterator begin4_; + const typename ParamGenerator<T4>::iterator end4_; + typename ParamGenerator<T4>::iterator current4_; + const typename ParamGenerator<T5>::iterator begin5_; + const typename ParamGenerator<T5>::iterator end5_; + typename ParamGenerator<T5>::iterator current5_; + ParamType current_value_; + }; // class CartesianProductGenerator5::Iterator + + // No implementation - assignment is unsupported. + void operator=(const CartesianProductGenerator5& other); + + const ParamGenerator<T1> g1_; + const ParamGenerator<T2> g2_; + const ParamGenerator<T3> g3_; + const ParamGenerator<T4> g4_; + const ParamGenerator<T5> g5_; +}; // class CartesianProductGenerator5 + + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6> +class CartesianProductGenerator6 + : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5, + T6> > { + public: + typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> ParamType; + + CartesianProductGenerator6(const ParamGenerator<T1>& g1, + const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3, + const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5, + const ParamGenerator<T6>& g6) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6) {} + virtual ~CartesianProductGenerator6() {} + + virtual ParamIteratorInterface<ParamType>* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin()); + 
} + virtual ParamIteratorInterface<ParamType>* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end()); + } + + private: + class Iterator : public ParamIteratorInterface<ParamType> { + public: + Iterator(const ParamGeneratorInterface<ParamType>* base, + const ParamGenerator<T1>& g1, + const typename ParamGenerator<T1>::iterator& current1, + const ParamGenerator<T2>& g2, + const typename ParamGenerator<T2>::iterator& current2, + const ParamGenerator<T3>& g3, + const typename ParamGenerator<T3>::iterator& current3, + const ParamGenerator<T4>& g4, + const typename ParamGenerator<T4>::iterator& current4, + const ParamGenerator<T5>& g5, + const typename ParamGenerator<T5>::iterator& current5, + const ParamGenerator<T6>& g6, + const typename ParamGenerator<T6>::iterator& current6) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4), + begin5_(g5.begin()), end5_(g5.end()), current5_(current5), + begin6_(g6.begin()), end6_(g6.end()), current6_(current6) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. 
+ virtual void Advance() { + assert(!AtEnd()); + ++current6_; + if (current6_ == end6_) { + current6_ = begin6_; + ++current5_; + } + if (current5_ == end5_) { + current5_ = begin5_; + ++current4_; + } + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface<ParamType>* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return ¤t_value_; } + virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType<const Iterator>(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). 
+ return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_ && + current5_ == typed_other->current5_ && + current6_ == typed_other->current6_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_), + begin5_(other.begin5_), + end5_(other.end5_), + current5_(other.current5_), + begin6_(other.begin6_), + end6_(other.end6_), + current6_(other.current6_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_ = ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_, *current6_); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_ || + current5_ == end5_ || + current6_ == end6_; + } + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface<ParamType>* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. 
+ const typename ParamGenerator<T1>::iterator begin1_; + const typename ParamGenerator<T1>::iterator end1_; + typename ParamGenerator<T1>::iterator current1_; + const typename ParamGenerator<T2>::iterator begin2_; + const typename ParamGenerator<T2>::iterator end2_; + typename ParamGenerator<T2>::iterator current2_; + const typename ParamGenerator<T3>::iterator begin3_; + const typename ParamGenerator<T3>::iterator end3_; + typename ParamGenerator<T3>::iterator current3_; + const typename ParamGenerator<T4>::iterator begin4_; + const typename ParamGenerator<T4>::iterator end4_; + typename ParamGenerator<T4>::iterator current4_; + const typename ParamGenerator<T5>::iterator begin5_; + const typename ParamGenerator<T5>::iterator end5_; + typename ParamGenerator<T5>::iterator current5_; + const typename ParamGenerator<T6>::iterator begin6_; + const typename ParamGenerator<T6>::iterator end6_; + typename ParamGenerator<T6>::iterator current6_; + ParamType current_value_; + }; // class CartesianProductGenerator6::Iterator + + // No implementation - assignment is unsupported. 
+ void operator=(const CartesianProductGenerator6& other); + + const ParamGenerator<T1> g1_; + const ParamGenerator<T2> g2_; + const ParamGenerator<T3> g3_; + const ParamGenerator<T4> g4_; + const ParamGenerator<T5> g5_; + const ParamGenerator<T6> g6_; +}; // class CartesianProductGenerator6 + + +// CartesianProductGenerator7 yields every combination of the values produced +// by seven ParamGenerators as a ::std::tr1::tuple, with the last generator +// varying fastest. Begin() and End() return iterators allocated with new. +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7> +class CartesianProductGenerator7 + : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, + T7> > { + public: + typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7> ParamType; + + CartesianProductGenerator7(const ParamGenerator<T1>& g1, + const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3, + const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5, + const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7) {} + virtual ~CartesianProductGenerator7() {} + + virtual ParamIteratorInterface<ParamType>* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, + g7_.begin()); + } + virtual ParamIteratorInterface<ParamType>* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end()); + } + + private: + class Iterator : public ParamIteratorInterface<ParamType> { + public: + Iterator(const ParamGeneratorInterface<ParamType>* base, + const ParamGenerator<T1>& g1, + const typename ParamGenerator<T1>::iterator& current1, + const ParamGenerator<T2>& g2, + const typename ParamGenerator<T2>::iterator& current2, + const ParamGenerator<T3>& g3, + const typename ParamGenerator<T3>::iterator& current3, + const ParamGenerator<T4>& g4, + const typename ParamGenerator<T4>::iterator& current4, + const ParamGenerator<T5>& g5, + const typename ParamGenerator<T5>::iterator& current5, + const
ParamGenerator<T6>& g6, + const typename ParamGenerator<T6>::iterator& current6, + const ParamGenerator<T7>& g7, + const typename ParamGenerator<T7>::iterator& current7) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4), + begin5_(g5.begin()), end5_(g5.end()), current5_(current5), + begin6_(g6.begin()), end6_(g6.end()), current6_(current6), + begin7_(g7.begin()), end7_(g7.end()), current7_(current7) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. + virtual void Advance() { + assert(!AtEnd()); + ++current7_; + if (current7_ == end7_) { + current7_ = begin7_; + ++current6_; + } + if (current6_ == end6_) { + current6_ = begin6_; + ++current5_; + } + if (current5_ == end5_) { + current5_ = begin5_; + ++current4_; + } + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface<ParamType>* Clone() const { + return new Iterator(*this); + } + // Returns the address of the tuple cached by ComputeCurrentValue(), which + // only refreshes it while no component iterator is at its end. + virtual const ParamType* Current() const { return &current_value_; } + virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators."
<< std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType<const Iterator>(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). + return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_ && + current5_ == typed_other->current5_ && + current6_ == typed_other->current6_ && + current7_ == typed_other->current7_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_), + begin5_(other.begin5_), + end5_(other.end5_), + current5_(other.current5_), + begin6_(other.begin6_), + end6_(other.end6_), + current6_(other.current6_), + begin7_(other.begin7_), + end7_(other.end7_), + current7_(other.current7_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_ = ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_, *current6_, *current7_); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_ || + current5_ == end5_ || + current6_ == end6_ || + current7_ == end7_; + } + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface<ParamType>* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
+ // current[i]_ is the actual traversing iterator. + const typename ParamGenerator<T1>::iterator begin1_; + const typename ParamGenerator<T1>::iterator end1_; + typename ParamGenerator<T1>::iterator current1_; + const typename ParamGenerator<T2>::iterator begin2_; + const typename ParamGenerator<T2>::iterator end2_; + typename ParamGenerator<T2>::iterator current2_; + const typename ParamGenerator<T3>::iterator begin3_; + const typename ParamGenerator<T3>::iterator end3_; + typename ParamGenerator<T3>::iterator current3_; + const typename ParamGenerator<T4>::iterator begin4_; + const typename ParamGenerator<T4>::iterator end4_; + typename ParamGenerator<T4>::iterator current4_; + const typename ParamGenerator<T5>::iterator begin5_; + const typename ParamGenerator<T5>::iterator end5_; + typename ParamGenerator<T5>::iterator current5_; + const typename ParamGenerator<T6>::iterator begin6_; + const typename ParamGenerator<T6>::iterator end6_; + typename ParamGenerator<T6>::iterator current6_; + const typename ParamGenerator<T7>::iterator begin7_; + const typename ParamGenerator<T7>::iterator end7_; + typename ParamGenerator<T7>::iterator current7_; + ParamType current_value_; + }; // class CartesianProductGenerator7::Iterator + + // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductGenerator7& other); + + const ParamGenerator<T1> g1_; + const ParamGenerator<T2> g2_; + const ParamGenerator<T3> g3_; + const ParamGenerator<T4> g4_; + const ParamGenerator<T5> g5_; + const ParamGenerator<T6> g6_; + const ParamGenerator<T7> g7_; +}; // class CartesianProductGenerator7 + + +// CartesianProductGenerator8 yields every combination of the values produced +// by eight ParamGenerators as a ::std::tr1::tuple, with the last generator +// varying fastest. Begin() and End() return iterators allocated with new. +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8> +class CartesianProductGenerator8 + : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, + T7, T8> > { + public: + typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8> ParamType; + + CartesianProductGenerator8(const ParamGenerator<T1>& g1, + const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3, + const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5, + const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7, + const ParamGenerator<T8>& g8) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), + g8_(g8) {} + virtual ~CartesianProductGenerator8() {} + + virtual ParamIteratorInterface<ParamType>* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, + g7_.begin(), g8_, g8_.begin()); + } + virtual ParamIteratorInterface<ParamType>* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_, + g8_.end()); + } + + private: + class Iterator : public ParamIteratorInterface<ParamType> { + public: + Iterator(const ParamGeneratorInterface<ParamType>* base, + const ParamGenerator<T1>& g1, + const typename ParamGenerator<T1>::iterator& current1, + const ParamGenerator<T2>& g2, + const typename ParamGenerator<T2>::iterator& current2, + const ParamGenerator<T3>& g3, + const typename ParamGenerator<T3>::iterator& current3, + const ParamGenerator<T4>& g4, + const typename
ParamGenerator<T4>::iterator& current4, + const ParamGenerator<T5>& g5, + const typename ParamGenerator<T5>::iterator& current5, + const ParamGenerator<T6>& g6, + const typename ParamGenerator<T6>::iterator& current6, + const ParamGenerator<T7>& g7, + const typename ParamGenerator<T7>::iterator& current7, + const ParamGenerator<T8>& g8, + const typename ParamGenerator<T8>::iterator& current8) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4), + begin5_(g5.begin()), end5_(g5.end()), current5_(current5), + begin6_(g6.begin()), end6_(g6.end()), current6_(current6), + begin7_(g7.begin()), end7_(g7.end()), current7_(current7), + begin8_(g8.begin()), end8_(g8.end()), current8_(current8) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either.
+ virtual void Advance() { + assert(!AtEnd()); + ++current8_; + if (current8_ == end8_) { + current8_ = begin8_; + ++current7_; + } + if (current7_ == end7_) { + current7_ = begin7_; + ++current6_; + } + if (current6_ == end6_) { + current6_ = begin6_; + ++current5_; + } + if (current5_ == end5_) { + current5_ = begin5_; + ++current4_; + } + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface<ParamType>* Clone() const { + return new Iterator(*this); + } + // Returns the address of the tuple cached by ComputeCurrentValue(), which + // only refreshes it while no component iterator is at its end. + virtual const ParamType* Current() const { return &current_value_; } + virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType<const Iterator>(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd().
+ return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_ && + current5_ == typed_other->current5_ && + current6_ == typed_other->current6_ && + current7_ == typed_other->current7_ && + current8_ == typed_other->current8_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_), + begin5_(other.begin5_), + end5_(other.end5_), + current5_(other.current5_), + begin6_(other.begin6_), + end6_(other.end6_), + current6_(other.current6_), + begin7_(other.begin7_), + end7_(other.end7_), + current7_(other.current7_), + begin8_(other.begin8_), + end8_(other.end8_), + current8_(other.current8_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_ = ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_, *current6_, *current7_, *current8_); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_ || + current5_ == end5_ || + current6_ == end6_ || + current7_ == end7_ || + current8_ == end8_; + } + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface<ParamType>* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator.
+ const typename ParamGenerator<T1>::iterator begin1_; + const typename ParamGenerator<T1>::iterator end1_; + typename ParamGenerator<T1>::iterator current1_; + const typename ParamGenerator<T2>::iterator begin2_; + const typename ParamGenerator<T2>::iterator end2_; + typename ParamGenerator<T2>::iterator current2_; + const typename ParamGenerator<T3>::iterator begin3_; + const typename ParamGenerator<T3>::iterator end3_; + typename ParamGenerator<T3>::iterator current3_; + const typename ParamGenerator<T4>::iterator begin4_; + const typename ParamGenerator<T4>::iterator end4_; + typename ParamGenerator<T4>::iterator current4_; + const typename ParamGenerator<T5>::iterator begin5_; + const typename ParamGenerator<T5>::iterator end5_; + typename ParamGenerator<T5>::iterator current5_; + const typename ParamGenerator<T6>::iterator begin6_; + const typename ParamGenerator<T6>::iterator end6_; + typename ParamGenerator<T6>::iterator current6_; + const typename ParamGenerator<T7>::iterator begin7_; + const typename ParamGenerator<T7>::iterator end7_; + typename ParamGenerator<T7>::iterator current7_; + const typename ParamGenerator<T8>::iterator begin8_; + const typename ParamGenerator<T8>::iterator end8_; + typename ParamGenerator<T8>::iterator current8_; + ParamType current_value_; + }; // class CartesianProductGenerator8::Iterator + + // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductGenerator8& other); + + const ParamGenerator<T1> g1_; + const ParamGenerator<T2> g2_; + const ParamGenerator<T3> g3_; + const ParamGenerator<T4> g4_; + const ParamGenerator<T5> g5_; + const ParamGenerator<T6> g6_; + const ParamGenerator<T7> g7_; + const ParamGenerator<T8> g8_; +}; // class CartesianProductGenerator8 + + +// CartesianProductGenerator9 yields every combination of the values produced +// by nine ParamGenerators as a ::std::tr1::tuple, with the last generator +// varying fastest. Begin() and End() return iterators allocated with new. +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9> +class CartesianProductGenerator9 + : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, + T7, T8, T9> > { + public: + typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9> ParamType; + + CartesianProductGenerator9(const ParamGenerator<T1>& g1, + const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3, + const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5, + const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7, + const ParamGenerator<T8>& g8, const ParamGenerator<T9>& g9) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8), + g9_(g9) {} + virtual ~CartesianProductGenerator9() {} + + virtual ParamIteratorInterface<ParamType>* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, + g7_.begin(), g8_, g8_.begin(), g9_, g9_.begin()); + } + virtual ParamIteratorInterface<ParamType>* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_, + g8_.end(), g9_, g9_.end()); + } + + private: + class Iterator : public ParamIteratorInterface<ParamType> { + public: + Iterator(const ParamGeneratorInterface<ParamType>* base, + const ParamGenerator<T1>& g1, + const typename ParamGenerator<T1>::iterator& current1, + const ParamGenerator<T2>& g2, + const typename ParamGenerator<T2>::iterator& current2, + const ParamGenerator<T3>&
g3, + const typename ParamGenerator<T3>::iterator& current3, + const ParamGenerator<T4>& g4, + const typename ParamGenerator<T4>::iterator& current4, + const ParamGenerator<T5>& g5, + const typename ParamGenerator<T5>::iterator& current5, + const ParamGenerator<T6>& g6, + const typename ParamGenerator<T6>::iterator& current6, + const ParamGenerator<T7>& g7, + const typename ParamGenerator<T7>::iterator& current7, + const ParamGenerator<T8>& g8, + const typename ParamGenerator<T8>::iterator& current8, + const ParamGenerator<T9>& g9, + const typename ParamGenerator<T9>::iterator& current9) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4), + begin5_(g5.begin()), end5_(g5.end()), current5_(current5), + begin6_(g6.begin()), end6_(g6.end()), current6_(current6), + begin7_(g7.begin()), end7_(g7.end()), current7_(current7), + begin8_(g8.begin()), end8_(g8.end()), current8_(current8), + begin9_(g9.begin()), end9_(g9.end()), current9_(current9) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either.
+ virtual void Advance() { + assert(!AtEnd()); + ++current9_; + if (current9_ == end9_) { + current9_ = begin9_; + ++current8_; + } + if (current8_ == end8_) { + current8_ = begin8_; + ++current7_; + } + if (current7_ == end7_) { + current7_ = begin7_; + ++current6_; + } + if (current6_ == end6_) { + current6_ = begin6_; + ++current5_; + } + if (current5_ == end5_) { + current5_ = begin5_; + ++current4_; + } + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface<ParamType>* Clone() const { + return new Iterator(*this); + } + // Returns the address of the tuple cached by ComputeCurrentValue(), which + // only refreshes it while no component iterator is at its end. + virtual const ParamType* Current() const { return &current_value_; } + virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType<const Iterator>(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd().
+ return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_ && + current5_ == typed_other->current5_ && + current6_ == typed_other->current6_ && + current7_ == typed_other->current7_ && + current8_ == typed_other->current8_ && + current9_ == typed_other->current9_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_), + begin5_(other.begin5_), + end5_(other.end5_), + current5_(other.current5_), + begin6_(other.begin6_), + end6_(other.end6_), + current6_(other.current6_), + begin7_(other.begin7_), + end7_(other.end7_), + current7_(other.current7_), + begin8_(other.begin8_), + end8_(other.end8_), + current8_(other.current8_), + begin9_(other.begin9_), + end9_(other.end9_), + current9_(other.current9_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_ = ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_, *current6_, *current7_, *current8_, + *current9_); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_ || + current5_ == end5_ || + current6_ == end6_ || + current7_ == end7_ || + current8_ == end8_ || + current9_ == end9_; + } + + // No implementation - assignment is unsupported.
+ void operator=(const Iterator& other); + + const ParamGeneratorInterface<ParamType>* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. + const typename ParamGenerator<T1>::iterator begin1_; + const typename ParamGenerator<T1>::iterator end1_; + typename ParamGenerator<T1>::iterator current1_; + const typename ParamGenerator<T2>::iterator begin2_; + const typename ParamGenerator<T2>::iterator end2_; + typename ParamGenerator<T2>::iterator current2_; + const typename ParamGenerator<T3>::iterator begin3_; + const typename ParamGenerator<T3>::iterator end3_; + typename ParamGenerator<T3>::iterator current3_; + const typename ParamGenerator<T4>::iterator begin4_; + const typename ParamGenerator<T4>::iterator end4_; + typename ParamGenerator<T4>::iterator current4_; + const typename ParamGenerator<T5>::iterator begin5_; + const typename ParamGenerator<T5>::iterator end5_; + typename ParamGenerator<T5>::iterator current5_; + const typename ParamGenerator<T6>::iterator begin6_; + const typename ParamGenerator<T6>::iterator end6_; + typename ParamGenerator<T6>::iterator current6_; + const typename ParamGenerator<T7>::iterator begin7_; + const typename ParamGenerator<T7>::iterator end7_; + typename ParamGenerator<T7>::iterator current7_; + const typename ParamGenerator<T8>::iterator begin8_; + const typename ParamGenerator<T8>::iterator end8_; + typename ParamGenerator<T8>::iterator current8_; + const typename ParamGenerator<T9>::iterator begin9_; + const typename ParamGenerator<T9>::iterator end9_; + typename ParamGenerator<T9>::iterator current9_; + ParamType current_value_; + }; // class CartesianProductGenerator9::Iterator + + // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductGenerator9& other); + + const ParamGenerator<T1> g1_; + const ParamGenerator<T2> g2_; + const ParamGenerator<T3> g3_; + const ParamGenerator<T4> g4_; + const ParamGenerator<T5> g5_; + const ParamGenerator<T6> g6_; + const ParamGenerator<T7> g7_; + const ParamGenerator<T8> g8_; + const ParamGenerator<T9> g9_; +}; // class CartesianProductGenerator9 + + +// CartesianProductGenerator10 yields every combination of the values produced +// by ten ParamGenerators as a ::std::tr1::tuple, with the last generator +// varying fastest. Begin() and End() return iterators allocated with new. +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10> +class CartesianProductGenerator10 + : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, + T7, T8, T9, T10> > { + public: + typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> ParamType; + + CartesianProductGenerator10(const ParamGenerator<T1>& g1, + const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3, + const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5, + const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7, + const ParamGenerator<T8>& g8, const ParamGenerator<T9>& g9, + const ParamGenerator<T10>& g10) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8), + g9_(g9), g10_(g10) {} + virtual ~CartesianProductGenerator10() {} + + virtual ParamIteratorInterface<ParamType>* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, + g7_.begin(), g8_, g8_.begin(), g9_, g9_.begin(), g10_, g10_.begin()); + } + virtual ParamIteratorInterface<ParamType>* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_, + g8_.end(), g9_, g9_.end(), g10_, g10_.end()); + } + + private: + class Iterator : public ParamIteratorInterface<ParamType> { + public: + Iterator(const ParamGeneratorInterface<ParamType>* base, + const ParamGenerator<T1>& g1, + const typename
ParamGenerator<T1>::iterator& current1, + const ParamGenerator<T2>& g2, + const typename ParamGenerator<T2>::iterator& current2, + const ParamGenerator<T3>& g3, + const typename ParamGenerator<T3>::iterator& current3, + const ParamGenerator<T4>& g4, + const typename ParamGenerator<T4>::iterator& current4, + const ParamGenerator<T5>& g5, + const typename ParamGenerator<T5>::iterator& current5, + const ParamGenerator<T6>& g6, + const typename ParamGenerator<T6>::iterator& current6, + const ParamGenerator<T7>& g7, + const typename ParamGenerator<T7>::iterator& current7, + const ParamGenerator<T8>& g8, + const typename ParamGenerator<T8>::iterator& current8, + const ParamGenerator<T9>& g9, + const typename ParamGenerator<T9>::iterator& current9, + const ParamGenerator<T10>& g10, + const typename ParamGenerator<T10>::iterator& current10) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4), + begin5_(g5.begin()), end5_(g5.end()), current5_(current5), + begin6_(g6.begin()), end6_(g6.end()), current6_(current6), + begin7_(g7.begin()), end7_(g7.end()), current7_(current7), + begin8_(g8.begin()), end8_(g8.end()), current8_(current8), + begin9_(g9.begin()), end9_(g9.end()), current9_(current9), + begin10_(g10.begin()), end10_(g10.end()), current10_(current10) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either.
+ virtual void Advance() { + assert(!AtEnd()); + ++current10_; + if (current10_ == end10_) { + current10_ = begin10_; + ++current9_; + } + if (current9_ == end9_) { + current9_ = begin9_; + ++current8_; + } + if (current8_ == end8_) { + current8_ = begin8_; + ++current7_; + } + if (current7_ == end7_) { + current7_ = begin7_; + ++current6_; + } + if (current6_ == end6_) { + current6_ = begin6_; + ++current5_; + } + if (current5_ == end5_) { + current5_ = begin5_; + ++current4_; + } + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface<ParamType>* Clone() const { + return new Iterator(*this); + } + // Returns the address of the tuple cached by ComputeCurrentValue(), which + // only refreshes it while no component iterator is at its end. + virtual const ParamType* Current() const { return &current_value_; } + virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType<const Iterator>(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd().
+ return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_ && + current5_ == typed_other->current5_ && + current6_ == typed_other->current6_ && + current7_ == typed_other->current7_ && + current8_ == typed_other->current8_ && + current9_ == typed_other->current9_ && + current10_ == typed_other->current10_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_), + begin5_(other.begin5_), + end5_(other.end5_), + current5_(other.current5_), + begin6_(other.begin6_), + end6_(other.end6_), + current6_(other.current6_), + begin7_(other.begin7_), + end7_(other.end7_), + current7_(other.current7_), + begin8_(other.begin8_), + end8_(other.end8_), + current8_(other.current8_), + begin9_(other.begin9_), + end9_(other.end9_), + current9_(other.current9_), + begin10_(other.begin10_), + end10_(other.end10_), + current10_(other.current10_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_ = ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_, *current6_, *current7_, *current8_, + *current9_, *current10_); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range.
+ return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_ || + current5_ == end5_ || + current6_ == end6_ || + current7_ == end7_ || + current8_ == end8_ || + current9_ == end9_ || + current10_ == end10_; + } + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface<ParamType>* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. + const typename ParamGenerator<T1>::iterator begin1_; + const typename ParamGenerator<T1>::iterator end1_; + typename ParamGenerator<T1>::iterator current1_; + const typename ParamGenerator<T2>::iterator begin2_; + const typename ParamGenerator<T2>::iterator end2_; + typename ParamGenerator<T2>::iterator current2_; + const typename ParamGenerator<T3>::iterator begin3_; + const typename ParamGenerator<T3>::iterator end3_; + typename ParamGenerator<T3>::iterator current3_; + const typename ParamGenerator<T4>::iterator begin4_; + const typename ParamGenerator<T4>::iterator end4_; + typename ParamGenerator<T4>::iterator current4_; + const typename ParamGenerator<T5>::iterator begin5_; + const typename ParamGenerator<T5>::iterator end5_; + typename ParamGenerator<T5>::iterator current5_; + const typename ParamGenerator<T6>::iterator begin6_; + const typename ParamGenerator<T6>::iterator end6_; + typename ParamGenerator<T6>::iterator current6_; + const typename ParamGenerator<T7>::iterator begin7_; + const typename ParamGenerator<T7>::iterator end7_; + typename ParamGenerator<T7>::iterator current7_; + const typename ParamGenerator<T8>::iterator begin8_; + const typename ParamGenerator<T8>::iterator end8_; + typename ParamGenerator<T8>::iterator current8_; + const typename ParamGenerator<T9>::iterator begin9_; + const typename ParamGenerator<T9>::iterator end9_; + typename ParamGenerator<T9>::iterator current9_; + const typename
ParamGenerator<T10>::iterator begin10_; + const typename ParamGenerator<T10>::iterator end10_; + typename ParamGenerator<T10>::iterator current10_; + ParamType current_value_; + }; // class CartesianProductGenerator10::Iterator + + // No implementation - assignment is unsupported. + void operator=(const CartesianProductGenerator10& other); + + const ParamGenerator<T1> g1_; + const ParamGenerator<T2> g2_; + const ParamGenerator<T3> g3_; + const ParamGenerator<T4> g4_; + const ParamGenerator<T5> g5_; + const ParamGenerator<T6> g6_; + const ParamGenerator<T7> g7_; + const ParamGenerator<T8> g8_; + const ParamGenerator<T9> g9_; + const ParamGenerator<T10> g10_; +}; // class CartesianProductGenerator10 + + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// Helper classes providing Combine() with polymorphic features. They allow +// casting CartesianProductGeneratorN<T> to ParamGenerator<U> if T is +// convertible to U. +// +// CartesianProductHolder2 keeps copies of two generator arguments; its +// conversion operator casts each one to ParamGenerator<Ti> and passes them to +// a newly allocated CartesianProductGenerator2. +template <class Generator1, class Generator2> +class CartesianProductHolder2 { + public: +CartesianProductHolder2(const Generator1& g1, const Generator2& g2) + : g1_(g1), g2_(g2) {} + template <typename T1, typename T2> + operator ParamGenerator< ::std::tr1::tuple<T1, T2> >() const { + return ParamGenerator< ::std::tr1::tuple<T1, T2> >( + new CartesianProductGenerator2<T1, T2>( + static_cast<ParamGenerator<T1> >(g1_), + static_cast<ParamGenerator<T2> >(g2_))); + } + + private: + // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductHolder2& other); + + const Generator1 g1_; + const Generator2 g2_; +}; // class CartesianProductHolder2 + +// CartesianProductHolder3 keeps copies of three generator arguments; its +// conversion operator casts each one to ParamGenerator<Ti> and passes them to +// a newly allocated CartesianProductGenerator3. +template <class Generator1, class Generator2, class Generator3> +class CartesianProductHolder3 { + public: +CartesianProductHolder3(const Generator1& g1, const Generator2& g2, + const Generator3& g3) + : g1_(g1), g2_(g2), g3_(g3) {} + template <typename T1, typename T2, typename T3> + operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3> >() const { + return ParamGenerator< ::std::tr1::tuple<T1, T2, T3> >( + new CartesianProductGenerator3<T1, T2, T3>( + static_cast<ParamGenerator<T1> >(g1_), + static_cast<ParamGenerator<T2> >(g2_), + static_cast<ParamGenerator<T3> >(g3_))); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const CartesianProductHolder3& other); + + const Generator1 g1_; + const Generator2 g2_; + const Generator3 g3_; +}; // class CartesianProductHolder3 + +// CartesianProductHolder4 keeps copies of four generator arguments; its +// conversion operator casts each one to ParamGenerator<Ti> and passes them to +// a newly allocated CartesianProductGenerator4. +template <class Generator1, class Generator2, class Generator3, + class Generator4> +class CartesianProductHolder4 { + public: +CartesianProductHolder4(const Generator1& g1, const Generator2& g2, + const Generator3& g3, const Generator4& g4) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4) {} + template <typename T1, typename T2, typename T3, typename T4> + operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4> >() const { + return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4> >( + new CartesianProductGenerator4<T1, T2, T3, T4>( + static_cast<ParamGenerator<T1> >(g1_), + static_cast<ParamGenerator<T2> >(g2_), + static_cast<ParamGenerator<T3> >(g3_), + static_cast<ParamGenerator<T4> >(g4_))); + } + + private: + // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductHolder4& other); + + const Generator1 g1_; + const Generator2 g2_; + const Generator3 g3_; + const Generator4 g4_; +}; // class CartesianProductHolder4 + +// CartesianProductHolder5 keeps copies of five generator arguments; its +// conversion operator casts each one to ParamGenerator<Ti> and passes them to +// a newly allocated CartesianProductGenerator5. +template <class Generator1, class Generator2, class Generator3, + class Generator4, class Generator5> +class CartesianProductHolder5 { + public: +CartesianProductHolder5(const Generator1& g1, const Generator2& g2, + const Generator3& g3, const Generator4& g4, const Generator5& g5) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5) {} + template <typename T1, typename T2, typename T3, typename T4, typename T5> + operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5> >() const { + return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5> >( + new CartesianProductGenerator5<T1, T2, T3, T4, T5>( + static_cast<ParamGenerator<T1> >(g1_), + static_cast<ParamGenerator<T2> >(g2_), + static_cast<ParamGenerator<T3> >(g3_), + static_cast<ParamGenerator<T4> >(g4_), + static_cast<ParamGenerator<T5> >(g5_))); + } + + private: + // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductHolder5& other); + + const Generator1 g1_; + const Generator2 g2_; + const Generator3 g3_; + const Generator4 g4_; + const Generator5 g5_; +}; // class CartesianProductHolder5 + +template <class Generator1, class Generator2, class Generator3, + class Generator4, class Generator5, class Generator6> +class CartesianProductHolder6 { + public: +CartesianProductHolder6(const Generator1& g1, const Generator2& g2, + const Generator3& g3, const Generator4& g4, const Generator5& g5, + const Generator6& g6) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6) {} + template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6> + operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> >() const { + return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> >( + new CartesianProductGenerator6<T1, T2, T3, T4, T5, T6>( + static_cast<ParamGenerator<T1> >(g1_), + static_cast<ParamGenerator<T2> >(g2_), + static_cast<ParamGenerator<T3> >(g3_), + static_cast<ParamGenerator<T4> >(g4_), + static_cast<ParamGenerator<T5> >(g5_), + static_cast<ParamGenerator<T6> >(g6_))); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const CartesianProductHolder6& other); + + const Generator1 g1_; + const Generator2 g2_; + const Generator3 g3_; + const Generator4 g4_; + const Generator5 g5_; + const Generator6 g6_; +}; // class CartesianProductHolder6 + +template <class Generator1, class Generator2, class Generator3, + class Generator4, class Generator5, class Generator6, class Generator7> +class CartesianProductHolder7 { + public: +CartesianProductHolder7(const Generator1& g1, const Generator2& g2, + const Generator3& g3, const Generator4& g4, const Generator5& g5, + const Generator6& g6, const Generator7& g7) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7) {} + template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7> + operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, + T7> >() const { + return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7> >( + new CartesianProductGenerator7<T1, T2, T3, T4, T5, T6, T7>( + static_cast<ParamGenerator<T1> >(g1_), + static_cast<ParamGenerator<T2> >(g2_), + static_cast<ParamGenerator<T3> >(g3_), + static_cast<ParamGenerator<T4> >(g4_), + static_cast<ParamGenerator<T5> >(g5_), + static_cast<ParamGenerator<T6> >(g6_), + static_cast<ParamGenerator<T7> >(g7_))); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const CartesianProductHolder7& other); + + const Generator1 g1_; + const Generator2 g2_; + const Generator3 g3_; + const Generator4 g4_; + const Generator5 g5_; + const Generator6 g6_; + const Generator7 g7_; +}; // class CartesianProductHolder7 + +template <class Generator1, class Generator2, class Generator3, + class Generator4, class Generator5, class Generator6, class Generator7, + class Generator8> +class CartesianProductHolder8 { + public: +CartesianProductHolder8(const Generator1& g1, const Generator2& g2, + const Generator3& g3, const Generator4& g4, const Generator5& g5, + const Generator6& g6, const Generator7& g7, const Generator8& g8) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), + g8_(g8) {} + template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8> + operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, + T8> >() const { + return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8> >( + new CartesianProductGenerator8<T1, T2, T3, T4, T5, T6, T7, T8>( + static_cast<ParamGenerator<T1> >(g1_), + static_cast<ParamGenerator<T2> >(g2_), + static_cast<ParamGenerator<T3> >(g3_), + static_cast<ParamGenerator<T4> >(g4_), + static_cast<ParamGenerator<T5> >(g5_), + static_cast<ParamGenerator<T6> >(g6_), + static_cast<ParamGenerator<T7> >(g7_), + static_cast<ParamGenerator<T8> >(g8_))); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const CartesianProductHolder8& other); + + const Generator1 g1_; + const Generator2 g2_; + const Generator3 g3_; + const Generator4 g4_; + const Generator5 g5_; + const Generator6 g6_; + const Generator7 g7_; + const Generator8 g8_; +}; // class CartesianProductHolder8 + +template <class Generator1, class Generator2, class Generator3, + class Generator4, class Generator5, class Generator6, class Generator7, + class Generator8, class Generator9> +class CartesianProductHolder9 { + public: +CartesianProductHolder9(const Generator1& g1, const Generator2& g2, + const Generator3& g3, const Generator4& g4, const Generator5& g5, + const Generator6& g6, const Generator7& g7, const Generator8& g8, + const Generator9& g9) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8), + g9_(g9) {} + template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9> + operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, + T9> >() const { + return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, + T9> >( + new CartesianProductGenerator9<T1, T2, T3, T4, T5, T6, T7, T8, T9>( + static_cast<ParamGenerator<T1> >(g1_), + static_cast<ParamGenerator<T2> >(g2_), + static_cast<ParamGenerator<T3> >(g3_), + static_cast<ParamGenerator<T4> >(g4_), + static_cast<ParamGenerator<T5> >(g5_), + static_cast<ParamGenerator<T6> >(g6_), + static_cast<ParamGenerator<T7> >(g7_), + static_cast<ParamGenerator<T8> >(g8_), + static_cast<ParamGenerator<T9> >(g9_))); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const CartesianProductHolder9& other); + + const Generator1 g1_; + const Generator2 g2_; + const Generator3 g3_; + const Generator4 g4_; + const Generator5 g5_; + const Generator6 g6_; + const Generator7 g7_; + const Generator8 g8_; + const Generator9 g9_; +}; // class CartesianProductHolder9 + +template <class Generator1, class Generator2, class Generator3, + class Generator4, class Generator5, class Generator6, class Generator7, + class Generator8, class Generator9, class Generator10> +class CartesianProductHolder10 { + public: +CartesianProductHolder10(const Generator1& g1, const Generator2& g2, + const Generator3& g3, const Generator4& g4, const Generator5& g5, + const Generator6& g6, const Generator7& g7, const Generator8& g8, + const Generator9& g9, const Generator10& g10) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8), + g9_(g9), g10_(g10) {} + template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10> + operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, + T9, T10> >() const { + return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, + T9, T10> >( + new CartesianProductGenerator10<T1, T2, T3, T4, T5, T6, T7, T8, T9, + T10>( + static_cast<ParamGenerator<T1> >(g1_), + static_cast<ParamGenerator<T2> >(g2_), + static_cast<ParamGenerator<T3> >(g3_), + static_cast<ParamGenerator<T4> >(g4_), + static_cast<ParamGenerator<T5> >(g5_), + static_cast<ParamGenerator<T6> >(g6_), + static_cast<ParamGenerator<T7> >(g7_), + static_cast<ParamGenerator<T8> >(g8_), + static_cast<ParamGenerator<T9> >(g9_), + static_cast<ParamGenerator<T10> >(g10_))); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const CartesianProductHolder10& other); + + const Generator1 g1_; + const Generator2 g2_; + const Generator3 g3_; + const Generator4 g4_; + const Generator5 g5_; + const Generator6 g6_; + const Generator7 g7_; + const Generator8 g8_; + const Generator9 g9_; + const Generator10 g10_; +}; // class CartesianProductHolder10 + +# endif // GTEST_HAS_COMBINE + +} // namespace internal +} // namespace testing + +#endif // GTEST_HAS_PARAM_TEST + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ + +#if GTEST_HAS_PARAM_TEST + +namespace testing { + +// Functions producing parameter generators. +// +// Google Test uses these generators to produce parameters for value- +// parameterized tests. When a parameterized test case is instantiated +// with a particular generator, Google Test creates and runs tests +// for each element in the sequence produced by the generator. +// +// In the following sample, tests from test case FooTest are instantiated +// each three times with parameter values 3, 5, and 8: +// +// class FooTest : public TestWithParam<int> { ... }; +// +// TEST_P(FooTest, TestThis) { +// } +// TEST_P(FooTest, TestThat) { +// } +// INSTANTIATE_TEST_CASE_P(TestSequence, FooTest, Values(3, 5, 8)); +// + +// Range() returns generators providing sequences of values in a range. +// +// Synopsis: +// Range(start, end) +// - returns a generator producing a sequence of values {start, start+1, +// start+2, ..., }. +// Range(start, end, step) +// - returns a generator producing a sequence of values {start, start+step, +// start+step+step, ..., }. +// Notes: +// * The generated sequences never include end. For example, Range(1, 5) +// returns a generator producing a sequence {1, 2, 3, 4}. Range(1, 9, 2) +// returns a generator producing {1, 3, 5, 7}. +// * start and end must have the same type. 
That type may be any integral or +// floating-point type or a user defined type satisfying these conditions: +// * It must be assignable (have operator=() defined). +// * It must have operator+() (operator+(int-compatible type) for +// two-operand version). +// * It must have operator<() defined. +// Elements in the resulting sequences will also have that type. +// * Condition start < end must be satisfied in order for resulting sequences +// to contain any elements. +// +template <typename T, typename IncrementT> +internal::ParamGenerator<T> Range(T start, T end, IncrementT step) { + return internal::ParamGenerator<T>( + new internal::RangeGenerator<T, IncrementT>(start, end, step)); +} + +template <typename T> +internal::ParamGenerator<T> Range(T start, T end) { + return Range(start, end, 1); +} + +// ValuesIn() function allows generation of tests with parameters coming from +// a container. +// +// Synopsis: +// ValuesIn(const T (&array)[N]) +// - returns a generator producing sequences with elements from +// a C-style array. +// ValuesIn(const Container& container) +// - returns a generator producing sequences with elements from +// an STL-style container. +// ValuesIn(Iterator begin, Iterator end) +// - returns a generator producing sequences with elements from +// a range [begin, end) defined by a pair of STL-style iterators. These +// iterators can also be plain C pointers. +// +// Please note that ValuesIn copies the values from the containers +// passed in and keeps them to generate tests in RUN_ALL_TESTS(). 
+// +// Examples: +// +// This instantiates tests from test case StringTest +// each with C-string values of "foo", "bar", and "baz": +// +// const char* strings[] = {"foo", "bar", "baz"}; +// INSTANTIATE_TEST_CASE_P(StringSequence, StringTest, ValuesIn(strings)); +// +// This instantiates tests from test case StlStringTest +// each with STL strings with values "a" and "b": +// +// ::std::vector< ::std::string> GetParameterStrings() { +// ::std::vector< ::std::string> v; +// v.push_back("a"); +// v.push_back("b"); +// return v; +// } +// +// INSTANTIATE_TEST_CASE_P(CharSequence, +// StlStringTest, +// ValuesIn(GetParameterStrings())); +// +// +// This will also instantiate tests from CharTest +// each with parameter values 'a' and 'b': +// +// ::std::list<char> GetParameterChars() { +// ::std::list<char> list; +// list.push_back('a'); +// list.push_back('b'); +// return list; +// } +// ::std::list<char> l = GetParameterChars(); +// INSTANTIATE_TEST_CASE_P(CharSequence2, +// CharTest, +// ValuesIn(l.begin(), l.end())); +// +template <typename ForwardIterator> +internal::ParamGenerator< + typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type> +ValuesIn(ForwardIterator begin, ForwardIterator end) { + typedef typename ::testing::internal::IteratorTraits<ForwardIterator> + ::value_type ParamType; + return internal::ParamGenerator<ParamType>( + new internal::ValuesInIteratorRangeGenerator<ParamType>(begin, end)); +} + +template <typename T, size_t N> +internal::ParamGenerator<T> ValuesIn(const T (&array)[N]) { + return ValuesIn(array, array + N); +} + +template <class Container> +internal::ParamGenerator<typename Container::value_type> ValuesIn( + const Container& container) { + return ValuesIn(container.begin(), container.end()); +} + +// Values() allows generating tests from explicitly specified list of +// parameters. +// +// Synopsis: +// Values(T v1, T v2, ..., T vN) +// - returns a generator producing sequences with elements v1, v2, ..., vN. 
+// +// For example, this instantiates tests from test case BarTest each +// with values "one", "two", and "three": +// +// INSTANTIATE_TEST_CASE_P(NumSequence, BarTest, Values("one", "two", "three")); +// +// This instantiates tests from test case BazTest each with values 1, 2, 3.5. +// The exact type of values will depend on the type of parameter in BazTest. +// +// INSTANTIATE_TEST_CASE_P(FloatingNumbers, BazTest, Values(1, 2, 3.5)); +// +// Currently, Values() supports from 1 to 50 parameters. +// +template <typename T1> +internal::ValueArray1<T1> Values(T1 v1) { + return internal::ValueArray1<T1>(v1); +} + +template <typename T1, typename T2> +internal::ValueArray2<T1, T2> Values(T1 v1, T2 v2) { + return internal::ValueArray2<T1, T2>(v1, v2); +} + +template <typename T1, typename T2, typename T3> +internal::ValueArray3<T1, T2, T3> Values(T1 v1, T2 v2, T3 v3) { + return internal::ValueArray3<T1, T2, T3>(v1, v2, v3); +} + +template <typename T1, typename T2, typename T3, typename T4> +internal::ValueArray4<T1, T2, T3, T4> Values(T1 v1, T2 v2, T3 v3, T4 v4) { + return internal::ValueArray4<T1, T2, T3, T4>(v1, v2, v3, v4); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5> +internal::ValueArray5<T1, T2, T3, T4, T5> Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5) { + return internal::ValueArray5<T1, T2, T3, T4, T5>(v1, v2, v3, v4, v5); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6> +internal::ValueArray6<T1, T2, T3, T4, T5, T6> Values(T1 v1, T2 v2, T3 v3, + T4 v4, T5 v5, T6 v6) { + return internal::ValueArray6<T1, T2, T3, T4, T5, T6>(v1, v2, v3, v4, v5, v6); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7> +internal::ValueArray7<T1, T2, T3, T4, T5, T6, T7> Values(T1 v1, T2 v2, T3 v3, + T4 v4, T5 v5, T6 v6, T7 v7) { + return internal::ValueArray7<T1, T2, T3, T4, T5, T6, T7>(v1, v2, v3, v4, v5, + v6, v7); +} + +template <typename T1, 
typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8> +internal::ValueArray8<T1, T2, T3, T4, T5, T6, T7, T8> Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8) { + return internal::ValueArray8<T1, T2, T3, T4, T5, T6, T7, T8>(v1, v2, v3, v4, + v5, v6, v7, v8); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9> +internal::ValueArray9<T1, T2, T3, T4, T5, T6, T7, T8, T9> Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9) { + return internal::ValueArray9<T1, T2, T3, T4, T5, T6, T7, T8, T9>(v1, v2, v3, + v4, v5, v6, v7, v8, v9); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10> +internal::ValueArray10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> Values(T1 v1, + T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10) { + return internal::ValueArray10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>(v1, + v2, v3, v4, v5, v6, v7, v8, v9, v10); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11> +internal::ValueArray11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, + T11> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11) { + return internal::ValueArray11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, + T11>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12> +internal::ValueArray12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12) { + return internal::ValueArray12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + 
T12>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13> +internal::ValueArray13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, + T13> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13) { + return internal::ValueArray13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14> +internal::ValueArray14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14) { + return internal::ValueArray14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15> +internal::ValueArray15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, + T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) { + return internal::ValueArray15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + 
typename T16> +internal::ValueArray16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16) { + return internal::ValueArray16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15, v16); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17> +internal::ValueArray17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17) { + return internal::ValueArray17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, + v11, v12, v13, v14, v15, v16, v17); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18> +internal::ValueArray18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, + T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18) { + return internal::ValueArray18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18>(v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15, v16, v17, v18); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + 
typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19> +internal::ValueArray19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, + T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, + T15 v15, T16 v16, T17 v17, T18 v18, T19 v19) { + return internal::ValueArray19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19>(v1, v2, v3, v4, v5, v6, v7, v8, + v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20> +internal::ValueArray20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20> Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, + T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20) { + return internal::ValueArray20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20>(v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21> +internal::ValueArray21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21> Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, + T14 v14, T15 v15, T16 v16, T17 v17, T18 
v18, T19 v19, T20 v20, T21 v21) { + return internal::ValueArray21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21>(v1, v2, v3, v4, v5, v6, + v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22> +internal::ValueArray22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22> Values(T1 v1, T2 v2, T3 v3, + T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22) { + return internal::ValueArray22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22>(v1, v2, v3, v4, + v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, + v20, v21, v22); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23> +internal::ValueArray23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22, T23 v23) { + return internal::ValueArray23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23>(v1, v2, v3, + v4, v5, v6, v7, v8, v9, 
v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, + v20, v21, v22, v23); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24> +internal::ValueArray24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22, T23 v23, T24 v24) { + return internal::ValueArray24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24>(v1, v2, + v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, + v19, v20, v21, v22, v23, v24); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25> +internal::ValueArray25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> Values(T1 v1, + T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, + T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, + T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25) { + return internal::ValueArray25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25>(v1, + v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, + v18, 
v19, v20, v21, v22, v23, v24, v25); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26> +internal::ValueArray26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26) { + return internal::ValueArray26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27> +internal::ValueArray27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, + T27> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27) { + return internal::ValueArray27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, 
T22, T23, T24, T25, + T26, T27>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, + v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28> +internal::ValueArray28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, + T28> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28) { + return internal::ValueArray28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, + v28); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29> +internal::ValueArray29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 
v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29) { + return internal::ValueArray29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, + v27, v28, v29); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30> +internal::ValueArray30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, + T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, + T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, + T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30) { + return internal::ValueArray30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, + v26, v27, v28, v29, v30); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename 
T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31> +internal::ValueArray31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31) { + return internal::ValueArray31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, + v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, + v25, v26, v27, v28, v29, v30, v31); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32> +internal::ValueArray32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, + T32 v32) { + return internal::ValueArray32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31, T32>(v1, 
v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, v32); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33> +internal::ValueArray33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, + T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, + T32 v32, T33 v33) { + return internal::ValueArray33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31, T32, T33>(v1, v2, v3, v4, v5, v6, v7, v8, + v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, v32, v33); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34> +internal::ValueArray34<T1, T2, T3, 
T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, + T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, + T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, + T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, + T31 v31, T32 v32, T33 v33, T34 v34) { + return internal::ValueArray34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31, T32, T33, T34>(v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, + v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35> +internal::ValueArray35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35> Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, + T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, + T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, + T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35) { + return internal::ValueArray35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, 
T28, T29, T30, T31, T32, T33, T34, T35>(v1, v2, v3, v4, v5, v6, + v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, + v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36> +internal::ValueArray36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36> Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, + T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, + T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, + T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36) { + return internal::ValueArray36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36>(v1, v2, v3, v4, + v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, + v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, + v34, v35, v36); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, 
typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37> +internal::ValueArray37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37> Values(T1 v1, T2 v2, T3 v3, + T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, + T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, + T37 v37) { + return internal::ValueArray37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37>(v1, v2, v3, + v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, + v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, + v34, v35, v36, v37); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38> +internal::ValueArray38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 
v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, + T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, + T37 v37, T38 v38) { + return internal::ValueArray38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38>(v1, v2, + v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, + v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, + v33, v34, v35, v36, v37, v38); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39> +internal::ValueArray39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, + T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, + T37 v37, T38 v38, T39 v39) { + return internal::ValueArray39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39>(v1, + v2, 
v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, + v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, + v32, v33, v34, v35, v36, v37, v38, v39); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40> +internal::ValueArray40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Values(T1 v1, + T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, + T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, + T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, + T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, + T36 v36, T37 v37, T38 v38, T39 v39, T40 v40) { + return internal::ValueArray40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, + T40>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, + v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + 
typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41> +internal::ValueArray41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, + T41> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41) { + return internal::ValueArray41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, + T40, T41>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, + v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, + v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, 
typename T42> +internal::ValueArray42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, + T42> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42) { + return internal::ValueArray42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, + T40, T41, T42>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, + v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, + v42); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43> +internal::ValueArray43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, + T43> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 
v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43) { + return internal::ValueArray43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, + T40, T41, T42, T43>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, + v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, + v41, v42, v43); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44> +internal::ValueArray44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, + T44> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, 
T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43, T44 v44) { + return internal::ValueArray44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, + T40, T41, T42, T43, T44>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, + v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, + v40, v41, v42, v43, v44); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45> +internal::ValueArray45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, + T44, T45> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, + T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, + T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, + T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, + T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, + T41 v41, T42 v42, T43 v43, T44 v44, T45 v45) { + return internal::ValueArray45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, 
T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, + T40, T41, T42, T43, T44, T45>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, + v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, + v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, + v39, v40, v41, v42, v43, v44, v45); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46> +internal::ValueArray46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, + T44, T45, T46> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, + T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, + T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46) { + return internal::ValueArray46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, + T40, T41, T42, T43, T44, T45, T46>(v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, 
v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, + v38, v39, v40, v41, v42, v43, v44, v45, v46); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46, typename T47> +internal::ValueArray47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, + T44, T45, T46, T47> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, + T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, + T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47) { + return internal::ValueArray47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, + T40, T41, T42, T43, T44, T45, T46, T47>(v1, v2, v3, v4, v5, v6, v7, v8, + v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, + v38, v39, v40, v41, v42, v43, v44, v45, v46, v47); +} + 
+template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46, typename T47, typename T48> +internal::ValueArray48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, + T44, T45, T46, T47, T48> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, + T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, + T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, + T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, + T48 v48) { + return internal::ValueArray48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, + T40, T41, T42, T43, T44, T45, T46, T47, T48>(v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, + v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, + v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename 
T7, typename T8, typename T9, typename T10, + typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46, typename T47, typename T48, typename T49> +internal::ValueArray49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, + T44, T45, T46, T47, T48, T49> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, + T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, + T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, + T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, + T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, + T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, + T47 v47, T48 v48, T49 v49) { + return internal::ValueArray49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, + T40, T41, T42, T43, T44, T45, T46, T47, T48, T49>(v1, v2, v3, v4, v5, v6, + v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, + v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, + v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49); +} + +template <typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9, typename T10, + typename T11, 
typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, typename T20, + typename T21, typename T22, typename T23, typename T24, typename T25, + typename T26, typename T27, typename T28, typename T29, typename T30, + typename T31, typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, typename T40, + typename T41, typename T42, typename T43, typename T44, typename T45, + typename T46, typename T47, typename T48, typename T49, typename T50> +internal::ValueArray50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, + T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, + T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, + T44, T45, T46, T47, T48, T49, T50> Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, + T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, + T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, + T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, + T38 v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, + T46 v46, T47 v47, T48 v48, T49 v49, T50 v50) { + return internal::ValueArray50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, + T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, + T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, + T40, T41, T42, T43, T44, T45, T46, T47, T48, T49, T50>(v1, v2, v3, v4, + v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, + v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, + v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, + v48, v49, v50); +} + +// Bool() allows generating tests with parameters in a set of (false, true). +// +// Synopsis: +// Bool() +// - returns a generator producing sequences with elements {false, true}. 
+// +// It is useful when testing code that depends on Boolean flags. Combinations +// of multiple flags can be tested when several Bool()'s are combined using +// Combine() function. +// +// In the following example all tests in the test case FlagDependentTest +// will be instantiated twice with parameters false and true. +// +// class FlagDependentTest : public testing::TestWithParam<bool> { +// virtual void SetUp() { +// external_flag = GetParam(); +// } +// } +// INSTANTIATE_TEST_CASE_P(BoolSequence, FlagDependentTest, Bool()); +// +inline internal::ParamGenerator<bool> Bool() { + return Values(false, true); +} + +# if GTEST_HAS_COMBINE +// Combine() allows the user to combine two or more sequences to produce +// values of a Cartesian product of those sequences' elements. +// +// Synopsis: +// Combine(gen1, gen2, ..., genN) +// - returns a generator producing sequences with elements coming from +// the Cartesian product of elements from the sequences generated by +// gen1, gen2, ..., genN. The sequence elements will have a type of +// tuple<T1, T2, ..., TN> where T1, T2, ..., TN are the types +// of elements from sequences produces by gen1, gen2, ..., genN. +// +// Combine can have up to 10 arguments. This number is currently limited +// by the maximum number of elements in the tuple implementation used by Google +// Test. 
+// +// Example: +// +// This will instantiate tests in test case AnimalTest each one with +// the parameter values tuple("cat", BLACK), tuple("cat", WHITE), +// tuple("dog", BLACK), and tuple("dog", WHITE): +// +// enum Color { BLACK, GRAY, WHITE }; +// class AnimalTest +// : public testing::TestWithParam<tuple<const char*, Color> > {...}; +// +// TEST_P(AnimalTest, AnimalLooksNice) {...} +// +// INSTANTIATE_TEST_CASE_P(AnimalVariations, AnimalTest, +// Combine(Values("cat", "dog"), +// Values(BLACK, WHITE))); +// +// This will instantiate tests in FlagDependentTest with all variations of two +// Boolean flags: +// +// class FlagDependentTest +// : public testing::TestWithParam<tuple<bool, bool> > { +// virtual void SetUp() { +// // Assigns external_flag_1 and external_flag_2 values from the tuple. +// tie(external_flag_1, external_flag_2) = GetParam(); +// } +// }; +// +// TEST_P(FlagDependentTest, TestFeature1) { +// // Test your code using external_flag_1 and external_flag_2 here. +// } +// INSTANTIATE_TEST_CASE_P(TwoBoolSequence, FlagDependentTest, +// Combine(Bool(), Bool())); +// +template <typename Generator1, typename Generator2> +internal::CartesianProductHolder2<Generator1, Generator2> Combine( + const Generator1& g1, const Generator2& g2) { + return internal::CartesianProductHolder2<Generator1, Generator2>( + g1, g2); +} + +template <typename Generator1, typename Generator2, typename Generator3> +internal::CartesianProductHolder3<Generator1, Generator2, Generator3> Combine( + const Generator1& g1, const Generator2& g2, const Generator3& g3) { + return internal::CartesianProductHolder3<Generator1, Generator2, Generator3>( + g1, g2, g3); +} + +template <typename Generator1, typename Generator2, typename Generator3, + typename Generator4> +internal::CartesianProductHolder4<Generator1, Generator2, Generator3, + Generator4> Combine( + const Generator1& g1, const Generator2& g2, const Generator3& g3, + const Generator4& g4) { + return 
internal::CartesianProductHolder4<Generator1, Generator2, Generator3, + Generator4>( + g1, g2, g3, g4); +} + +template <typename Generator1, typename Generator2, typename Generator3, + typename Generator4, typename Generator5> +internal::CartesianProductHolder5<Generator1, Generator2, Generator3, + Generator4, Generator5> Combine( + const Generator1& g1, const Generator2& g2, const Generator3& g3, + const Generator4& g4, const Generator5& g5) { + return internal::CartesianProductHolder5<Generator1, Generator2, Generator3, + Generator4, Generator5>( + g1, g2, g3, g4, g5); +} + +template <typename Generator1, typename Generator2, typename Generator3, + typename Generator4, typename Generator5, typename Generator6> +internal::CartesianProductHolder6<Generator1, Generator2, Generator3, + Generator4, Generator5, Generator6> Combine( + const Generator1& g1, const Generator2& g2, const Generator3& g3, + const Generator4& g4, const Generator5& g5, const Generator6& g6) { + return internal::CartesianProductHolder6<Generator1, Generator2, Generator3, + Generator4, Generator5, Generator6>( + g1, g2, g3, g4, g5, g6); +} + +template <typename Generator1, typename Generator2, typename Generator3, + typename Generator4, typename Generator5, typename Generator6, + typename Generator7> +internal::CartesianProductHolder7<Generator1, Generator2, Generator3, + Generator4, Generator5, Generator6, Generator7> Combine( + const Generator1& g1, const Generator2& g2, const Generator3& g3, + const Generator4& g4, const Generator5& g5, const Generator6& g6, + const Generator7& g7) { + return internal::CartesianProductHolder7<Generator1, Generator2, Generator3, + Generator4, Generator5, Generator6, Generator7>( + g1, g2, g3, g4, g5, g6, g7); +} + +template <typename Generator1, typename Generator2, typename Generator3, + typename Generator4, typename Generator5, typename Generator6, + typename Generator7, typename Generator8> +internal::CartesianProductHolder8<Generator1, Generator2, 
Generator3, + Generator4, Generator5, Generator6, Generator7, Generator8> Combine( + const Generator1& g1, const Generator2& g2, const Generator3& g3, + const Generator4& g4, const Generator5& g5, const Generator6& g6, + const Generator7& g7, const Generator8& g8) { + return internal::CartesianProductHolder8<Generator1, Generator2, Generator3, + Generator4, Generator5, Generator6, Generator7, Generator8>( + g1, g2, g3, g4, g5, g6, g7, g8); +} + +template <typename Generator1, typename Generator2, typename Generator3, + typename Generator4, typename Generator5, typename Generator6, + typename Generator7, typename Generator8, typename Generator9> +internal::CartesianProductHolder9<Generator1, Generator2, Generator3, + Generator4, Generator5, Generator6, Generator7, Generator8, + Generator9> Combine( + const Generator1& g1, const Generator2& g2, const Generator3& g3, + const Generator4& g4, const Generator5& g5, const Generator6& g6, + const Generator7& g7, const Generator8& g8, const Generator9& g9) { + return internal::CartesianProductHolder9<Generator1, Generator2, Generator3, + Generator4, Generator5, Generator6, Generator7, Generator8, Generator9>( + g1, g2, g3, g4, g5, g6, g7, g8, g9); +} + +template <typename Generator1, typename Generator2, typename Generator3, + typename Generator4, typename Generator5, typename Generator6, + typename Generator7, typename Generator8, typename Generator9, + typename Generator10> +internal::CartesianProductHolder10<Generator1, Generator2, Generator3, + Generator4, Generator5, Generator6, Generator7, Generator8, Generator9, + Generator10> Combine( + const Generator1& g1, const Generator2& g2, const Generator3& g3, + const Generator4& g4, const Generator5& g5, const Generator6& g6, + const Generator7& g7, const Generator8& g8, const Generator9& g9, + const Generator10& g10) { + return internal::CartesianProductHolder10<Generator1, Generator2, Generator3, + Generator4, Generator5, Generator6, Generator7, Generator8, Generator9, + 
Generator10>( + g1, g2, g3, g4, g5, g6, g7, g8, g9, g10); +} +# endif // GTEST_HAS_COMBINE + + + +# define TEST_P(test_case_name, test_name) \ + class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \ + : public test_case_name { \ + public: \ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {} \ + virtual void TestBody(); \ + private: \ + static int AddToRegistry() { \ + ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \ + GetTestCasePatternHolder<test_case_name>(\ + #test_case_name, __FILE__, __LINE__)->AddTestPattern(\ + #test_case_name, \ + #test_name, \ + new ::testing::internal::TestMetaFactory< \ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>()); \ + return 0; \ + } \ + static int gtest_registering_dummy_; \ + GTEST_DISALLOW_COPY_AND_ASSIGN_(\ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \ + }; \ + int GTEST_TEST_CLASS_NAME_(test_case_name, \ + test_name)::gtest_registering_dummy_ = \ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \ + void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() + +# define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator) \ + ::testing::internal::ParamGenerator<test_case_name::ParamType> \ + gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \ + int gtest_##prefix##test_case_name##_dummy_ = \ + ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \ + GetTestCasePatternHolder<test_case_name>(\ + #test_case_name, __FILE__, __LINE__)->AddTestCaseInstantiation(\ + #prefix, \ + >est_##prefix##test_case_name##_EvalGenerator_, \ + __FILE__, __LINE__) + +} // namespace testing + +#endif // GTEST_HAS_PARAM_TEST + +#endif // GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ +// Copyright 2006, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// Google C++ Testing Framework definitions useful in production code. + +#ifndef GTEST_INCLUDE_GTEST_GTEST_PROD_H_ +#define GTEST_INCLUDE_GTEST_GTEST_PROD_H_ + +// When you need to test the private or protected members of a class, +// use the FRIEND_TEST macro to declare your tests as friends of the +// class. 
For example: +// +// class MyClass { +// private: +// void MyMethod(); +// FRIEND_TEST(MyClassTest, MyMethod); +// }; +// +// class MyClassTest : public testing::Test { +// // ... +// }; +// +// TEST_F(MyClassTest, MyMethod) { +// // Can call MyClass::MyMethod() here. +// } + +#define FRIEND_TEST(test_case_name, test_name)\ +friend class test_case_name##_##test_name##_Test + +#endif // GTEST_INCLUDE_GTEST_GTEST_PROD_H_ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Author: mheule@google.com (Markus Heule) +// + +#ifndef GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_ +#define GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_ + +#include <iosfwd> +#include <vector> + +namespace testing { + +// A copyable object representing the result of a test part (i.e. an +// assertion or an explicit FAIL(), ADD_FAILURE(), or SUCCEED()). +// +// Don't inherit from TestPartResult as its destructor is not virtual. +class GTEST_API_ TestPartResult { + public: + // The possible outcomes of a test part (i.e. an assertion or an + // explicit SUCCEED(), FAIL(), or ADD_FAILURE()). + enum Type { + kSuccess, // Succeeded. + kNonFatalFailure, // Failed but the test can continue. + kFatalFailure // Failed and the test should be terminated. + }; + + // C'tor. TestPartResult does NOT have a default constructor. + // Always use this constructor (with parameters) to create a + // TestPartResult object. + TestPartResult(Type a_type, + const char* a_file_name, + int a_line_number, + const char* a_message) + : type_(a_type), + file_name_(a_file_name == NULL ? "" : a_file_name), + line_number_(a_line_number), + summary_(ExtractSummary(a_message)), + message_(a_message) { + } + + // Gets the outcome of the test part. + Type type() const { return type_; } + + // Gets the name of the source file where the test part took place, or + // NULL if it's unknown. + const char* file_name() const { + return file_name_.empty() ? NULL : file_name_.c_str(); + } + + // Gets the line in the source file where the test part took place, + // or -1 if it's unknown. + int line_number() const { return line_number_; } + + // Gets the summary of the failure message. + const char* summary() const { return summary_.c_str(); } + + // Gets the message associated with the test part. + const char* message() const { return message_.c_str(); } + + // Returns true iff the test part passed. + bool passed() const { return type_ == kSuccess; } + + // Returns true iff the test part failed.
+ bool failed() const { return type_ != kSuccess; } + + // Returns true iff the test part non-fatally failed. + bool nonfatally_failed() const { return type_ == kNonFatalFailure; } + + // Returns true iff the test part fatally failed. + bool fatally_failed() const { return type_ == kFatalFailure; } + + private: + Type type_; + + // Gets the summary of the failure message by omitting the stack + // trace in it. + static std::string ExtractSummary(const char* message); + + // The name of the source file where the test part took place, or + // "" if the source file is unknown. + std::string file_name_; + // The line in the source file where the test part took place, or -1 + // if the line number is unknown. + int line_number_; + std::string summary_; // The test failure summary. + std::string message_; // The test failure message. +}; + +// Prints a TestPartResult object. +std::ostream& operator<<(std::ostream& os, const TestPartResult& result); + +// An array of TestPartResult objects. +// +// Don't inherit from TestPartResultArray as its destructor is not +// virtual. +class GTEST_API_ TestPartResultArray { + public: + TestPartResultArray() {} + + // Appends the given TestPartResult to the array. + void Append(const TestPartResult& result); + + // Returns the TestPartResult at the given index (0-based). + const TestPartResult& GetTestPartResult(int index) const; + + // Returns the number of TestPartResult objects in the array. + int size() const; + + private: + std::vector<TestPartResult> array_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestPartResultArray); +}; + +// This interface knows how to report a test part result. +class TestPartResultReporterInterface { + public: + virtual ~TestPartResultReporterInterface() {} + + virtual void ReportTestPartResult(const TestPartResult& result) = 0; +}; + +namespace internal { + +// This helper class is used by {ASSERT|EXPECT}_NO_FATAL_FAILURE to check if a +// statement generates new fatal failures. 
To do so it registers itself as the +// current test part result reporter. Besides checking if fatal failures were +// reported, it only delegates the reporting to the former result reporter. +// The original result reporter is restored in the destructor. +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +class GTEST_API_ HasNewFatalFailureHelper + : public TestPartResultReporterInterface { + public: + HasNewFatalFailureHelper(); + virtual ~HasNewFatalFailureHelper(); + virtual void ReportTestPartResult(const TestPartResult& result); + bool has_new_fatal_failure() const { return has_new_fatal_failure_; } + private: + bool has_new_fatal_failure_; + TestPartResultReporterInterface* original_reporter_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(HasNewFatalFailureHelper); +}; + +} // namespace internal + +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_ +// Copyright 2008 Google Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +#ifndef GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_ +#define GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_ + +// This header implements typed tests and type-parameterized tests. + +// Typed (aka type-driven) tests repeat the same test for types in a +// list. You must know which types you want to test with when writing +// typed tests. Here's how you do it: + +#if 0 + +// First, define a fixture class template. It should be parameterized +// by a type. Remember to derive it from testing::Test. +template <typename T> +class FooTest : public testing::Test { + public: + ... + typedef std::list<T> List; + static T shared_; + T value_; +}; + +// Next, associate a list of types with the test case, which will be +// repeated for each type in the list. The typedef is necessary for +// the macro to parse correctly. +typedef testing::Types<char, int, unsigned int> MyTypes; +TYPED_TEST_CASE(FooTest, MyTypes); + +// If the type list contains only one type, you can write that type +// directly without Types<...>: +// TYPED_TEST_CASE(FooTest, int); + +// Then, use TYPED_TEST() instead of TEST_F() to define as many typed +// tests for this test case as you want. +TYPED_TEST(FooTest, DoesBlah) { + // Inside a test, refer to TypeParam to get the type parameter. + // Since we are inside a derived class template, C++ requires us to + // visit the members of FooTest via 'this'.
+ TypeParam n = this->value_; + + // To visit static members of the fixture, add the TestFixture:: + // prefix. + n += TestFixture::shared_; + + // To refer to typedefs in the fixture, add the "typename + // TestFixture::" prefix. + typename TestFixture::List values; + values.push_back(n); + ... +} + +TYPED_TEST(FooTest, HasPropertyA) { ... } + +#endif // 0 + +// Type-parameterized tests are abstract test patterns parameterized +// by a type. Compared with typed tests, type-parameterized tests +// allow you to define the test pattern without knowing what the type +// parameters are. The defined pattern can be instantiated with +// different types any number of times, in any number of translation +// units. +// +// If you are designing an interface or concept, you can define a +// suite of type-parameterized tests to verify properties that any +// valid implementation of the interface/concept should have. Then, +// each implementation can easily instantiate the test suite to verify +// that it conforms to the requirements, without having to write +// similar tests repeatedly. Here's an example: + +#if 0 + +// First, define a fixture class template. It should be parameterized +// by a type. Remember to derive it from testing::Test. +template <typename T> +class FooTest : public testing::Test { + ... +}; + +// Next, declare that you will define a type-parameterized test case +// (the _P suffix is for "parameterized" or "pattern", whichever you +// prefer): +TYPED_TEST_CASE_P(FooTest); + +// Then, use TYPED_TEST_P() to define as many type-parameterized tests +// for this type-parameterized test case as you want. +TYPED_TEST_P(FooTest, DoesBlah) { + // Inside a test, refer to TypeParam to get the type parameter. + TypeParam n = 0; + ... +} + +TYPED_TEST_P(FooTest, HasPropertyA) { ... } + +// Now the tricky part: you need to register all test patterns before +// you can instantiate them. 
The first argument of the macro is the +// test case name; the rest are the names of the tests in this test +// case. +REGISTER_TYPED_TEST_CASE_P(FooTest, + DoesBlah, HasPropertyA); + +// Finally, you are free to instantiate the pattern with the types you +// want. If you put the above code in a header file, you can #include +// it in multiple C++ source files and instantiate it multiple times. +// +// To distinguish different instances of the pattern, the first +// argument to the INSTANTIATE_* macro is a prefix that will be added +// to the actual test case name. Remember to pick unique prefixes for +// different instances. +typedef testing::Types<char, int, unsigned int> MyTypes; +INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes); + +// If the type list contains only one type, you can write that type +// directly without Types<...>: +// INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, int); + +#endif // 0 + + +// Implements typed tests. + +#if GTEST_HAS_TYPED_TEST + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// Expands to the name of the typedef for the type parameters of the +// given test case. +# define GTEST_TYPE_PARAMS_(TestCaseName) gtest_type_params_##TestCaseName##_ + +// The 'Types' template argument below must have spaces around it +// since some compilers may choke on '>>' when passing a template +// instance (e.g. 
Types<int>) +# define TYPED_TEST_CASE(CaseName, Types) \ + typedef ::testing::internal::TypeList< Types >::type \ + GTEST_TYPE_PARAMS_(CaseName) + +# define TYPED_TEST(CaseName, TestName) \ + template <typename gtest_TypeParam_> \ + class GTEST_TEST_CLASS_NAME_(CaseName, TestName) \ + : public CaseName<gtest_TypeParam_> { \ + private: \ + typedef CaseName<gtest_TypeParam_> TestFixture; \ + typedef gtest_TypeParam_ TypeParam; \ + virtual void TestBody(); \ + }; \ + bool gtest_##CaseName##_##TestName##_registered_ GTEST_ATTRIBUTE_UNUSED_ = \ + ::testing::internal::TypeParameterizedTest< \ + CaseName, \ + ::testing::internal::TemplateSel< \ + GTEST_TEST_CLASS_NAME_(CaseName, TestName)>, \ + GTEST_TYPE_PARAMS_(CaseName)>::Register(\ + "", #CaseName, #TestName, 0); \ + template <typename gtest_TypeParam_> \ + void GTEST_TEST_CLASS_NAME_(CaseName, TestName)<gtest_TypeParam_>::TestBody() + +#endif // GTEST_HAS_TYPED_TEST + +// Implements type-parameterized tests. + +#if GTEST_HAS_TYPED_TEST_P + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// Expands to the namespace name that the type-parameterized tests for +// the given type-parameterized test case are defined in. The exact +// name of the namespace is subject to change without notice. +# define GTEST_CASE_NAMESPACE_(TestCaseName) \ + gtest_case_##TestCaseName##_ + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// Expands to the name of the variable used to remember the names of +// the defined tests in the given test case. +# define GTEST_TYPED_TEST_CASE_P_STATE_(TestCaseName) \ + gtest_typed_test_case_p_state_##TestCaseName##_ + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE DIRECTLY. +// +// Expands to the name of the variable used to remember the names of +// the registered tests in the given test case. 
+# define GTEST_REGISTERED_TEST_NAMES_(TestCaseName) \ + gtest_registered_test_names_##TestCaseName##_ + +// The variables defined in the type-parameterized test macros are +// static as typically these macros are used in a .h file that can be +// #included in multiple translation units linked together. +# define TYPED_TEST_CASE_P(CaseName) \ + static ::testing::internal::TypedTestCasePState \ + GTEST_TYPED_TEST_CASE_P_STATE_(CaseName) + +# define TYPED_TEST_P(CaseName, TestName) \ + namespace GTEST_CASE_NAMESPACE_(CaseName) { \ + template <typename gtest_TypeParam_> \ + class TestName : public CaseName<gtest_TypeParam_> { \ + private: \ + typedef CaseName<gtest_TypeParam_> TestFixture; \ + typedef gtest_TypeParam_ TypeParam; \ + virtual void TestBody(); \ + }; \ + static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \ + GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).AddTestName(\ + __FILE__, __LINE__, #CaseName, #TestName); \ + } \ + template <typename gtest_TypeParam_> \ + void GTEST_CASE_NAMESPACE_(CaseName)::TestName<gtest_TypeParam_>::TestBody() + +# define REGISTER_TYPED_TEST_CASE_P(CaseName, ...) \ + namespace GTEST_CASE_NAMESPACE_(CaseName) { \ + typedef ::testing::internal::Templates<__VA_ARGS__>::type gtest_AllTests_; \ + } \ + static const char* const GTEST_REGISTERED_TEST_NAMES_(CaseName) = \ + GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).VerifyRegisteredTestNames(\ + __FILE__, __LINE__, #__VA_ARGS__) + +// The 'Types' template argument below must have spaces around it +// since some compilers may choke on '>>' when passing a template +// instance (e.g. 
Types<int>) +# define INSTANTIATE_TYPED_TEST_CASE_P(Prefix, CaseName, Types) \ + bool gtest_##Prefix##_##CaseName GTEST_ATTRIBUTE_UNUSED_ = \ + ::testing::internal::TypeParameterizedTestCase<CaseName, \ + GTEST_CASE_NAMESPACE_(CaseName)::gtest_AllTests_, \ + ::testing::internal::TypeList< Types >::type>::Register(\ + #Prefix, #CaseName, GTEST_REGISTERED_TEST_NAMES_(CaseName)) + +#endif // GTEST_HAS_TYPED_TEST_P + +#endif // GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_ + +// Depending on the platform, different string classes are available. +// On Linux, in addition to ::std::string, Google also makes use of +// class ::string, which has the same interface as ::std::string, but +// has a different implementation. +// +// The user can define GTEST_HAS_GLOBAL_STRING to 1 to indicate that +// ::string is available AND is a distinct type to ::std::string, or +// define it to 0 to indicate otherwise. +// +// If the user's ::std::string and ::string are the same class due to +// aliasing, he should define GTEST_HAS_GLOBAL_STRING to 0. +// +// If the user doesn't define GTEST_HAS_GLOBAL_STRING, it is defined +// heuristically. + +namespace testing { + +// Declares the flags. + +// This flag temporarily enables the disabled tests. +GTEST_DECLARE_bool_(also_run_disabled_tests); + +// This flag brings the debugger on an assertion failure. +GTEST_DECLARE_bool_(break_on_failure); + +// This flag controls whether Google Test catches all test-thrown exceptions +// and logs them as failures. +GTEST_DECLARE_bool_(catch_exceptions); + +// This flag enables using colors in terminal output. Available values are +// "yes" to enable colors, "no" (disable colors), or "auto" (the default) +// to let Google Test decide. +GTEST_DECLARE_string_(color); + +// This flag sets up the filter to select by name using a glob pattern +// the tests to run. If the filter is not given all tests are executed. +GTEST_DECLARE_string_(filter); + +// This flag causes Google Test to list tests.
None of the tests listed +// are actually run if the flag is provided. +GTEST_DECLARE_bool_(list_tests); + +// This flag controls whether Google Test emits a detailed XML report to a file +// in addition to its normal textual output. +GTEST_DECLARE_string_(output); + +// This flag controls whether Google Test prints the elapsed time for each +// test. +GTEST_DECLARE_bool_(print_time); + +// This flag specifies the random number seed. +GTEST_DECLARE_int32_(random_seed); + +// This flag sets how many times the tests are repeated. The default value +// is 1. If the value is -1 the tests are repeated forever. +GTEST_DECLARE_int32_(repeat); + +// This flag controls whether Google Test includes Google Test internal +// stack frames in failure stack traces. +GTEST_DECLARE_bool_(show_internal_stack_frames); + +// When this flag is specified, tests' order is randomized on every iteration. +GTEST_DECLARE_bool_(shuffle); + +// This flag specifies the maximum number of stack frames to be +// printed in a failure message. +GTEST_DECLARE_int32_(stack_trace_depth); + +// When this flag is specified, a failed assertion will throw an +// exception if exceptions are enabled, or exit the program with a +// non-zero code otherwise. +GTEST_DECLARE_bool_(throw_on_failure); + +// When this flag is set with a "host:port" string, on supported +// platforms test results are streamed to the specified port on +// the specified host machine. +GTEST_DECLARE_string_(stream_result_to); + +// The upper limit for valid stack trace depths.
+const int kMaxStackTraceDepth = 100; + +namespace internal { + +class AssertHelper; +class DefaultGlobalTestPartResultReporter; +class ExecDeathTest; +class NoExecDeathTest; +class FinalSuccessChecker; +class GTestFlagSaver; +class StreamingListenerTest; +class TestResultAccessor; +class TestEventListenersAccessor; +class TestEventRepeater; +class UnitTestRecordPropertyTestHelper; +class WindowsDeathTest; +class UnitTestImpl* GetUnitTestImpl(); +void ReportFailureInUnknownLocation(TestPartResult::Type result_type, + const std::string& message); + +} // namespace internal + +// The friend relationship of some of these classes is cyclic. +// If we don't forward declare them the compiler might confuse the classes +// in friendship clauses with same named classes on the scope. +class Test; +class TestCase; +class TestInfo; +class UnitTest; + +// A class for indicating whether an assertion was successful. When +// the assertion wasn't successful, the AssertionResult object +// remembers a non-empty message that describes how it failed. +// +// To create an instance of this class, use one of the factory functions +// (AssertionSuccess() and AssertionFailure()). +// +// This class is useful for two purposes: +// 1. Defining predicate functions to be used with Boolean test assertions +// EXPECT_TRUE/EXPECT_FALSE and their ASSERT_ counterparts +// 2. Defining predicate-format functions to be +// used with predicate assertions (ASSERT_PRED_FORMAT*, etc). 
+// +// For example, if you define IsEven predicate: +// +// testing::AssertionResult IsEven(int n) { +// if ((n % 2) == 0) +// return testing::AssertionSuccess(); +// else +// return testing::AssertionFailure() << n << " is odd"; +// } +// +// Then the failed expectation EXPECT_TRUE(IsEven(Fib(5))) +// will print the message +// +// Value of: IsEven(Fib(5)) +// Actual: false (5 is odd) +// Expected: true +// +// instead of a more opaque +// +// Value of: IsEven(Fib(5)) +// Actual: false +// Expected: true +// +// in case IsEven is a simple Boolean predicate. +// +// If you expect your predicate to be reused and want to support informative +// messages in EXPECT_FALSE and ASSERT_FALSE (negative assertions show up +// about half as often as positive ones in our tests), supply messages for +// both success and failure cases: +// +// testing::AssertionResult IsEven(int n) { +// if ((n % 2) == 0) +// return testing::AssertionSuccess() << n << " is even"; +// else +// return testing::AssertionFailure() << n << " is odd"; +// } +// +// Then a statement EXPECT_FALSE(IsEven(Fib(6))) will print +// +// Value of: IsEven(Fib(6)) +// Actual: true (8 is even) +// Expected: false +// +// NB: Predicates that support negative Boolean assertions have reduced +// performance in positive ones so be careful not to use them in tests +// that have lots (tens of thousands) of positive Boolean assertions. +// +// To use this class with EXPECT_PRED_FORMAT assertions such as: +// +// // Verifies that Foo() returns an even number. 
+// EXPECT_PRED_FORMAT1(IsEven, Foo()); +// +// you need to define: +// +// testing::AssertionResult IsEven(const char* expr, int n) { +// if ((n % 2) == 0) +// return testing::AssertionSuccess(); +// else +// return testing::AssertionFailure() +// << "Expected: " << expr << " is even\n Actual: it's " << n; +// } +// +// If Foo() returns 5, you will see the following message: +// +// Expected: Foo() is even +// Actual: it's 5 +// +class GTEST_API_ AssertionResult { + public: + // Copy constructor. + // Used in EXPECT_TRUE/FALSE(assertion_result). + AssertionResult(const AssertionResult& other); + // Used in the EXPECT_TRUE/FALSE(bool_expression). + explicit AssertionResult(bool success) : success_(success) {} + + // Returns true iff the assertion succeeded. + operator bool() const { return success_; } // NOLINT + + // Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE. + AssertionResult operator!() const; + + // Returns the text streamed into this AssertionResult. Test assertions + // use it when they fail (i.e., the predicate's outcome doesn't match the + // assertion's expectation). When nothing has been streamed into the + // object, returns an empty string. + const char* message() const { + return message_.get() != NULL ? message_->c_str() : ""; + } + // TODO(vladl@google.com): Remove this after making sure no clients use it. + // Deprecated; please use message() instead. + const char* failure_message() const { return message(); } + + // Streams a custom failure message into this object. + template <typename T> AssertionResult& operator<<(const T& value) { + AppendMessage(Message() << value); + return *this; + } + + // Allows streaming basic output manipulators such as endl or flush into + // this object. + AssertionResult& operator<<( + ::std::ostream& (*basic_manipulator)(::std::ostream& stream)) { + AppendMessage(Message() << basic_manipulator); + return *this; + } + + private: + // Appends the contents of message to message_. 
+ void AppendMessage(const Message& a_message) { + if (message_.get() == NULL) + message_.reset(new ::std::string); + message_->append(a_message.GetString().c_str()); + } + + // Stores result of the assertion predicate. + bool success_; + // Stores the message describing the condition in case the expectation + // construct is not satisfied with the predicate's outcome. + // Referenced via a pointer to avoid taking too much stack frame space + // with test assertions. + internal::scoped_ptr< ::std::string> message_; + + GTEST_DISALLOW_ASSIGN_(AssertionResult); +}; + +// Makes a successful assertion result. +GTEST_API_ AssertionResult AssertionSuccess(); + +// Makes a failed assertion result. +GTEST_API_ AssertionResult AssertionFailure(); + +// Makes a failed assertion result with the given failure message. +// Deprecated; use AssertionFailure() << msg. +GTEST_API_ AssertionResult AssertionFailure(const Message& msg); + +// The abstract class that all tests inherit from. +// +// In Google Test, a unit test program contains one or many TestCases, and +// each TestCase contains one or many Tests. +// +// When you define a test using the TEST macro, you don't need to +// explicitly derive from Test - the TEST macro automatically does +// this for you. +// +// The only time you derive from Test is when defining a test fixture +// to be used in a TEST_F. For example: +// +// class FooTest : public testing::Test { +// protected: +// virtual void SetUp() { ... } +// virtual void TearDown() { ... } +// ... +// }; +// +// TEST_F(FooTest, Bar) { ... } +// TEST_F(FooTest, Baz) { ... } +// +// Test is not copyable. +class GTEST_API_ Test { + public: + friend class TestInfo; + + // Defines types for pointers to functions that set up and tear down + // a test case. + typedef internal::SetUpTestCaseFunc SetUpTestCaseFunc; + typedef internal::TearDownTestCaseFunc TearDownTestCaseFunc; + + // The d'tor is virtual as we intend to inherit from Test. 
+ virtual ~Test(); + + // Sets up the stuff shared by all tests in this test case. + // + // Google Test will call Foo::SetUpTestCase() before running the first + // test in test case Foo. Hence a sub-class can define its own + // SetUpTestCase() method to shadow the one defined in the super + // class. + static void SetUpTestCase() {} + + // Tears down the stuff shared by all tests in this test case. + // + // Google Test will call Foo::TearDownTestCase() after running the last + // test in test case Foo. Hence a sub-class can define its own + // TearDownTestCase() method to shadow the one defined in the super + // class. + static void TearDownTestCase() {} + + // Returns true iff the current test has a fatal failure. + static bool HasFatalFailure(); + + // Returns true iff the current test has a non-fatal failure. + static bool HasNonfatalFailure(); + + // Returns true iff the current test has a (either fatal or + // non-fatal) failure. + static bool HasFailure() { return HasFatalFailure() || HasNonfatalFailure(); } + + // Logs a property for the current test, test case, or for the entire + // invocation of the test program when used outside of the context of a + // test case. Only the last value for a given key is remembered. These + // are public static so they can be called from utility functions that are + // not members of the test fixture. Calls to RecordProperty made during + // lifespan of the test (from the moment its constructor starts to the + // moment its destructor finishes) will be output in XML as attributes of + // the <testcase> element. Properties recorded from fixture's + // SetUpTestCase or TearDownTestCase are logged as attributes of the + // corresponding <testsuite> element. Calls to RecordProperty made in the + // global context (before or after invocation of RUN_ALL_TESTS and from + // SetUp/TearDown method of Environment objects registered with Google + // Test) will be output as attributes of the <testsuites> element. 
+ static void RecordProperty(const std::string& key, const std::string& value); + static void RecordProperty(const std::string& key, int value); + + protected: + // Creates a Test object. + Test(); + + // Sets up the test fixture. + virtual void SetUp(); + + // Tears down the test fixture. + virtual void TearDown(); + + private: + // Returns true iff the current test has the same fixture class as + // the first test in the current test case. + static bool HasSameFixtureClass(); + + // Runs the test after the test fixture has been set up. + // + // A sub-class must implement this to define the test logic. + // + // DO NOT OVERRIDE THIS FUNCTION DIRECTLY IN A USER PROGRAM. + // Instead, use the TEST or TEST_F macro. + virtual void TestBody() = 0; + + // Sets up, executes, and tears down the test. + void Run(); + + // Deletes self. We deliberately pick an unusual name for this + // internal method to avoid clashing with names used in user TESTs. + void DeleteSelf_() { delete this; } + + // Uses a GTestFlagSaver to save and restore all Google Test flags. + const internal::GTestFlagSaver* const gtest_flag_saver_; + + // Often a user mis-spells SetUp() as Setup() and spends a long time + // wondering why it is never called by Google Test. The declaration of + // the following method is solely for catching such an error at + // compile time: + // + // - The return type is deliberately chosen to be not void, so it + // will be a conflict if a user declares void Setup() in his test + // fixture. + // + // - This method is private, so it will be another compiler error + // if a user calls it from his test fixture. + // + // DO NOT OVERRIDE THIS FUNCTION. + // + // If you see an error about overriding the following function or + // about it being private, you have mis-spelled SetUp() as Setup(). + struct Setup_should_be_spelled_SetUp {}; + virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; } + + // We disallow copying Tests. 
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(Test); +}; + +typedef internal::TimeInMillis TimeInMillis; + +// A copyable object representing a user specified test property which can be +// output as a key/value string pair. +// +// Don't inherit from TestProperty as its destructor is not virtual. +class TestProperty { + public: + // C'tor. TestProperty does NOT have a default constructor. + // Always use this constructor (with parameters) to create a + // TestProperty object. + TestProperty(const std::string& a_key, const std::string& a_value) : + key_(a_key), value_(a_value) { + } + + // Gets the user supplied key. + const char* key() const { + return key_.c_str(); + } + + // Gets the user supplied value. + const char* value() const { + return value_.c_str(); + } + + // Sets a new value, overriding the one supplied in the constructor. + void SetValue(const std::string& new_value) { + value_ = new_value; + } + + private: + // The key supplied by the user. + std::string key_; + // The value supplied by the user. + std::string value_; +}; + +// The result of a single Test. This includes a list of +// TestPartResults, a list of TestProperties, a count of how many +// death tests there are in the Test, and how much time it took to run +// the Test. +// +// TestResult is not copyable. +class GTEST_API_ TestResult { + public: + // Creates an empty TestResult. + TestResult(); + + // D'tor. Do not inherit from TestResult. + ~TestResult(); + + // Gets the number of all test parts. This is the sum of the number + // of successful test parts and the number of failed test parts. + int total_part_count() const; + + // Returns the number of the test properties. + int test_property_count() const; + + // Returns true iff the test passed (i.e. no test part failed). + bool Passed() const { return !Failed(); } + + // Returns true iff the test failed. + bool Failed() const; + + // Returns true iff the test fatally failed. 
+ bool HasFatalFailure() const; + + // Returns true iff the test has a non-fatal failure. + bool HasNonfatalFailure() const; + + // Returns the elapsed time, in milliseconds. + TimeInMillis elapsed_time() const { return elapsed_time_; } + + // Returns the i-th test part result among all the results. i can range + // from 0 to total_part_count() - 1. If i is not in that range, aborts + // the program. + const TestPartResult& GetTestPartResult(int i) const; + + // Returns the i-th test property. i can range from 0 to + // test_property_count() - 1. If i is not in that range, aborts the + // program. + const TestProperty& GetTestProperty(int i) const; + + private: + friend class TestInfo; + friend class TestCase; + friend class UnitTest; + friend class internal::DefaultGlobalTestPartResultReporter; + friend class internal::ExecDeathTest; + friend class internal::TestResultAccessor; + friend class internal::UnitTestImpl; + friend class internal::WindowsDeathTest; + + // Gets the vector of TestPartResults. + const std::vector<TestPartResult>& test_part_results() const { + return test_part_results_; + } + + // Gets the vector of TestProperties. + const std::vector<TestProperty>& test_properties() const { + return test_properties_; + } + + // Sets the elapsed time. + void set_elapsed_time(TimeInMillis elapsed) { elapsed_time_ = elapsed; } + + // Adds a test property to the list. The property is validated and may add + // a non-fatal failure if invalid (e.g., if it conflicts with reserved + // key names). If a property is already recorded for the same key, the + // value will be updated, rather than storing multiple values for the same + // key. xml_element specifies the element for which the property is being + // recorded and is used for validation. + void RecordProperty(const std::string& xml_element, + const TestProperty& test_property); + + // Adds a failure if the key is a reserved attribute of Google Test + // testcase tags. Returns true if the property is valid. 
+ // TODO(russr): Validate attribute names are legal and human readable. + static bool ValidateTestProperty(const std::string& xml_element, + const TestProperty& test_property); + + // Adds a test part result to the list. + void AddTestPartResult(const TestPartResult& test_part_result); + + // Returns the death test count. + int death_test_count() const { return death_test_count_; } + + // Increments the death test count, returning the new count. + int increment_death_test_count() { return ++death_test_count_; } + + // Clears the test part results. + void ClearTestPartResults(); + + // Clears the object. + void Clear(); + + // Protects mutable state of the property vector and of owned + // properties, whose values may be updated. + internal::Mutex test_properites_mutex_; + + // The vector of TestPartResults + std::vector<TestPartResult> test_part_results_; + // The vector of TestProperties + std::vector<TestProperty> test_properties_; + // Running count of death tests. + int death_test_count_; + // The elapsed time, in milliseconds. + TimeInMillis elapsed_time_; + + // We disallow copying TestResult. + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestResult); +}; // class TestResult + +// A TestInfo object stores the following information about a test: +// +// Test case name +// Test name +// Whether the test should be run +// A function pointer that creates the test object when invoked +// Test result +// +// The constructor of TestInfo registers itself with the UnitTest +// singleton such that the RUN_ALL_TESTS() macro knows which tests to +// run. +class GTEST_API_ TestInfo { + public: + // Destructs a TestInfo object. This function is not virtual, so + // don't inherit from TestInfo. + ~TestInfo(); + + // Returns the test case name. + const char* test_case_name() const { return test_case_name_.c_str(); } + + // Returns the test name. 
+ const char* name() const { return name_.c_str(); } + + // Returns the name of the parameter type, or NULL if this is not a typed + // or a type-parameterized test. + const char* type_param() const { + if (type_param_.get() != NULL) + return type_param_->c_str(); + return NULL; + } + + // Returns the text representation of the value parameter, or NULL if this + // is not a value-parameterized test. + const char* value_param() const { + if (value_param_.get() != NULL) + return value_param_->c_str(); + return NULL; + } + + // Returns true if this test should run, that is if the test is not + // disabled (or it is disabled but the also_run_disabled_tests flag has + // been specified) and its full name matches the user-specified filter. + // + // Google Test allows the user to filter the tests by their full names. + // The full name of a test Bar in test case Foo is defined as + // "Foo.Bar". Only the tests that match the filter will run. + // + // A filter is a colon-separated list of glob (not regex) patterns, + // optionally followed by a '-' and a colon-separated list of + // negative patterns (tests to exclude). A test is run if it + // matches one of the positive patterns and does not match any of + // the negative patterns. + // + // For example, *A*:Foo.* is a filter that matches any string that + // contains the character 'A' or starts with "Foo.". + bool should_run() const { return should_run_; } + + // Returns true iff this test will appear in the XML report. + bool is_reportable() const { + // For now, the XML report includes all tests matching the filter. + // In the future, we may trim tests that are excluded because of + // sharding. + return matches_filter_; + } + + // Returns the result of the test. 
+ const TestResult* result() const { return &result_; } + + private: +#if GTEST_HAS_DEATH_TEST + friend class internal::DefaultDeathTestFactory; +#endif // GTEST_HAS_DEATH_TEST + friend class Test; + friend class TestCase; + friend class internal::UnitTestImpl; + friend class internal::StreamingListenerTest; + friend TestInfo* internal::MakeAndRegisterTestInfo( + const char* test_case_name, + const char* name, + const char* type_param, + const char* value_param, + internal::TypeId fixture_class_id, + Test::SetUpTestCaseFunc set_up_tc, + Test::TearDownTestCaseFunc tear_down_tc, + internal::TestFactoryBase* factory); + + // Constructs a TestInfo object. The newly constructed instance assumes + // ownership of the factory object. + TestInfo(const std::string& test_case_name, + const std::string& name, + const char* a_type_param, // NULL if not a type-parameterized test + const char* a_value_param, // NULL if not a value-parameterized test + internal::TypeId fixture_class_id, + internal::TestFactoryBase* factory); + + // Increments the number of death tests encountered in this test so + // far. + int increment_death_test_count() { + return result_.increment_death_test_count(); + } + + // Creates the test object, runs it, records its result, and then + // deletes it. + void Run(); + + static void ClearTestResult(TestInfo* test_info) { + test_info->result_.Clear(); + } + + // These fields are immutable properties of the test. + const std::string test_case_name_; // Test case name + const std::string name_; // Test name + // Name of the parameter type, or NULL if this is not a typed or a + // type-parameterized test. + const internal::scoped_ptr<const ::std::string> type_param_; + // Text representation of the value parameter, or NULL if this is not a + // value-parameterized test. 
+ const internal::scoped_ptr<const ::std::string> value_param_; + const internal::TypeId fixture_class_id_; // ID of the test fixture class + bool should_run_; // True iff this test should run + bool is_disabled_; // True iff this test is disabled + bool matches_filter_; // True if this test matches the + // user-specified filter. + internal::TestFactoryBase* const factory_; // The factory that creates + // the test object + + // This field is mutable and needs to be reset before running the + // test for the second time. + TestResult result_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestInfo); +}; + +// A test case, which consists of a vector of TestInfos. +// +// TestCase is not copyable. +class GTEST_API_ TestCase { + public: + // Creates a TestCase with the given name. + // + // TestCase does NOT have a default constructor. Always use this + // constructor to create a TestCase object. + // + // Arguments: + // + // name: name of the test case + // a_type_param: the name of the test's type parameter, or NULL if + // this is not a type-parameterized test. + // set_up_tc: pointer to the function that sets up the test case + // tear_down_tc: pointer to the function that tears down the test case + TestCase(const char* name, const char* a_type_param, + Test::SetUpTestCaseFunc set_up_tc, + Test::TearDownTestCaseFunc tear_down_tc); + + // Destructor of TestCase. + virtual ~TestCase(); + + // Gets the name of the TestCase. + const char* name() const { return name_.c_str(); } + + // Returns the name of the parameter type, or NULL if this is not a + // type-parameterized test case. + const char* type_param() const { + if (type_param_.get() != NULL) + return type_param_->c_str(); + return NULL; + } + + // Returns true if any test in this test case should run. + bool should_run() const { return should_run_; } + + // Gets the number of successful tests in this test case. + int successful_test_count() const; + + // Gets the number of failed tests in this test case. 
+ int failed_test_count() const; + + // Gets the number of disabled tests that will be reported in the XML report. + int reportable_disabled_test_count() const; + + // Gets the number of disabled tests in this test case. + int disabled_test_count() const; + + // Gets the number of tests to be printed in the XML report. + int reportable_test_count() const; + + // Get the number of tests in this test case that should run. + int test_to_run_count() const; + + // Gets the number of all tests in this test case. + int total_test_count() const; + + // Returns true iff the test case passed. + bool Passed() const { return !Failed(); } + + // Returns true iff the test case failed. + bool Failed() const { return failed_test_count() > 0; } + + // Returns the elapsed time, in milliseconds. + TimeInMillis elapsed_time() const { return elapsed_time_; } + + // Returns the i-th test among all the tests. i can range from 0 to + // total_test_count() - 1. If i is not in that range, returns NULL. + const TestInfo* GetTestInfo(int i) const; + + // Returns the TestResult that holds test properties recorded during + // execution of SetUpTestCase and TearDownTestCase. + const TestResult& ad_hoc_test_result() const { return ad_hoc_test_result_; } + + private: + friend class Test; + friend class internal::UnitTestImpl; + + // Gets the (mutable) vector of TestInfos in this TestCase. + std::vector<TestInfo*>& test_info_list() { return test_info_list_; } + + // Gets the (immutable) vector of TestInfos in this TestCase. + const std::vector<TestInfo*>& test_info_list() const { + return test_info_list_; + } + + // Returns the i-th test among all the tests. i can range from 0 to + // total_test_count() - 1. If i is not in that range, returns NULL. + TestInfo* GetMutableTestInfo(int i); + + // Sets the should_run member. + void set_should_run(bool should) { should_run_ = should; } + + // Adds a TestInfo to this test case. Will delete the TestInfo upon + // destruction of the TestCase object. 
+ void AddTestInfo(TestInfo * test_info); + + // Clears the results of all tests in this test case. + void ClearResult(); + + // Clears the results of all tests in the given test case. + static void ClearTestCaseResult(TestCase* test_case) { + test_case->ClearResult(); + } + + // Runs every test in this TestCase. + void Run(); + + // Runs SetUpTestCase() for this TestCase. This wrapper is needed + // for catching exceptions thrown from SetUpTestCase(). + void RunSetUpTestCase() { (*set_up_tc_)(); } + + // Runs TearDownTestCase() for this TestCase. This wrapper is + // needed for catching exceptions thrown from TearDownTestCase(). + void RunTearDownTestCase() { (*tear_down_tc_)(); } + + // Returns true iff test passed. + static bool TestPassed(const TestInfo* test_info) { + return test_info->should_run() && test_info->result()->Passed(); + } + + // Returns true iff test failed. + static bool TestFailed(const TestInfo* test_info) { + return test_info->should_run() && test_info->result()->Failed(); + } + + // Returns true iff the test is disabled and will be reported in the XML + // report. + static bool TestReportableDisabled(const TestInfo* test_info) { + return test_info->is_reportable() && test_info->is_disabled_; + } + + // Returns true iff test is disabled. + static bool TestDisabled(const TestInfo* test_info) { + return test_info->is_disabled_; + } + + // Returns true iff this test will appear in the XML report. + static bool TestReportable(const TestInfo* test_info) { + return test_info->is_reportable(); + } + + // Returns true if the given test should run. + static bool ShouldRunTest(const TestInfo* test_info) { + return test_info->should_run(); + } + + // Shuffles the tests in this test case. + void ShuffleTests(internal::Random* random); + + // Restores the test order to before the first shuffle. + void UnshuffleTests(); + + // Name of the test case. 
+ std::string name_; + // Name of the parameter type, or NULL if this is not a typed or a + // type-parameterized test. + const internal::scoped_ptr<const ::std::string> type_param_; + // The vector of TestInfos in their original order. It owns the + // elements in the vector. + std::vector<TestInfo*> test_info_list_; + // Provides a level of indirection for the test list to allow easy + // shuffling and restoring the test order. The i-th element in this + // vector is the index of the i-th test in the shuffled test list. + std::vector<int> test_indices_; + // Pointer to the function that sets up the test case. + Test::SetUpTestCaseFunc set_up_tc_; + // Pointer to the function that tears down the test case. + Test::TearDownTestCaseFunc tear_down_tc_; + // True iff any test in this test case should run. + bool should_run_; + // Elapsed time, in milliseconds. + TimeInMillis elapsed_time_; + // Holds test properties recorded during execution of SetUpTestCase and + // TearDownTestCase. + TestResult ad_hoc_test_result_; + + // We disallow copying TestCases. + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestCase); +}; + +// An Environment object is capable of setting up and tearing down an +// environment. The user should subclass this to define his own +// environment(s). +// +// An Environment object does the set-up and tear-down in virtual +// methods SetUp() and TearDown() instead of the constructor and the +// destructor, as: +// +// 1. You cannot safely throw from a destructor. This is a problem +// as in some cases Google Test is used where exceptions are enabled, and +// we may want to implement ASSERT_* using exceptions where they are +// available. +// 2. You cannot use ASSERT_* directly in a constructor or +// destructor. +class Environment { + public: + // The d'tor is virtual as we need to subclass Environment. + virtual ~Environment() {} + + // Override this to define how to set up the environment. 
+ virtual void SetUp() {} + + // Override this to define how to tear down the environment. + virtual void TearDown() {} + private: + // If you see an error about overriding the following function or + // about it being private, you have mis-spelled SetUp() as Setup(). + struct Setup_should_be_spelled_SetUp {}; + virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; } +}; + +// The interface for tracing execution of tests. The methods are organized in +// the order the corresponding events are fired. +class TestEventListener { + public: + virtual ~TestEventListener() {} + + // Fired before any test activity starts. + virtual void OnTestProgramStart(const UnitTest& unit_test) = 0; + + // Fired before each iteration of tests starts. There may be more than + // one iteration if GTEST_FLAG(repeat) is set. iteration is the iteration + // index, starting from 0. + virtual void OnTestIterationStart(const UnitTest& unit_test, + int iteration) = 0; + + // Fired before environment set-up for each iteration of tests starts. + virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test) = 0; + + // Fired after environment set-up for each iteration of tests ends. + virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) = 0; + + // Fired before the test case starts. + virtual void OnTestCaseStart(const TestCase& test_case) = 0; + + // Fired before the test starts. + virtual void OnTestStart(const TestInfo& test_info) = 0; + + // Fired after a failed assertion or a SUCCEED() invocation. + virtual void OnTestPartResult(const TestPartResult& test_part_result) = 0; + + // Fired after the test ends. + virtual void OnTestEnd(const TestInfo& test_info) = 0; + + // Fired after the test case ends. + virtual void OnTestCaseEnd(const TestCase& test_case) = 0; + + // Fired before environment tear-down for each iteration of tests starts. 
+ virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test) = 0; + + // Fired after environment tear-down for each iteration of tests ends. + virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) = 0; + + // Fired after each iteration of tests finishes. + virtual void OnTestIterationEnd(const UnitTest& unit_test, + int iteration) = 0; + + // Fired after all test activities have ended. + virtual void OnTestProgramEnd(const UnitTest& unit_test) = 0; +}; + +// The convenience class for users who need to override just one or two +// methods and are not concerned that a possible change to a signature of +// the methods they override will not be caught during the build. For +// comments about each method please see the definition of TestEventListener +// above. +class EmptyTestEventListener : public TestEventListener { + public: + virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {} + virtual void OnTestIterationStart(const UnitTest& /*unit_test*/, + int /*iteration*/) {} + virtual void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) {} + virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {} + virtual void OnTestCaseStart(const TestCase& /*test_case*/) {} + virtual void OnTestStart(const TestInfo& /*test_info*/) {} + virtual void OnTestPartResult(const TestPartResult& /*test_part_result*/) {} + virtual void OnTestEnd(const TestInfo& /*test_info*/) {} + virtual void OnTestCaseEnd(const TestCase& /*test_case*/) {} + virtual void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) {} + virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {} + virtual void OnTestIterationEnd(const UnitTest& /*unit_test*/, + int /*iteration*/) {} + virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {} +}; + +// TestEventListeners lets users add listeners to track events in Google Test. 
+class GTEST_API_ TestEventListeners { + public: + TestEventListeners(); + ~TestEventListeners(); + + // Appends an event listener to the end of the list. Google Test assumes + // the ownership of the listener (i.e. it will delete the listener when + // the test program finishes). + void Append(TestEventListener* listener); + + // Removes the given event listener from the list and returns it. It then + // becomes the caller's responsibility to delete the listener. Returns + // NULL if the listener is not found in the list. + TestEventListener* Release(TestEventListener* listener); + + // Returns the standard listener responsible for the default console + // output. Can be removed from the listeners list to shut down default + // console output. Note that removing this object from the listener list + // with Release transfers its ownership to the caller and makes this + // function return NULL the next time. + TestEventListener* default_result_printer() const { + return default_result_printer_; + } + + // Returns the standard listener responsible for the default XML output + // controlled by the --gtest_output=xml flag. Can be removed from the + // listeners list by users who want to shut down the default XML output + // controlled by this flag and substitute it with a custom one. Note that + // removing this object from the listener list with Release transfers its + // ownership to the caller and makes this function return NULL the next + // time. + TestEventListener* default_xml_generator() const { + return default_xml_generator_; + } + + private: + friend class TestCase; + friend class TestInfo; + friend class internal::DefaultGlobalTestPartResultReporter; + friend class internal::NoExecDeathTest; + friend class internal::TestEventListenersAccessor; + friend class internal::UnitTestImpl; + + // Returns repeater that broadcasts the TestEventListener events to all + // subscribers. 
+ TestEventListener* repeater(); + + // Sets the default_result_printer attribute to the provided listener. + // The listener is also added to the listener list and previous + // default_result_printer is removed from it and deleted. The listener can + // also be NULL in which case it will not be added to the list. Does + // nothing if the previous and the current listener objects are the same. + void SetDefaultResultPrinter(TestEventListener* listener); + + // Sets the default_xml_generator attribute to the provided listener. The + // listener is also added to the listener list and previous + // default_xml_generator is removed from it and deleted. The listener can + // also be NULL in which case it will not be added to the list. Does + // nothing if the previous and the current listener objects are the same. + void SetDefaultXmlGenerator(TestEventListener* listener); + + // Controls whether events will be forwarded by the repeater to the + // listeners in the list. + bool EventForwardingEnabled() const; + void SuppressEventForwarding(); + + // The actual list of listeners. + internal::TestEventRepeater* repeater_; + // Listener responsible for the standard result output. + TestEventListener* default_result_printer_; + // Listener responsible for the creation of the XML output file. + TestEventListener* default_xml_generator_; + + // We disallow copying TestEventListeners. + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventListeners); +}; + +// A UnitTest consists of a vector of TestCases. +// +// This is a singleton class. The only instance of UnitTest is +// created when UnitTest::GetInstance() is first called. This +// instance is never deleted. +// +// UnitTest is not copyable. +// +// This class is thread-safe as long as the methods are called +// according to their specification. +class GTEST_API_ UnitTest { + public: + // Gets the singleton UnitTest object. The first time this method + // is called, a UnitTest object is constructed and returned. 
+ // Consecutive calls will return the same object. + static UnitTest* GetInstance(); + + // Runs all tests in this UnitTest object and prints the result. + // Returns 0 if successful, or 1 otherwise. + // + // This method can only be called from the main thread. + // + // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. + int Run() GTEST_MUST_USE_RESULT_; + + // Returns the working directory when the first TEST() or TEST_F() + // was executed. The UnitTest object owns the string. + const char* original_working_dir() const; + + // Returns the TestCase object for the test that's currently running, + // or NULL if no test is running. + const TestCase* current_test_case() const + GTEST_LOCK_EXCLUDED_(mutex_); + + // Returns the TestInfo object for the test that's currently running, + // or NULL if no test is running. + const TestInfo* current_test_info() const + GTEST_LOCK_EXCLUDED_(mutex_); + + // Returns the random seed used at the start of the current test run. + int random_seed() const; + +#if GTEST_HAS_PARAM_TEST + // Returns the ParameterizedTestCaseRegistry object used to keep track of + // value-parameterized tests and instantiate and register them. + // + // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. + internal::ParameterizedTestCaseRegistry& parameterized_test_registry() + GTEST_LOCK_EXCLUDED_(mutex_); +#endif // GTEST_HAS_PARAM_TEST + + // Gets the number of successful test cases. + int successful_test_case_count() const; + + // Gets the number of failed test cases. + int failed_test_case_count() const; + + // Gets the number of all test cases. + int total_test_case_count() const; + + // Gets the number of all test cases that contain at least one test + // that should run. + int test_case_to_run_count() const; + + // Gets the number of successful tests. + int successful_test_count() const; + + // Gets the number of failed tests. 
+ int failed_test_count() const; + + // Gets the number of disabled tests that will be reported in the XML report. + int reportable_disabled_test_count() const; + + // Gets the number of disabled tests. + int disabled_test_count() const; + + // Gets the number of tests to be printed in the XML report. + int reportable_test_count() const; + + // Gets the number of all tests. + int total_test_count() const; + + // Gets the number of tests that should run. + int test_to_run_count() const; + + // Gets the time of the test program start, in ms from the start of the + // UNIX epoch. + TimeInMillis start_timestamp() const; + + // Gets the elapsed time, in milliseconds. + TimeInMillis elapsed_time() const; + + // Returns true iff the unit test passed (i.e. all test cases passed). + bool Passed() const; + + // Returns true iff the unit test failed (i.e. some test case failed + // or something outside of all tests failed). + bool Failed() const; + + // Gets the i-th test case among all the test cases. i can range from 0 to + // total_test_case_count() - 1. If i is not in that range, returns NULL. + const TestCase* GetTestCase(int i) const; + + // Returns the TestResult containing information on test failures and + // properties logged outside of individual test cases. + const TestResult& ad_hoc_test_result() const; + + // Returns the list of event listeners that can be used to track events + // inside Google Test. + TestEventListeners& listeners(); + + private: + // Registers and returns a global test environment. When a test + // program is run, all global test environments will be set-up in + // the order they were registered. After all tests in the program + // have finished, all global test environments will be torn-down in + // the *reverse* order they were registered. + // + // The UnitTest object takes ownership of the given environment. + // + // This method can only be called from the main thread. 
+ Environment* AddEnvironment(Environment* env); + + // Adds a TestPartResult to the current TestResult object. All + // Google Test assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) + // eventually call this to report their results. The user code + // should use the assertion macros instead of calling this directly. + void AddTestPartResult(TestPartResult::Type result_type, + const char* file_name, + int line_number, + const std::string& message, + const std::string& os_stack_trace) + GTEST_LOCK_EXCLUDED_(mutex_); + + // Adds a TestProperty to the current TestResult object when invoked from + // inside a test, to current TestCase's ad_hoc_test_result_ when invoked + // from SetUpTestCase or TearDownTestCase, or to the global property set + // when invoked elsewhere. If the result already contains a property with + // the same key, the value will be updated. + void RecordProperty(const std::string& key, const std::string& value); + + // Gets the i-th test case among all the test cases. i can range from 0 to + // total_test_case_count() - 1. If i is not in that range, returns NULL. + TestCase* GetMutableTestCase(int i); + + // Accessors for the implementation object. + internal::UnitTestImpl* impl() { return impl_; } + const internal::UnitTestImpl* impl() const { return impl_; } + + // These classes and funcions are friends as they need to access private + // members of UnitTest. + friend class Test; + friend class internal::AssertHelper; + friend class internal::ScopedTrace; + friend class internal::StreamingListenerTest; + friend class internal::UnitTestRecordPropertyTestHelper; + friend Environment* AddGlobalTestEnvironment(Environment* env); + friend internal::UnitTestImpl* internal::GetUnitTestImpl(); + friend void internal::ReportFailureInUnknownLocation( + TestPartResult::Type result_type, + const std::string& message); + + // Creates an empty UnitTest. 
+ UnitTest(); + + // D'tor + virtual ~UnitTest(); + + // Pushes a trace defined by SCOPED_TRACE() on to the per-thread + // Google Test trace stack. + void PushGTestTrace(const internal::TraceInfo& trace) + GTEST_LOCK_EXCLUDED_(mutex_); + + // Pops a trace from the per-thread Google Test trace stack. + void PopGTestTrace() + GTEST_LOCK_EXCLUDED_(mutex_); + + // Protects mutable state in *impl_. This is mutable as some const + // methods need to lock it too. + mutable internal::Mutex mutex_; + + // Opaque implementation object. This field is never changed once + // the object is constructed. We don't mark it as const here, as + // doing so will cause a warning in the constructor of UnitTest. + // Mutable state in *impl_ is protected by mutex_. + internal::UnitTestImpl* impl_; + + // We disallow copying UnitTest. + GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTest); +}; + +// A convenient wrapper for adding an environment for the test +// program. +// +// You should call this before RUN_ALL_TESTS() is called, probably in +// main(). If you use gtest_main, you need to call this before main() +// starts for it to take effect. For example, you can define a global +// variable like this: +// +// testing::Environment* const foo_env = +// testing::AddGlobalTestEnvironment(new FooEnvironment); +// +// However, we strongly recommend you to write your own main() and +// call AddGlobalTestEnvironment() there, as relying on initialization +// of global variables makes the code harder to read and may cause +// problems when you register multiple environments from different +// translation units and the environments have dependencies among them +// (remember that the compiler doesn't guarantee the order in which +// global variables from different translation units are initialized). +inline Environment* AddGlobalTestEnvironment(Environment* env) { + return UnitTest::GetInstance()->AddEnvironment(env); +} + +// Initializes Google Test. 
This must be called before calling +// RUN_ALL_TESTS(). In particular, it parses a command line for the +// flags that Google Test recognizes. Whenever a Google Test flag is +// seen, it is removed from argv, and *argc is decremented. +// +// No value is returned. Instead, the Google Test flag variables are +// updated. +// +// Calling the function for the second time has no user-visible effect. +GTEST_API_ void InitGoogleTest(int* argc, char** argv); + +// This overloaded version can be used in Windows programs compiled in +// UNICODE mode. +GTEST_API_ void InitGoogleTest(int* argc, wchar_t** argv); + +namespace internal { + +// FormatForComparison<ToPrint, OtherOperand>::Format(value) formats a +// value of type ToPrint that is an operand of a comparison assertion +// (e.g. ASSERT_EQ). OtherOperand is the type of the other operand in +// the comparison, and is used to help determine the best way to +// format the value. In particular, when the value is a C string +// (char pointer) and the other operand is an STL string object, we +// want to format the C string as a string, since we know it is +// compared by value with the string object. If the value is a char +// pointer but the other operand is not an STL string object, we don't +// know whether the pointer is supposed to point to a NUL-terminated +// string, and thus want to print it as a pointer to be safe. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. + +// The default case. +template <typename ToPrint, typename OtherOperand> +class FormatForComparison { + public: + static ::std::string Format(const ToPrint& value) { + return ::testing::PrintToString(value); + } +}; + +// Array. 
+template <typename ToPrint, size_t N, typename OtherOperand> +class FormatForComparison<ToPrint[N], OtherOperand> { + public: + static ::std::string Format(const ToPrint* value) { + return FormatForComparison<const ToPrint*, OtherOperand>::Format(value); + } +}; + +// By default, print C string as pointers to be safe, as we don't know +// whether they actually point to a NUL-terminated string. + +#define GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(CharType) \ + template <typename OtherOperand> \ + class FormatForComparison<CharType*, OtherOperand> { \ + public: \ + static ::std::string Format(CharType* value) { \ + return ::testing::PrintToString(static_cast<const void*>(value)); \ + } \ + } + +GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char); +GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char); +GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(wchar_t); +GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const wchar_t); + +#undef GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_ + +// If a C string is compared with an STL string object, we know it's meant +// to point to a NUL-terminated string, and thus can print it as a string. 
// Generates a full specialization of FormatForComparison for the pair
// (CharType*, OtherStringType) whose Format() passes the pointer itself to
// ::testing::PrintToString() instead of casting it to const void* first.
#define GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(CharType, OtherStringType) \
  template <>                                                           \
  class FormatForComparison<CharType*, OtherStringType> {               \
   public:                                                              \
    static ::std::string Format(CharType* value) {                      \
      return ::testing::PrintToString(value);                           \
    }                                                                   \
  }

GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::std::string);
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::std::string);

#if GTEST_HAS_GLOBAL_STRING
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::string);
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::string);
#endif

#if GTEST_HAS_GLOBAL_WSTRING
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::wstring);
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::wstring);
#endif

#if GTEST_HAS_STD_WSTRING
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::std::wstring);
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::std::wstring);
#endif

#undef GTEST_IMPL_FORMAT_C_STRING_AS_STRING_

// Formats a comparison assertion (e.g. ASSERT_EQ, EXPECT_LT, and etc)
// operand to be used in a failure message. The type (but not value)
// of the other operand may affect the format. This allows us to
// print a char* as a raw pointer when it is compared against another
// char* or void*, and print it as a C string when it is compared
// against an std::string object, for example.
//
// The second argument is used only for template argument deduction of T2;
// its value is never read.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
template <typename T1, typename T2>
std::string FormatForComparisonFailureMessage(
    const T1& value, const T2& /* other_operand */) {
  return FormatForComparison<T1, T2>::Format(value);
}

// The helper function for {ASSERT|EXPECT}_EQ.
//
// Returns AssertionSuccess() when `expected == actual`; otherwise returns
// the result of EqFailure() built from the two expression strings and both
// operands formatted with FormatForComparisonFailureMessage().
template <typename T1, typename T2>
AssertionResult CmpHelperEQ(const char* expected_expression,
                            const char* actual_expression,
                            const T1& expected,
                            const T2& actual) {
#ifdef _MSC_VER
# pragma warning(push)          // Saves the current warning state.
# pragma warning(disable:4389)  // Temporarily disables warning on
                                // signed/unsigned mismatch.
#endif

  // The comparison sits between the push and pop pragmas so that the
  // disabled warning covers this expression.
  if (expected == actual) {
    return AssertionSuccess();
  }

#ifdef _MSC_VER
# pragma warning(pop)          // Restores the warning state.
#endif

  return EqFailure(expected_expression,
                   actual_expression,
                   FormatForComparisonFailureMessage(expected, actual),
                   FormatForComparisonFailureMessage(actual, expected),
                   false);
}

// With this overloaded version, we allow anonymous enums to be used
// in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous enums
// can be implicitly cast to BiggestInt.
GTEST_API_ AssertionResult CmpHelperEQ(const char* expected_expression,
                                       const char* actual_expression,
                                       BiggestInt expected,
                                       BiggestInt actual);

// The helper class for {ASSERT|EXPECT}_EQ. The template argument
// lhs_is_null_literal is true iff the first argument to ASSERT_EQ()
// is a null pointer literal. The following default implementation is
// for lhs_is_null_literal being false.
template <bool lhs_is_null_literal>
class EqHelper {
 public:
  // This templatized version is for the general case.
  template <typename T1, typename T2>
  static AssertionResult Compare(const char* expected_expression,
                                 const char* actual_expression,
                                 const T1& expected,
                                 const T2& actual) {
    return CmpHelperEQ(expected_expression, actual_expression, expected,
                       actual);
  }

  // With this overloaded version, we allow anonymous enums to be used
  // in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous
  // enums can be implicitly cast to BiggestInt.
  //
  // Even though its body looks the same as the above version, we
  // cannot merge the two, as it will make anonymous enums unhappy.
  static AssertionResult Compare(const char* expected_expression,
                                 const char* actual_expression,
                                 BiggestInt expected,
                                 BiggestInt actual) {
    return CmpHelperEQ(expected_expression, actual_expression, expected,
                       actual);
  }
};

// This specialization is used when the first argument to ASSERT_EQ()
// is a null pointer literal, like NULL, false, or 0.
template <>
class EqHelper<true> {
 public:
  // We define two overloaded versions of Compare(). The first
  // version will be picked when the second argument to ASSERT_EQ() is
  // NOT a pointer, e.g. ASSERT_EQ(0, AnIntFunction()) or
  // EXPECT_EQ(false, a_bool).
  template <typename T1, typename T2>
  static AssertionResult Compare(
      const char* expected_expression,
      const char* actual_expression,
      const T1& expected,
      const T2& actual,
      // The following line prevents this overload from being considered if T2
      // is not a pointer type. We need this because ASSERT_EQ(NULL, my_ptr)
      // expands to Compare("", "", NULL, my_ptr), which requires a conversion
      // to match the Secret* in the other overload, which would otherwise make
      // this template match better.
      // NOTE(review): the condition below is !is_pointer<T2>, i.e. the
      // overload is removed when T2 IS a pointer type; the first sentence
      // above appears to say the opposite -- confirm the intended wording.
      typename EnableIf<!is_pointer<T2>::value>::type* = 0) {
    return CmpHelperEQ(expected_expression, actual_expression, expected,
                       actual);
  }

  // This version will be picked when the second argument to ASSERT_EQ() is a
  // pointer, e.g. ASSERT_EQ(NULL, a_pointer).
  template <typename T>
  static AssertionResult Compare(
      const char* expected_expression,
      const char* actual_expression,
      // We used to have a second template parameter instead of Secret*. That
      // template parameter would deduce to 'long', making this a better match
      // than the first overload even without the first overload's EnableIf.
      // Unfortunately, gcc with -Wconversion-null warns when "passing NULL to
      // non-pointer argument" (even a deduced integral argument), so the old
      // implementation caused warnings in user code.
      Secret* /* expected (NULL) */,
      T* actual) {
    // We already know that 'expected' is a null pointer, so a typed null
    // pointer of the same type as 'actual' is compared instead.
    return CmpHelperEQ(expected_expression, actual_expression,
                       static_cast<T*>(NULL), actual);
  }
};

// A macro for implementing the helper functions needed to implement
// ASSERT_?? and EXPECT_??. It is here just to avoid copy-and-paste
// of similar code.
//
// For each templatized helper function, we also define an overloaded
// version for BiggestInt in order to reduce code bloat and allow
// anonymous enums to be used with {ASSERT|EXPECT}_?? when compiled
// with gcc 4.
//
// Each expansion defines the template CmpHelper<op_name> and declares (but
// does not define) the BiggestInt overload; the trailing semicolon at the
// use site terminates that declaration. `#op` stringifies the operator into
// the failure message.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
#define GTEST_IMPL_CMP_HELPER_(op_name, op)\
template <typename T1, typename T2>\
AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
                                   const T1& val1, const T2& val2) {\
  if (val1 op val2) {\
    return AssertionSuccess();\
  } else {\
    return AssertionFailure() \
        << "Expected: (" << expr1 << ") " #op " (" << expr2\
        << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\
        << " vs " << FormatForComparisonFailureMessage(val2, val1);\
  }\
}\
GTEST_API_ AssertionResult CmpHelper##op_name(\
    const char* expr1, const char* expr2, BiggestInt val1, BiggestInt val2)

// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.

// Implements the helper function for {ASSERT|EXPECT}_NE
GTEST_IMPL_CMP_HELPER_(NE, !=);
// Implements the helper function for {ASSERT|EXPECT}_LE
GTEST_IMPL_CMP_HELPER_(LE, <=);
// Implements the helper function for {ASSERT|EXPECT}_LT
GTEST_IMPL_CMP_HELPER_(LT, <);
// Implements the helper function for {ASSERT|EXPECT}_GE
GTEST_IMPL_CMP_HELPER_(GE, >=);
// Implements the helper function for {ASSERT|EXPECT}_GT
GTEST_IMPL_CMP_HELPER_(GT, >);

#undef GTEST_IMPL_CMP_HELPER_

// The helper function for {ASSERT|EXPECT}_STREQ.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
// (Narrow-string overload; only declared here. The *_expression parameters
// are presumably the stringified macro arguments -- confirm against the
// definition, which is not in this header.)
GTEST_API_ AssertionResult CmpHelperSTREQ(const char* expected_expression,
                                          const char* actual_expression,
                                          const char* expected,
                                          const char* actual);

// The helper function for {ASSERT|EXPECT}_STRCASEEQ.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTRCASEEQ(const char* expected_expression,
                                              const char* actual_expression,
                                              const char* expected,
                                              const char* actual);

// The helper function for {ASSERT|EXPECT}_STRNE.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
                                          const char* s2_expression,
                                          const char* s1,
                                          const char* s2);

// The helper function for {ASSERT|EXPECT}_STRCASENE.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
                                              const char* s2_expression,
                                              const char* s1,
                                              const char* s2);


// Helper function for *_STREQ on wide strings. Overloads the narrow-string
// CmpHelperSTREQ on the operand type (const wchar_t*); the expression
// strings remain const char*.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTREQ(const char* expected_expression,
                                          const char* actual_expression,
                                          const wchar_t* expected,
                                          const wchar_t* actual);

// Helper function for *_STRNE on wide strings. Overloads the narrow-string
// CmpHelperSTRNE on the operand type (const wchar_t*).
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
                                          const char* s2_expression,
                                          const wchar_t* s1,
                                          const wchar_t* s2);

}  // namespace internal

// IsSubstring() and IsNotSubstring() are intended to be used as the
// first argument to {EXPECT,ASSERT}_PRED_FORMAT2(), not by
// themselves. They check whether needle is a substring of haystack
// (NULL is considered a substring of itself only), and return an
// appropriate error message when they fail.
//
// The {needle,haystack}_expr arguments are the stringified
// expressions that generated the two real arguments.
// Overloads taking NUL-terminated narrow (const char*) and wide
// (const wchar_t*) C strings.
GTEST_API_ AssertionResult IsSubstring(
    const char* needle_expr, const char* haystack_expr,
    const char* needle, const char* haystack);
GTEST_API_ AssertionResult IsSubstring(
    const char* needle_expr, const char* haystack_expr,
    const wchar_t* needle, const wchar_t* haystack);
GTEST_API_ AssertionResult IsNotSubstring(
    const char* needle_expr, const char* haystack_expr,
    const char* needle, const char* haystack);
GTEST_API_ AssertionResult IsNotSubstring(
    const char* needle_expr, const char* haystack_expr,
    const wchar_t* needle, const wchar_t* haystack);
// Overloads taking ::std::string operands.
GTEST_API_ AssertionResult IsSubstring(
    const char* needle_expr, const char* haystack_expr,
    const ::std::string& needle, const ::std::string& haystack);
GTEST_API_ AssertionResult IsNotSubstring(
    const char* needle_expr, const char* haystack_expr,
    const ::std::string& needle, const ::std::string& haystack);

// Overloads taking ::std::wstring operands; declared only when
// GTEST_HAS_STD_WSTRING is non-zero.
#if GTEST_HAS_STD_WSTRING
GTEST_API_ AssertionResult IsSubstring(
    const char* needle_expr, const char* haystack_expr,
    const ::std::wstring& needle, const ::std::wstring& haystack);
GTEST_API_ AssertionResult IsNotSubstring(
    const char* needle_expr, const char* haystack_expr,
    const ::std::wstring& needle, const ::std::wstring& haystack);
#endif  // GTEST_HAS_STD_WSTRING

namespace internal {

// Helper template function for comparing floating-points.
//
// Template parameter:
//
//   RawType: the raw floating-point type (either float or double)
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+template <typename RawType> +AssertionResult CmpHelperFloatingPointEQ(const char* expected_expression, + const char* actual_expression, + RawType expected, + RawType actual) { + const FloatingPoint<RawType> lhs(expected), rhs(actual); + + if (lhs.AlmostEquals(rhs)) { + return AssertionSuccess(); + } + + ::std::stringstream expected_ss; + expected_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2) + << expected; + + ::std::stringstream actual_ss; + actual_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2) + << actual; + + return EqFailure(expected_expression, + actual_expression, + StringStreamToString(&expected_ss), + StringStreamToString(&actual_ss), + false); +} + +// Helper function for implementing ASSERT_NEAR. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +GTEST_API_ AssertionResult DoubleNearPredFormat(const char* expr1, + const char* expr2, + const char* abs_error_expr, + double val1, + double val2, + double abs_error); + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// A class that enables one to stream messages to assertion macros +class GTEST_API_ AssertHelper { + public: + // Constructor. + AssertHelper(TestPartResult::Type type, + const char* file, + int line, + const char* message); + ~AssertHelper(); + + // Message assignment is a semantic trick to enable assertion + // streaming; see the GTEST_MESSAGE_ macro below. + void operator=(const Message& message) const; + + private: + // We put our data in a struct so that the size of the AssertHelper class can + // be as small as possible. This is important because gcc is incapable of + // re-using stack space even for temporary variables, so every EXPECT_EQ + // reserves stack space for another AssertHelper. 
+ struct AssertHelperData { + AssertHelperData(TestPartResult::Type t, + const char* srcfile, + int line_num, + const char* msg) + : type(t), file(srcfile), line(line_num), message(msg) { } + + TestPartResult::Type const type; + const char* const file; + int const line; + std::string const message; + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelperData); + }; + + AssertHelperData* const data_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelper); +}; + +} // namespace internal + +#if GTEST_HAS_PARAM_TEST +// The pure interface class that all value-parameterized tests inherit from. +// A value-parameterized class must inherit from both ::testing::Test and +// ::testing::WithParamInterface. In most cases that just means inheriting +// from ::testing::TestWithParam, but more complicated test hierarchies +// may need to inherit from Test and WithParamInterface at different levels. +// +// This interface has support for accessing the test parameter value via +// the GetParam() method. +// +// Use it with one of the parameter generator defining functions, like Range(), +// Values(), ValuesIn(), Bool(), and Combine(). +// +// class FooTest : public ::testing::TestWithParam<int> { +// protected: +// FooTest() { +// // Can use GetParam() here. +// } +// virtual ~FooTest() { +// // Can use GetParam() here. +// } +// virtual void SetUp() { +// // Can use GetParam() here. +// } +// virtual void TearDown { +// // Can use GetParam() here. +// } +// }; +// TEST_P(FooTest, DoesBar) { +// // Can use GetParam() method here. +// Foo foo; +// ASSERT_TRUE(foo.DoesBar(GetParam())); +// } +// INSTANTIATE_TEST_CASE_P(OneToTenRange, FooTest, ::testing::Range(1, 10)); + +template <typename T> +class WithParamInterface { + public: + typedef T ParamType; + virtual ~WithParamInterface() {} + + // The current parameter value. Is also available in the test fixture's + // constructor. 
This member function is non-static, even though it only + // references static data, to reduce the opportunity for incorrect uses + // like writing 'WithParamInterface<bool>::GetParam()' for a test that + // uses a fixture whose parameter type is int. + const ParamType& GetParam() const { + GTEST_CHECK_(parameter_ != NULL) + << "GetParam() can only be called inside a value-parameterized test " + << "-- did you intend to write TEST_P instead of TEST_F?"; + return *parameter_; + } + + private: + // Sets parameter value. The caller is responsible for making sure the value + // remains alive and unchanged throughout the current test. + static void SetParam(const ParamType* parameter) { + parameter_ = parameter; + } + + // Static value used for accessing parameter during a test lifetime. + static const ParamType* parameter_; + + // TestClass must be a subclass of WithParamInterface<T> and Test. + template <class TestClass> friend class internal::ParameterizedTestFactory; +}; + +template <typename T> +const T* WithParamInterface<T>::parameter_ = NULL; + +// Most value-parameterized classes can ignore the existence of +// WithParamInterface, and can just inherit from ::testing::TestWithParam. + +template <typename T> +class TestWithParam : public Test, public WithParamInterface<T> { +}; + +#endif // GTEST_HAS_PARAM_TEST + +// Macros for indicating success/failure in test code. + +// ADD_FAILURE unconditionally adds a failure to the current test. +// SUCCEED generates a success - it doesn't automatically make the +// current test successful, as a test is only successful when it has +// no failure. +// +// EXPECT_* verifies that a certain condition is satisfied. If not, +// it behaves like ADD_FAILURE. In particular: +// +// EXPECT_TRUE verifies that a Boolean condition is true. +// EXPECT_FALSE verifies that a Boolean condition is false. +// +// FAIL and ASSERT_* are similar to ADD_FAILURE and EXPECT_*, except +// that they will also abort the current function on failure. 
// People usually want the fail-fast behavior of FAIL and ASSERT_*, but those
// writing data-driven tests often find themselves using ADD_FAILURE
// and EXPECT_* more.

// Generates a nonfatal failure with a generic message.
#define ADD_FAILURE() GTEST_NONFATAL_FAILURE_("Failed")

// Generates a nonfatal failure at the given source file location with
// a generic message. Unlike ADD_FAILURE(), the location is supplied by the
// caller through the `file` and `line` arguments.
#define ADD_FAILURE_AT(file, line) \
  GTEST_MESSAGE_AT_(file, line, "Failed", \
                    ::testing::TestPartResult::kNonFatalFailure)

// Generates a fatal failure with a generic message.
#define GTEST_FAIL() GTEST_FATAL_FAILURE_("Failed")

// Define this macro to 1 to omit the definition of FAIL(), which is a
// generic name and clashes with some other libraries. GTEST_FAIL() stays
// available either way.
#if !GTEST_DONT_DEFINE_FAIL
# define FAIL() GTEST_FAIL()
#endif

// Generates a success with a generic message.
#define GTEST_SUCCEED() GTEST_SUCCESS_("Succeeded")

// Define this macro to 1 to omit the definition of SUCCEED(), which
// is a generic name and clashes with some other libraries. GTEST_SUCCEED()
// stays available either way.
#if !GTEST_DONT_DEFINE_SUCCEED
# define SUCCEED() GTEST_SUCCEED()
#endif

// Macros for testing exceptions.
//
//    * {ASSERT|EXPECT}_THROW(statement, expected_exception):
//         Tests that the statement throws the expected exception.
//    * {ASSERT|EXPECT}_NO_THROW(statement):
//         Tests that the statement doesn't throw any exception.
//    * {ASSERT|EXPECT}_ANY_THROW(statement):
//         Tests that the statement throws an exception.
// The EXPECT_ forms pass GTEST_NONFATAL_FAILURE_ as the failure handler and
// the ASSERT_ forms pass GTEST_FATAL_FAILURE_; otherwise each pair expands
// to the same underlying GTEST_TEST_*_ macro.
#define EXPECT_THROW(statement, expected_exception) \
  GTEST_TEST_THROW_(statement, expected_exception, GTEST_NONFATAL_FAILURE_)
#define EXPECT_NO_THROW(statement) \
  GTEST_TEST_NO_THROW_(statement, GTEST_NONFATAL_FAILURE_)
#define EXPECT_ANY_THROW(statement) \
  GTEST_TEST_ANY_THROW_(statement, GTEST_NONFATAL_FAILURE_)
#define ASSERT_THROW(statement, expected_exception) \
  GTEST_TEST_THROW_(statement, expected_exception, GTEST_FATAL_FAILURE_)
#define ASSERT_NO_THROW(statement) \
  GTEST_TEST_NO_THROW_(statement, GTEST_FATAL_FAILURE_)
#define ASSERT_ANY_THROW(statement) \
  GTEST_TEST_ANY_THROW_(statement, GTEST_FATAL_FAILURE_)

// Boolean assertions. Condition can be either a Boolean expression or an
// AssertionResult. For more information on how to use AssertionResult with
// these macros see comments on that class.
//
// The *_FALSE forms pass the negated condition and swap the two literal
// arguments (true, false) relative to the *_TRUE forms; `#condition` is the
// un-negated source text in both cases. NOTE(review): the two literals are
// presumably the "actual"/"expected" labels of GTEST_TEST_BOOLEAN_ --
// confirm against that macro's definition.
#define EXPECT_TRUE(condition) \
  GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \
                      GTEST_NONFATAL_FAILURE_)
#define EXPECT_FALSE(condition) \
  GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
                      GTEST_NONFATAL_FAILURE_)
#define ASSERT_TRUE(condition) \
  GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \
                      GTEST_FATAL_FAILURE_)
#define ASSERT_FALSE(condition) \
  GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
                      GTEST_FATAL_FAILURE_)

// Includes the auto-generated header that implements a family of
// generic predicate assertion macros. (In this fused source the header's
// contents follow inline, starting with its license notice.)
// Copyright 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file is AUTOMATICALLY GENERATED on 10/31/2011 by command +// 'gen_gtest_pred_impl.py 5'. DO NOT EDIT BY HAND! +// +// Implements a family of generic predicate assertion macros. + +#ifndef GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_ +#define GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_ + +// Makes sure this header is not included before gtest.h. +#ifndef GTEST_INCLUDE_GTEST_GTEST_H_ +# error Do not include gtest_pred_impl.h directly. Include gtest.h instead. +#endif // GTEST_INCLUDE_GTEST_GTEST_H_ + +// This header implements a family of generic predicate assertion +// macros: +// +// ASSERT_PRED_FORMAT1(pred_format, v1) +// ASSERT_PRED_FORMAT2(pred_format, v1, v2) +// ... 
+// +// where pred_format is a function or functor that takes n (in the +// case of ASSERT_PRED_FORMATn) values and their source expression +// text, and returns a testing::AssertionResult. See the definition +// of ASSERT_EQ in gtest.h for an example. +// +// If you don't care about formatting, you can use the more +// restrictive version: +// +// ASSERT_PRED1(pred, v1) +// ASSERT_PRED2(pred, v1, v2) +// ... +// +// where pred is an n-ary function or functor that returns bool, +// and the values v1, v2, ..., must support the << operator for +// streaming to std::ostream. +// +// We also define the EXPECT_* variations. +// +// For now we only support predicates whose arity is at most 5. +// Please email googletestframework@googlegroups.com if you need +// support for higher arities. + +// GTEST_ASSERT_ is the basic statement to which all of the assertions +// in this file reduce. Don't use this in your code. + +#define GTEST_ASSERT_(expression, on_failure) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (const ::testing::AssertionResult gtest_ar = (expression)) \ + ; \ + else \ + on_failure(gtest_ar.failure_message()) + + +// Helper function for implementing {EXPECT|ASSERT}_PRED1. Don't use +// this in your code. +template <typename Pred, + typename T1> +AssertionResult AssertPred1Helper(const char* pred_text, + const char* e1, + Pred pred, + const T1& v1) { + if (pred(v1)) return AssertionSuccess(); + + return AssertionFailure() << pred_text << "(" + << e1 << ") evaluates to false, where" + << "\n" << e1 << " evaluates to " << v1; +} + +// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT1. +// Don't use this in your code. +#define GTEST_PRED_FORMAT1_(pred_format, v1, on_failure)\ + GTEST_ASSERT_(pred_format(#v1, v1), \ + on_failure) + +// Internal macro for implementing {EXPECT|ASSERT}_PRED1. Don't use +// this in your code. 
+#define GTEST_PRED1_(pred, v1, on_failure)\ + GTEST_ASSERT_(::testing::AssertPred1Helper(#pred, \ + #v1, \ + pred, \ + v1), on_failure) + +// Unary predicate assertion macros. +#define EXPECT_PRED_FORMAT1(pred_format, v1) \ + GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_NONFATAL_FAILURE_) +#define EXPECT_PRED1(pred, v1) \ + GTEST_PRED1_(pred, v1, GTEST_NONFATAL_FAILURE_) +#define ASSERT_PRED_FORMAT1(pred_format, v1) \ + GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_FATAL_FAILURE_) +#define ASSERT_PRED1(pred, v1) \ + GTEST_PRED1_(pred, v1, GTEST_FATAL_FAILURE_) + + + +// Helper function for implementing {EXPECT|ASSERT}_PRED2. Don't use +// this in your code. +template <typename Pred, + typename T1, + typename T2> +AssertionResult AssertPred2Helper(const char* pred_text, + const char* e1, + const char* e2, + Pred pred, + const T1& v1, + const T2& v2) { + if (pred(v1, v2)) return AssertionSuccess(); + + return AssertionFailure() << pred_text << "(" + << e1 << ", " + << e2 << ") evaluates to false, where" + << "\n" << e1 << " evaluates to " << v1 + << "\n" << e2 << " evaluates to " << v2; +} + +// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT2. +// Don't use this in your code. +#define GTEST_PRED_FORMAT2_(pred_format, v1, v2, on_failure)\ + GTEST_ASSERT_(pred_format(#v1, #v2, v1, v2), \ + on_failure) + +// Internal macro for implementing {EXPECT|ASSERT}_PRED2. Don't use +// this in your code. +#define GTEST_PRED2_(pred, v1, v2, on_failure)\ + GTEST_ASSERT_(::testing::AssertPred2Helper(#pred, \ + #v1, \ + #v2, \ + pred, \ + v1, \ + v2), on_failure) + +// Binary predicate assertion macros. 
+#define EXPECT_PRED_FORMAT2(pred_format, v1, v2) \ + GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_NONFATAL_FAILURE_) +#define EXPECT_PRED2(pred, v1, v2) \ + GTEST_PRED2_(pred, v1, v2, GTEST_NONFATAL_FAILURE_) +#define ASSERT_PRED_FORMAT2(pred_format, v1, v2) \ + GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_FATAL_FAILURE_) +#define ASSERT_PRED2(pred, v1, v2) \ + GTEST_PRED2_(pred, v1, v2, GTEST_FATAL_FAILURE_) + + + +// Helper function for implementing {EXPECT|ASSERT}_PRED3. Don't use +// this in your code. +template <typename Pred, + typename T1, + typename T2, + typename T3> +AssertionResult AssertPred3Helper(const char* pred_text, + const char* e1, + const char* e2, + const char* e3, + Pred pred, + const T1& v1, + const T2& v2, + const T3& v3) { + if (pred(v1, v2, v3)) return AssertionSuccess(); + + return AssertionFailure() << pred_text << "(" + << e1 << ", " + << e2 << ", " + << e3 << ") evaluates to false, where" + << "\n" << e1 << " evaluates to " << v1 + << "\n" << e2 << " evaluates to " << v2 + << "\n" << e3 << " evaluates to " << v3; +} + +// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT3. +// Don't use this in your code. +#define GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, on_failure)\ + GTEST_ASSERT_(pred_format(#v1, #v2, #v3, v1, v2, v3), \ + on_failure) + +// Internal macro for implementing {EXPECT|ASSERT}_PRED3. Don't use +// this in your code. +#define GTEST_PRED3_(pred, v1, v2, v3, on_failure)\ + GTEST_ASSERT_(::testing::AssertPred3Helper(#pred, \ + #v1, \ + #v2, \ + #v3, \ + pred, \ + v1, \ + v2, \ + v3), on_failure) + +// Ternary predicate assertion macros. 
+#define EXPECT_PRED_FORMAT3(pred_format, v1, v2, v3) \ + GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_NONFATAL_FAILURE_) +#define EXPECT_PRED3(pred, v1, v2, v3) \ + GTEST_PRED3_(pred, v1, v2, v3, GTEST_NONFATAL_FAILURE_) +#define ASSERT_PRED_FORMAT3(pred_format, v1, v2, v3) \ + GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_FATAL_FAILURE_) +#define ASSERT_PRED3(pred, v1, v2, v3) \ + GTEST_PRED3_(pred, v1, v2, v3, GTEST_FATAL_FAILURE_) + + + +// Helper function for implementing {EXPECT|ASSERT}_PRED4. Don't use +// this in your code. +template <typename Pred, + typename T1, + typename T2, + typename T3, + typename T4> +AssertionResult AssertPred4Helper(const char* pred_text, + const char* e1, + const char* e2, + const char* e3, + const char* e4, + Pred pred, + const T1& v1, + const T2& v2, + const T3& v3, + const T4& v4) { + if (pred(v1, v2, v3, v4)) return AssertionSuccess(); + + return AssertionFailure() << pred_text << "(" + << e1 << ", " + << e2 << ", " + << e3 << ", " + << e4 << ") evaluates to false, where" + << "\n" << e1 << " evaluates to " << v1 + << "\n" << e2 << " evaluates to " << v2 + << "\n" << e3 << " evaluates to " << v3 + << "\n" << e4 << " evaluates to " << v4; +} + +// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT4. +// Don't use this in your code. +#define GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, on_failure)\ + GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, v1, v2, v3, v4), \ + on_failure) + +// Internal macro for implementing {EXPECT|ASSERT}_PRED4. Don't use +// this in your code. +#define GTEST_PRED4_(pred, v1, v2, v3, v4, on_failure)\ + GTEST_ASSERT_(::testing::AssertPred4Helper(#pred, \ + #v1, \ + #v2, \ + #v3, \ + #v4, \ + pred, \ + v1, \ + v2, \ + v3, \ + v4), on_failure) + +// 4-ary predicate assertion macros. 
+#define EXPECT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \ + GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_) +#define EXPECT_PRED4(pred, v1, v2, v3, v4) \ + GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_) +#define ASSERT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \ + GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_FATAL_FAILURE_) +#define ASSERT_PRED4(pred, v1, v2, v3, v4) \ + GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_FATAL_FAILURE_) + + + +// Helper function for implementing {EXPECT|ASSERT}_PRED5. Don't use +// this in your code. +template <typename Pred, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5> +AssertionResult AssertPred5Helper(const char* pred_text, + const char* e1, + const char* e2, + const char* e3, + const char* e4, + const char* e5, + Pred pred, + const T1& v1, + const T2& v2, + const T3& v3, + const T4& v4, + const T5& v5) { + if (pred(v1, v2, v3, v4, v5)) return AssertionSuccess(); + + return AssertionFailure() << pred_text << "(" + << e1 << ", " + << e2 << ", " + << e3 << ", " + << e4 << ", " + << e5 << ") evaluates to false, where" + << "\n" << e1 << " evaluates to " << v1 + << "\n" << e2 << " evaluates to " << v2 + << "\n" << e3 << " evaluates to " << v3 + << "\n" << e4 << " evaluates to " << v4 + << "\n" << e5 << " evaluates to " << v5; +} + +// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT5. +// Don't use this in your code. +#define GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, on_failure)\ + GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, #v5, v1, v2, v3, v4, v5), \ + on_failure) + +// Internal macro for implementing {EXPECT|ASSERT}_PRED5. Don't use +// this in your code. +#define GTEST_PRED5_(pred, v1, v2, v3, v4, v5, on_failure)\ + GTEST_ASSERT_(::testing::AssertPred5Helper(#pred, \ + #v1, \ + #v2, \ + #v3, \ + #v4, \ + #v5, \ + pred, \ + v1, \ + v2, \ + v3, \ + v4, \ + v5), on_failure) + +// 5-ary predicate assertion macros. 
#define EXPECT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
  GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
#define EXPECT_PRED5(pred, v1, v2, v3, v4, v5) \
  GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
#define ASSERT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
  GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
#define ASSERT_PRED5(pred, v1, v2, v3, v4, v5) \
  GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)



// End of the embedded gtest_pred_impl.h section.
#endif  // GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_

// Macros for testing equalities and inequalities.
//
//    * {ASSERT|EXPECT}_EQ(expected, actual): Tests that expected == actual
//    * {ASSERT|EXPECT}_NE(v1, v2):           Tests that v1 != v2
//    * {ASSERT|EXPECT}_LT(v1, v2):           Tests that v1 < v2
//    * {ASSERT|EXPECT}_LE(v1, v2):           Tests that v1 <= v2
//    * {ASSERT|EXPECT}_GT(v1, v2):           Tests that v1 > v2
//    * {ASSERT|EXPECT}_GE(v1, v2):           Tests that v1 >= v2
//
// When they are not, Google Test prints both the tested expressions and
// their actual values. The values must be compatible built-in types,
// or you will get a compiler error. By "compatible" we mean that the
// values can be compared by the respective operator.
//
// Note:
//
//   1. It is possible to make a user-defined type work with
//   {ASSERT|EXPECT}_??(), but that requires overloading the
//   comparison operators and is thus discouraged by the Google C++
//   Usage Guide. Therefore, you are advised to use the
//   {ASSERT|EXPECT}_TRUE() macro to assert that two objects are
//   equal.
//
//   2. The {ASSERT|EXPECT}_??() macros do pointer comparisons on
//   pointers (in particular, C strings). Therefore, if you use it
//   with two C strings, you are testing how their locations in memory
//   are related, not how their content is related. To compare two C
//   strings by content, use {ASSERT|EXPECT}_STR*().
//
//   3. {ASSERT|EXPECT}_EQ(expected, actual) is preferred to
//   {ASSERT|EXPECT}_TRUE(expected == actual), as the former tells you
//   what the actual value is when it fails, and similarly for the
//   other comparisons.
//
//   4. Do not depend on the order in which {ASSERT|EXPECT}_??()
//   evaluate their arguments, which is undefined.
//
//   5. These macros evaluate their arguments exactly once.
//
// Examples:
//
//   EXPECT_NE(5, Foo());
//   EXPECT_EQ(NULL, a_pointer);
//   ASSERT_LT(i, array_size);
//   ASSERT_GT(records.size(), 0) << "There is no record left.";

// Non-fatal comparisons. Each one is a two-argument predicate-format
// assertion over the matching ::testing::internal comparison helper.
// For EXPECT_EQ the helper is EqHelper<...>::Compare, instantiated on
// GTEST_IS_NULL_LITERAL_(expected), so only the first argument is inspected.
// NOTE(review): presumably this is what makes EXPECT_EQ(NULL, a_pointer)
// work -- confirm against EqHelper / GTEST_IS_NULL_LITERAL_.
#define EXPECT_EQ(expected, actual) \
  EXPECT_PRED_FORMAT2(::testing::internal:: \
                      EqHelper<GTEST_IS_NULL_LITERAL_(expected)>::Compare, \
                      expected, actual)
#define EXPECT_NE(expected, actual) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperNE, expected, actual)
#define EXPECT_LE(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2)
#define EXPECT_LT(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2)
#define EXPECT_GE(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2)
#define EXPECT_GT(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2)

// Fatal comparisons under GTEST_-prefixed names. These are always defined;
// the short ASSERT_XY aliases further down can be switched off one by one.
#define GTEST_ASSERT_EQ(expected, actual) \
  ASSERT_PRED_FORMAT2(::testing::internal:: \
                      EqHelper<GTEST_IS_NULL_LITERAL_(expected)>::Compare, \
                      expected, actual)
#define GTEST_ASSERT_NE(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2)
#define GTEST_ASSERT_LE(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2)
#define GTEST_ASSERT_LT(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2)
#define GTEST_ASSERT_GE(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2)
#define GTEST_ASSERT_GT(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2)

// Define macro GTEST_DONT_DEFINE_ASSERT_XY to 1 to omit the definition of
// ASSERT_XY(), which clashes with some users' own code. XY stands for one
// of EQ, NE, LE, LT, GE, GT; the GTEST_ASSERT_XY form stays available.

#if !GTEST_DONT_DEFINE_ASSERT_EQ
# define ASSERT_EQ(val1, val2) GTEST_ASSERT_EQ(val1, val2)
#endif

#if !GTEST_DONT_DEFINE_ASSERT_NE
# define ASSERT_NE(val1, val2) GTEST_ASSERT_NE(val1, val2)
#endif

#if !GTEST_DONT_DEFINE_ASSERT_LE
# define ASSERT_LE(val1, val2) GTEST_ASSERT_LE(val1, val2)
#endif

#if !GTEST_DONT_DEFINE_ASSERT_LT
# define ASSERT_LT(val1, val2) GTEST_ASSERT_LT(val1, val2)
#endif

#if !GTEST_DONT_DEFINE_ASSERT_GE
# define ASSERT_GE(val1, val2) GTEST_ASSERT_GE(val1, val2)
#endif

#if !GTEST_DONT_DEFINE_ASSERT_GT
# define ASSERT_GT(val1, val2) GTEST_ASSERT_GT(val1, val2)
#endif

// C-string Comparisons. All tests treat NULL and any non-NULL string
// as different. Two NULLs are equal.
//
//    * {ASSERT|EXPECT}_STREQ(s1, s2):     Tests that s1 == s2
//    * {ASSERT|EXPECT}_STRNE(s1, s2):     Tests that s1 != s2
//    * {ASSERT|EXPECT}_STRCASEEQ(s1, s2): Tests that s1 == s2, ignoring case
//    * {ASSERT|EXPECT}_STRCASENE(s1, s2): Tests that s1 != s2, ignoring case
//
// For wide or narrow string objects, you can use the
// {ASSERT|EXPECT}_??() macros.
//
// Don't depend on the order in which the arguments are evaluated,
// which is undefined.
//
// These macros evaluate their arguments exactly once.

// Non-fatal C-string comparisons: two-argument predicate-format assertions
// over the ::testing::internal::CmpHelperSTR* helpers.
#define EXPECT_STREQ(expected, actual) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, expected, actual)
#define EXPECT_STRNE(s1, s2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2)
#define EXPECT_STRCASEEQ(expected, actual) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, expected, actual)
#define EXPECT_STRCASENE(s1, s2)\
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2)

// Fatal counterparts; they use the same helpers through ASSERT_PRED_FORMAT2.
#define ASSERT_STREQ(expected, actual) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, expected, actual)
#define ASSERT_STRNE(s1, s2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2)
#define ASSERT_STRCASEEQ(expected, actual) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, expected, actual)
#define ASSERT_STRCASENE(s1, s2)\
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2)

// Macros for comparing floating-point numbers.
//
//    * {ASSERT|EXPECT}_FLOAT_EQ(expected, actual):
//         Tests that two float values are almost equal.
//    * {ASSERT|EXPECT}_DOUBLE_EQ(expected, actual):
//         Tests that two double values are almost equal.
//    * {ASSERT|EXPECT}_NEAR(v1, v2, abs_error):
//         Tests that v1 and v2 are within the given distance to each other.
//
// Google Test uses ULP-based comparison to automatically pick a default
// error bound that is appropriate for the operands. See the
// FloatingPoint template class in gtest-internal.h if you are
// interested in the implementation details.

// The *_FLOAT_EQ and *_DOUBLE_EQ macros select the float or double
// instantiation of ::testing::internal::CmpHelperFloatingPointEQ explicitly.
#define EXPECT_FLOAT_EQ(expected, actual)\
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \
                      expected, actual)

#define EXPECT_DOUBLE_EQ(expected, actual)\
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \
                      expected, actual)

#define ASSERT_FLOAT_EQ(expected, actual)\
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \
                      expected, actual)

#define ASSERT_DOUBLE_EQ(expected, actual)\
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \
                      expected, actual)

// The *_NEAR macros are three-argument predicate-format assertions over
// ::testing::internal::DoubleNearPredFormat.
#define EXPECT_NEAR(val1, val2, abs_error)\
  EXPECT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \
                      val1, val2, abs_error)

#define ASSERT_NEAR(val1, val2, abs_error)\
  ASSERT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \
                      val1, val2, abs_error)

// These predicate format functions work on floating-point values, and
// can be used in {ASSERT|EXPECT}_PRED_FORMAT2*(), e.g.
//
//   EXPECT_PRED_FORMAT2(testing::DoubleLE, Foo(), 5.0);

// Asserts that val1 is less than, or almost equal to, val2. Fails
// otherwise. In particular, it fails if either val1 or val2 is NaN.
// expr1 and expr2 are the source texts of val1 and val2, as supplied by
// the PRED_FORMAT2 machinery. Only declared here; defined elsewhere.
GTEST_API_ AssertionResult FloatLE(const char* expr1, const char* expr2,
                                   float val1, float val2);
GTEST_API_ AssertionResult DoubleLE(const char* expr1, const char* expr2,
                                    double val1, double val2);


#if GTEST_OS_WINDOWS

// Macros that test for HRESULT failure and success, these are only useful
// on Windows, and rely on Windows SDK macros and APIs to compile.
//
//    * {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}(expr)
//
// When expr unexpectedly fails or succeeds, Google Test prints the
// expected result and the actual result with both a human-readable
// string representation of the error, if available, as well as the
// hex result code.
# define EXPECT_HRESULT_SUCCEEDED(expr) \
    EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr))

# define ASSERT_HRESULT_SUCCEEDED(expr) \
    ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr))

# define EXPECT_HRESULT_FAILED(expr) \
    EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr))

# define ASSERT_HRESULT_FAILED(expr) \
    ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr))

#endif  // GTEST_OS_WINDOWS

// Macros that execute statement and check that it doesn't generate new fatal
// failures in the current thread.
//
//   * {ASSERT|EXPECT}_NO_FATAL_FAILURE(statement);
//
// Examples:
//
//   EXPECT_NO_FATAL_FAILURE(Process());
//   ASSERT_NO_FATAL_FAILURE(Process()) << "Process() failed";
//
#define ASSERT_NO_FATAL_FAILURE(statement) \
    GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_FATAL_FAILURE_)
#define EXPECT_NO_FATAL_FAILURE(statement) \
    GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_NONFATAL_FAILURE_)

// Causes a trace (including the source file path, the current line
// number, and the given message) to be included in every test failure
// message generated by code in the current scope. The effect is
// undone when the control leaves the current scope.
//
// The message argument can be anything streamable to std::ostream.
//
// In the implementation, we include the current line number as part
// of the dummy variable name, thus allowing multiple SCOPED_TRACE()s
// to appear in the same block - as long as they are on different
// lines.
//
// The expansion declares a local ::testing::internal::ScopedTrace object
// constructed from __FILE__, __LINE__ and a ::testing::Message built from
// `message`, so the macro must be used where a declaration is allowed.
#define SCOPED_TRACE(message) \
  ::testing::internal::ScopedTrace GTEST_CONCAT_TOKEN_(gtest_trace_, __LINE__)(\
    __FILE__, __LINE__, ::testing::Message() << (message))

// Compile-time assertion for type equality.
// StaticAssertTypeEq<type1, type2>() compiles iff type1 and type2 are
// the same type. The value it returns is not interesting.
+// +// Instead of making StaticAssertTypeEq a class template, we make it a +// function template that invokes a helper class template. This +// prevents a user from misusing StaticAssertTypeEq<T1, T2> by +// defining objects of that type. +// +// CAVEAT: +// +// When used inside a method of a class template, +// StaticAssertTypeEq<T1, T2>() is effective ONLY IF the method is +// instantiated. For example, given: +// +// template <typename T> class Foo { +// public: +// void Bar() { testing::StaticAssertTypeEq<int, T>(); } +// }; +// +// the code: +// +// void Test1() { Foo<bool> foo; } +// +// will NOT generate a compiler error, as Foo<bool>::Bar() is never +// actually instantiated. Instead, you need: +// +// void Test2() { Foo<bool> foo; foo.Bar(); } +// +// to cause a compiler error. +template <typename T1, typename T2> +bool StaticAssertTypeEq() { + (void)internal::StaticAssertTypeEqHelper<T1, T2>(); + return true; +} + +// Defines a test. +// +// The first parameter is the name of the test case, and the second +// parameter is the name of the test within the test case. +// +// The convention is to end the test case name with "Test". For +// example, a test case for the Foo class can be named FooTest. +// +// The user should put his test code between braces after using this +// macro. Example: +// +// TEST(FooTest, InitializesCorrectly) { +// Foo foo; +// EXPECT_TRUE(foo.StatusIsOK()); +// } + +// Note that we call GetTestTypeId() instead of GetTypeId< +// ::testing::Test>() here to get the type ID of testing::Test. This +// is to work around a suspected linker bug when using Google Test as +// a framework on Mac OS X. The bug causes GetTypeId< +// ::testing::Test>() to return different values depending on whether +// the call is from the Google Test framework itself or from user test +// code. GetTestTypeId() is guaranteed to always return the same +// value, as it always calls GetTypeId<>() from the Google Test +// framework. 
#define GTEST_TEST(test_case_name, test_name)\
  GTEST_TEST_(test_case_name, test_name, \
              ::testing::Test, ::testing::internal::GetTestTypeId())

// Define GTEST_DONT_DEFINE_TEST to 1 to omit the definition of TEST(),
// which is a generic name and clashes with some other libraries.
// GTEST_TEST() remains available either way.
#if !GTEST_DONT_DEFINE_TEST
# define TEST(test_case_name, test_name) GTEST_TEST(test_case_name, test_name)
#endif

// Defines a test that uses a test fixture.
//
// The first parameter is the name of the test fixture class, which
// also doubles as the test case name. The second parameter is the
// name of the test within the test case.
//
// A test fixture class must be declared earlier. The user should put
// his test code between braces after using this macro. Example:
//
//   class FooTest : public testing::Test {
//    protected:
//     virtual void SetUp() { b_.AddElement(3); }
//
//     Foo a_;
//     Foo b_;
//   };
//
//   TEST_F(FooTest, InitializesCorrectly) {
//     EXPECT_TRUE(a_.StatusIsOK());
//   }
//
//   TEST_F(FooTest, ReturnsElementCountCorrectly) {
//     EXPECT_EQ(0, a_.size());
//     EXPECT_EQ(1, b_.size());
//   }

// Unlike GTEST_TEST, which passes ::testing::Test and GetTestTypeId(),
// TEST_F passes the fixture class itself as the parent class and
// GetTypeId<test_fixture>() as the type ID.
#define TEST_F(test_fixture, test_name)\
  GTEST_TEST_(test_fixture, test_name, test_fixture, \
              ::testing::internal::GetTypeId<test_fixture>())

}  // namespace testing

// Use this function in main() to run all tests. It returns 0 if all
// tests are successful, or 1 otherwise.
//
// RUN_ALL_TESTS() should be invoked after the command line has been
// parsed by InitGoogleTest().
//
// This function was formerly a macro; thus, it is in the global
// namespace and has an all-caps name.
//
// The separate declaration exists to carry GTEST_MUST_USE_RESULT_.
// NOTE(review): that macro is defined elsewhere; presumably it expands to a
// warn-if-unused attribute on compilers that support one -- confirm there.
int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_;

// Forwards to the UnitTest singleton and returns whatever Run() returns.
inline int RUN_ALL_TESTS() {
  return ::testing::UnitTest::GetInstance()->Run();
}

#endif  // GTEST_INCLUDE_GTEST_GTEST_H_