diff --git a/doc/src/accelerate_kokkos.txt b/doc/src/accelerate_kokkos.txt index 1a45c04a1b5b1ae469d3f0ed620b90366fb42c9a..3d31344c247a73f6dc2405545e1014e63b628d16 100644 --- a/doc/src/accelerate_kokkos.txt +++ b/doc/src/accelerate_kokkos.txt @@ -110,14 +110,14 @@ mpirun -np 96 -ppn 12 lmp_g++ -k on t 20 -sf kk -in in.lj # ditto on 8 Phis :p [Required hardware/software:] Kokkos support within LAMMPS must be built with a C++11 compatible -compiler. If using gcc, version 4.8.1 or later is required. +compiler. If using gcc, version 4.7.2 or later is required. To build with Kokkos support for CPUs, your compiler must support the OpenMP interface. You should have one or more multi-core CPUs so that multiple threads can be launched by each MPI task running on a CPU. To build with Kokkos support for NVIDIA GPUs, NVIDIA Cuda software -version 6.5 or later must be installed on your system. See the +version 7.5 or later must be installed on your system. See the discussion for the "GPU"_accelerate_gpu.html package for details of how to check and do this. 
diff --git a/lib/kokkos/.gitignore b/lib/kokkos/.gitignore deleted file mode 100644 index f9d16be1558495fb95e3f5c4b785eefd3b3aa854..0000000000000000000000000000000000000000 --- a/lib/kokkos/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -# Standard ignores -*~ -*.pyc -\#*# -.#* -.*.swp -.cproject -.project diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md new file mode 100644 index 0000000000000000000000000000000000000000..a444f08eed9a9b3f7dc376435f73d5334a00d3ee --- /dev/null +++ b/lib/kokkos/CHANGELOG.md @@ -0,0 +1,284 @@ +# Change Log + +## [2.02.07](https://github.com/kokkos/kokkos/tree/2.02.07) (2016-12-16) +[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.01...2.02.07) + +**Implemented enhancements:** + +- Add CMake option to enable Cuda Lambda support [\#589](https://github.com/kokkos/kokkos/issues/589) +- Add CMake option to enable Cuda RDC support [\#588](https://github.com/kokkos/kokkos/issues/588) +- Add Initial Intel Sky Lake Xeon-HPC Compiler Support to Kokkos Make System [\#584](https://github.com/kokkos/kokkos/issues/584) +- Building Tutorial Examples [\#582](https://github.com/kokkos/kokkos/issues/582) +- Internal way for using ThreadVectorRange without TeamHandle [\#574](https://github.com/kokkos/kokkos/issues/574) +- Testing: Add testing for uvm and rdc [\#571](https://github.com/kokkos/kokkos/issues/571) +- Profiling: Add Memory Tracing and Region Markers [\#557](https://github.com/kokkos/kokkos/issues/557) +- nvcc\_wrapper not installed with Kokkos built with CUDA through CMake [\#543](https://github.com/kokkos/kokkos/issues/543) +- Improve DynRankView debug check [\#541](https://github.com/kokkos/kokkos/issues/541) +- Benchmarks: Add Gather benchmark [\#536](https://github.com/kokkos/kokkos/issues/536) +- Testing: add spot\_check option to test\_all\_sandia [\#535](https://github.com/kokkos/kokkos/issues/535) +- Deprecate Kokkos::Impl::VerifyExecutionCanAccessMemorySpace [\#527](https://github.com/kokkos/kokkos/issues/527) 
+- Add AtomicAdd support for 64bit float for Pascal [\#522](https://github.com/kokkos/kokkos/issues/522) +- Add Restrict and Aligned memory trait [\#517](https://github.com/kokkos/kokkos/issues/517) +- Kokkos Tests are Not Run using Compiler Optimization [\#501](https://github.com/kokkos/kokkos/issues/501) +- Add support for clang 3.7 w/ openmp backend [\#393](https://github.com/kokkos/kokkos/issues/393) +- Provide an error throw class [\#79](https://github.com/kokkos/kokkos/issues/79) + +**Fixed bugs:** + +- Cuda UVM Allocation test broken with UVM as default space [\#586](https://github.com/kokkos/kokkos/issues/586) +- Bug \(develop branch only\): multiple tests are now failing when forcing uvm usage. [\#570](https://github.com/kokkos/kokkos/issues/570) +- Error in generate\_makefile.sh for Kokkos when Compiler is Empty String/Fails [\#568](https://github.com/kokkos/kokkos/issues/568) +- XL 13.1.4 incorrect C++11 flag [\#553](https://github.com/kokkos/kokkos/issues/553) +- Improve DynRankView debug check [\#541](https://github.com/kokkos/kokkos/issues/541) +- Installing Library on MAC broken due to cp -u [\#539](https://github.com/kokkos/kokkos/issues/539) +- Intel Nightly Testing with Debug enabled fails [\#534](https://github.com/kokkos/kokkos/issues/534) + +## [2.02.01](https://github.com/kokkos/kokkos/tree/2.02.01) (2016-11-01) +[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.00...2.02.01) + +**Implemented enhancements:** + +- Add Changelog generation to our process. 
[\#506](https://github.com/kokkos/kokkos/issues/506) + +**Fixed bugs:** + +- Test scratch\_request fails in Serial with Debug enabled [\#520](https://github.com/kokkos/kokkos/issues/520) +- Bug In BoundsCheck for DynRankView [\#516](https://github.com/kokkos/kokkos/issues/516) + +## [2.02.00](https://github.com/kokkos/kokkos/tree/2.02.00) (2016-10-30) +[Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.10...2.02.00) + +**Implemented enhancements:** + +- Add PowerPC assembly for grabbing clock register in memory pool [\#511](https://github.com/kokkos/kokkos/issues/511) +- Add GCC 6.x support [\#508](https://github.com/kokkos/kokkos/issues/508) +- Test install and build against installed library [\#498](https://github.com/kokkos/kokkos/issues/498) +- Makefile.kokkos adds expt-extended-lambda to cuda build with clang [\#490](https://github.com/kokkos/kokkos/issues/490) +- Add top-level makefile option to just test kokkos-core unit-test [\#485](https://github.com/kokkos/kokkos/issues/485) +- Split and harmonize Object Files of Core UnitTests to increase build parallelism [\#484](https://github.com/kokkos/kokkos/issues/484) +- LayoutLeft to LayoutLeft subview for 3D and 4D views [\#473](https://github.com/kokkos/kokkos/issues/473) +- Add official Cuda 8.0 support [\#468](https://github.com/kokkos/kokkos/issues/468) +- Allow C++1Z Flag for Class Lambda capture [\#465](https://github.com/kokkos/kokkos/issues/465) +- Add Clang 4.0+ compilation of Cuda code [\#455](https://github.com/kokkos/kokkos/issues/455) +- Possible Issue with Intel 17.0.098 and GCC 6.1.0 in Develop Branch [\#445](https://github.com/kokkos/kokkos/issues/445) +- Add name of view to "View bounds error" [\#432](https://github.com/kokkos/kokkos/issues/432) +- Move Sort Binning Operators into Kokkos namespace [\#421](https://github.com/kokkos/kokkos/issues/421) +- TaskPolicy - generate error when attempt to use uninitialized [\#396](https://github.com/kokkos/kokkos/issues/396) +- Import 
WithoutInitializing and AllowPadding into Kokkos namespace [\#325](https://github.com/kokkos/kokkos/issues/325) +- TeamThreadRange requires begin, end to be the same type [\#305](https://github.com/kokkos/kokkos/issues/305) +- CudaUVMSpace should track \# allocations, due to CUDA limit on \# UVM allocations [\#300](https://github.com/kokkos/kokkos/issues/300) +- Remove old View and its infrastructure [\#259](https://github.com/kokkos/kokkos/issues/259) + +**Fixed bugs:** + +- Bug in TestCuda\_Other.cpp: most likely assembly inserted into Device code [\#515](https://github.com/kokkos/kokkos/issues/515) +- Cuda Compute Capability check of GPU is outdated [\#509](https://github.com/kokkos/kokkos/issues/509) +- multi\_scratch test with hwloc and pthreads seg-faults. [\#504](https://github.com/kokkos/kokkos/issues/504) +- generate\_makefile.bash: "make install" is broken [\#503](https://github.com/kokkos/kokkos/issues/503) +- make clean in Out of Source Build/Tests Does Not Work Correctly [\#502](https://github.com/kokkos/kokkos/issues/502) +- Makefiles for test and examples have issues in Cuda when CXX is not explicitly specified [\#497](https://github.com/kokkos/kokkos/issues/497) +- Dispatch lambda test directly inside GTEST macro doesn't work with nvcc [\#491](https://github.com/kokkos/kokkos/issues/491) +- UnitTests with HWLOC enabled fail if run with mpirun bound to a single core [\#489](https://github.com/kokkos/kokkos/issues/489) +- Failing Reducer Test on Mac with Pthreads [\#479](https://github.com/kokkos/kokkos/issues/479) +- make test Dumps Error with Clang Not Found [\#471](https://github.com/kokkos/kokkos/issues/471) +- OpenMP TeamPolicy member broadcast not using correct volatile shared variable [\#424](https://github.com/kokkos/kokkos/issues/424) +- TaskPolicy - generate error when attempt to use uninitialized [\#396](https://github.com/kokkos/kokkos/issues/396) +- New task policy implementation is pulling in old experimental code. 
[\#372](https://github.com/kokkos/kokkos/issues/372) +- MemoryPool unit test hangs on Power8 with GCC 6.1.0 [\#298](https://github.com/kokkos/kokkos/issues/298) + +## [2.01.10](https://github.com/kokkos/kokkos/tree/2.01.10) (2016-09-27) +[Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.06...2.01.10) + +**Implemented enhancements:** + +- Enable Profiling by default in Tribits build [\#438](https://github.com/kokkos/kokkos/issues/438) +- parallel\_reduce\(0\), parallel\_scan\(0\) unit tests [\#436](https://github.com/kokkos/kokkos/issues/436) +- data\(\)==NULL after realloc with LayoutStride [\#351](https://github.com/kokkos/kokkos/issues/351) +- Fix tutorials to track new Kokkos::View [\#323](https://github.com/kokkos/kokkos/issues/323) +- Rename team policy set\_scratch\_size. [\#195](https://github.com/kokkos/kokkos/issues/195) + +**Fixed bugs:** + +- Possible Issue with Intel 17.0.098 and GCC 6.1.0 in Develop Branch [\#445](https://github.com/kokkos/kokkos/issues/445) +- Makefile spits syntax error [\#435](https://github.com/kokkos/kokkos/issues/435) +- Kokkos::sort fails for view with all the same values [\#422](https://github.com/kokkos/kokkos/issues/422) +- Generic Reducers: can't accept inline constructed reducer [\#404](https://github.com/kokkos/kokkos/issues/404) +- data\(\)==NULL after realloc with LayoutStride [\#351](https://github.com/kokkos/kokkos/issues/351) +- const subview of const view with compile time dimensions on Cuda backend [\#310](https://github.com/kokkos/kokkos/issues/310) +- Kokkos \(in Trilinos\) Causes Internal Compiler Error on CUDA 8.0.21-EA on POWER8 [\#307](https://github.com/kokkos/kokkos/issues/307) +- Core Oversubscription Detection Broken? 
[\#159](https://github.com/kokkos/kokkos/issues/159) + + +## [2.01.06](https://github.com/kokkos/kokkos/tree/2.01.06) (2016-09-02) +[Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.00...2.01.06) + +**Implemented enhancements:** + +- Add "standard" reducers for lambda-supportable customized reduce [\#411](https://github.com/kokkos/kokkos/issues/411) +- TaskPolicy - single thread back-end execution [\#390](https://github.com/kokkos/kokkos/issues/390) +- Kokkos master clone tag [\#387](https://github.com/kokkos/kokkos/issues/387) +- Query memory requirements from task policy [\#378](https://github.com/kokkos/kokkos/issues/378) +- Output order of test\_atomic.cpp is confusing [\#373](https://github.com/kokkos/kokkos/issues/373) +- Missing testing for atomics [\#341](https://github.com/kokkos/kokkos/issues/341) +- Feature request for Kokkos to provide Kokkos::atomic\_fetch\_max and atomic\_fetch\_min [\#336](https://github.com/kokkos/kokkos/issues/336) +- TaskPolicy\<Cuda\> performance requires teams mapped to warps [\#218](https://github.com/kokkos/kokkos/issues/218) + +**Fixed bugs:** + +- Reduce with Teams broken for custom initialize [\#407](https://github.com/kokkos/kokkos/issues/407) +- Failing Kokkos build on Debian [\#402](https://github.com/kokkos/kokkos/issues/402) +- Failing Tests on NVIDIA Pascal GPUs [\#398](https://github.com/kokkos/kokkos/issues/398) +- Algorithms: fill\_random assumes dimensions fit in unsigned int [\#389](https://github.com/kokkos/kokkos/issues/389) +- Kokkos::subview with RandomAccess Memory Trait [\#385](https://github.com/kokkos/kokkos/issues/385) +- Build warning \(signed / unsigned comparison\) in Cuda implementation [\#365](https://github.com/kokkos/kokkos/issues/365) +- wrong results for a parallel\_reduce with CUDA8 / Maxwell50 [\#352](https://github.com/kokkos/kokkos/issues/352) +- Hierarchical parallelism - 3 level unit test [\#344](https://github.com/kokkos/kokkos/issues/344) +- Can I allocate a View w/ both 
WithoutInitializing & AllowPadding? [\#324](https://github.com/kokkos/kokkos/issues/324) +- subview View layout determination [\#309](https://github.com/kokkos/kokkos/issues/309) +- Unit tests with Cuda - Maxwell [\#196](https://github.com/kokkos/kokkos/issues/196) + +## [2.01.00](https://github.com/kokkos/kokkos/tree/2.01.00) (2016-07-21) +[Full Changelog](https://github.com/kokkos/kokkos/compare/End_C++98...2.01.00) + +**Implemented enhancements:** + +- Edit ViewMapping so assigning Views with the same custom layout compiles when const casting [\#327](https://github.com/kokkos/kokkos/issues/327) +- DynRankView: Performance improvement for operator\(\) [\#321](https://github.com/kokkos/kokkos/issues/321) +- Interoperability between static and dynamic rank views [\#295](https://github.com/kokkos/kokkos/issues/295) +- subview member function ? [\#280](https://github.com/kokkos/kokkos/issues/280) +- Interoperability between View and DynRankView. [\#245](https://github.com/kokkos/kokkos/issues/245) +- \(Trilinos\) build warning in atomic\_assign, with Kokkos::complex [\#177](https://github.com/kokkos/kokkos/issues/177) +- View\<\>::shmem\_size should runtime check for number of arguments equal to rank [\#176](https://github.com/kokkos/kokkos/issues/176) +- Custom reduction join via lambda argument [\#99](https://github.com/kokkos/kokkos/issues/99) +- DynRankView with 0 dimensions passed in at construction [\#293](https://github.com/kokkos/kokkos/issues/293) +- Inject view\_alloc and friends into Kokkos namespace [\#292](https://github.com/kokkos/kokkos/issues/292) +- Less restrictive TeamPolicy reduction on Cuda [\#286](https://github.com/kokkos/kokkos/issues/286) +- deep\_copy using remap with source execution space [\#267](https://github.com/kokkos/kokkos/issues/267) +- Suggestion: Enable opt-in L1 caching via nvcc-wrapper [\#261](https://github.com/kokkos/kokkos/issues/261) +- More flexible create\_mirror functions 
[\#260](https://github.com/kokkos/kokkos/issues/260) +- Rename View::memory\_span to View::required\_allocation\_size [\#256](https://github.com/kokkos/kokkos/issues/256) +- Use of subviews and views with compile-time dimensions [\#237](https://github.com/kokkos/kokkos/issues/237) +- Use of subviews and views with compile-time dimensions [\#237](https://github.com/kokkos/kokkos/issues/237) +- Kokkos::Timer [\#234](https://github.com/kokkos/kokkos/issues/234) +- Fence CudaUVMSpace allocations [\#230](https://github.com/kokkos/kokkos/issues/230) +- View::operator\(\) accept std::is\_integral and std::is\_enum [\#227](https://github.com/kokkos/kokkos/issues/227) +- Allocating zero size View [\#216](https://github.com/kokkos/kokkos/issues/216) +- Thread scalable memory pool [\#212](https://github.com/kokkos/kokkos/issues/212) +- Add a way to disable memory leak output [\#194](https://github.com/kokkos/kokkos/issues/194) +- Kokkos exec space init should init Kokkos profiling [\#192](https://github.com/kokkos/kokkos/issues/192) +- Runtime rank wrapper for View [\#189](https://github.com/kokkos/kokkos/issues/189) +- Profiling Interface [\#158](https://github.com/kokkos/kokkos/issues/158) +- Fix View assignment \(of managed to unmanaged\) [\#153](https://github.com/kokkos/kokkos/issues/153) +- Add unit test for assignment of managed View to unmanaged View [\#152](https://github.com/kokkos/kokkos/issues/152) +- Check for oversubscription of threads with MPI in Kokkos::initialize [\#149](https://github.com/kokkos/kokkos/issues/149) +- Dynamic resizeable 1dimensional view [\#143](https://github.com/kokkos/kokkos/issues/143) +- Develop TaskPolicy for CUDA [\#142](https://github.com/kokkos/kokkos/issues/142) +- New View : Test Compilation Downstream [\#138](https://github.com/kokkos/kokkos/issues/138) +- New View Implementation [\#135](https://github.com/kokkos/kokkos/issues/135) +- Add variant of subview that lets users add traits 
[\#134](https://github.com/kokkos/kokkos/issues/134) +- NVCC-WRAPPER: Add --host-only flag [\#121](https://github.com/kokkos/kokkos/issues/121) +- Address gtest issue with TriBITS Kokkos build outside of Trilinos [\#117](https://github.com/kokkos/kokkos/issues/117) +- Make tests pass with -expt-extended-lambda on CUDA [\#108](https://github.com/kokkos/kokkos/issues/108) +- Dynamic scheduling for parallel\_for and parallel\_reduce [\#106](https://github.com/kokkos/kokkos/issues/106) +- Runtime or compile time error when reduce functor's join is not properly specified as const member function or with volatile arguments [\#105](https://github.com/kokkos/kokkos/issues/105) +- Error out when the number of threads is modified after kokkos is initialized [\#104](https://github.com/kokkos/kokkos/issues/104) +- Porting to POWER and remove assumption of X86 default [\#103](https://github.com/kokkos/kokkos/issues/103) +- Dynamic scheduling option for RangePolicy [\#100](https://github.com/kokkos/kokkos/issues/100) +- SharedMemory Support for Lambdas [\#81](https://github.com/kokkos/kokkos/issues/81) +- Recommended TeamSize for Lambdas [\#80](https://github.com/kokkos/kokkos/issues/80) +- Add Aggressive Vectorization Compilation mode [\#72](https://github.com/kokkos/kokkos/issues/72) +- Dynamic scheduling team execution policy [\#53](https://github.com/kokkos/kokkos/issues/53) +- UVM allocations in multi-GPU systems [\#50](https://github.com/kokkos/kokkos/issues/50) +- Synchronic in Kokkos::Impl [\#44](https://github.com/kokkos/kokkos/issues/44) +- index and dimension types in for loops [\#28](https://github.com/kokkos/kokkos/issues/28) +- Subview assign of 1D Strided with stride 1 to LayoutLeft/Right [\#1](https://github.com/kokkos/kokkos/issues/1) + +**Fixed bugs:** + +- misspelled variable name in Kokkos\_Atomic\_Fetch + missing unit tests [\#340](https://github.com/kokkos/kokkos/issues/340) +- seg fault Kokkos::Impl::CudaInternal::print\_configuration 
[\#338](https://github.com/kokkos/kokkos/issues/338) +- Clang compiler error with named parallel\_reduce, tags, and TeamPolicy. [\#335](https://github.com/kokkos/kokkos/issues/335) +- Shared Memory Allocation Error at parallel\_reduce [\#311](https://github.com/kokkos/kokkos/issues/311) +- DynRankView: Fix resize and realloc [\#303](https://github.com/kokkos/kokkos/issues/303) +- Scratch memory and dynamic scheduling [\#279](https://github.com/kokkos/kokkos/issues/279) +- MemoryPool infinite loop when out of memory [\#312](https://github.com/kokkos/kokkos/issues/312) +- Kokkos DynRankView changes break Sacado and Panzer [\#299](https://github.com/kokkos/kokkos/issues/299) +- MemoryPool fails to compile on non-cuda non-x86 [\#297](https://github.com/kokkos/kokkos/issues/297) +- Random Number Generator Fix [\#296](https://github.com/kokkos/kokkos/issues/296) +- View template parameter ordering Bug [\#282](https://github.com/kokkos/kokkos/issues/282) +- Serial task policy broken. [\#281](https://github.com/kokkos/kokkos/issues/281) +- deep\_copy with LayoutStride should not memcpy [\#262](https://github.com/kokkos/kokkos/issues/262) +- DualView::need\_sync should be a const method [\#248](https://github.com/kokkos/kokkos/issues/248) +- Arbitrary-sized atomics on GPUs broken; loop forever [\#238](https://github.com/kokkos/kokkos/issues/238) +- boolean reduction value\_type changes answer [\#225](https://github.com/kokkos/kokkos/issues/225) +- Custom init\(\) function for parallel\_reduce with array value\_type [\#210](https://github.com/kokkos/kokkos/issues/210) +- unit\_test Makefile is Broken - Recursively Calls itself until Machine Apocalypse. 
[\#202](https://github.com/kokkos/kokkos/issues/202) +- nvcc\_wrapper Does Not Support -Xcompiler \<compiler option\> [\#198](https://github.com/kokkos/kokkos/issues/198) +- Kokkos exec space init should init Kokkos profiling [\#192](https://github.com/kokkos/kokkos/issues/192) +- Kokkos Threads Backend impl\_shared\_alloc Broken on Intel 16.1 \(Shepard Haswell\) [\#186](https://github.com/kokkos/kokkos/issues/186) +- pthread back end hangs if used uninitialized [\#182](https://github.com/kokkos/kokkos/issues/182) +- parallel\_reduce of size 0, not calling init/join [\#175](https://github.com/kokkos/kokkos/issues/175) +- Bug in Threads with OpenMP enabled [\#173](https://github.com/kokkos/kokkos/issues/173) +- KokkosExp\_SharedAlloc, m\_team\_work\_index inaccessible [\#166](https://github.com/kokkos/kokkos/issues/166) +- 128-bit CAS without Assembly Broken? [\#161](https://github.com/kokkos/kokkos/issues/161) +- fatal error: Cuda/Kokkos\_Cuda\_abort.hpp: No such file or directory [\#157](https://github.com/kokkos/kokkos/issues/157) +- Power8: Fix OpenMP backend [\#139](https://github.com/kokkos/kokkos/issues/139) +- Data race in Kokkos OpenMP initialization [\#131](https://github.com/kokkos/kokkos/issues/131) +- parallel\_launch\_local\_memory and cuda 7.5 [\#125](https://github.com/kokkos/kokkos/issues/125) +- Resize can fail with Cuda due to asynchronous dispatch [\#119](https://github.com/kokkos/kokkos/issues/119) +- Qthread taskpolicy initialization bug. 
[\#92](https://github.com/kokkos/kokkos/issues/92) +- Windows: sys/mman.h [\#89](https://github.com/kokkos/kokkos/issues/89) +- Windows: atomic\_fetch\_sub\(\) [\#88](https://github.com/kokkos/kokkos/issues/88) +- Windows: snprintf [\#87](https://github.com/kokkos/kokkos/issues/87) +- Parallel\_Reduce with TeamPolicy and league size of 0 returns garbage [\#85](https://github.com/kokkos/kokkos/issues/85) +- Throw with Cuda when using \(2D\) team\_policy parallel\_reduce with less than a warp size [\#76](https://github.com/kokkos/kokkos/issues/76) +- Scalar views don't work with Kokkos::Atomic memory trait [\#69](https://github.com/kokkos/kokkos/issues/69) +- Reduce the number of threads per team for Cuda [\#63](https://github.com/kokkos/kokkos/issues/63) +- Named Kernels fail for reductions with CUDA [\#60](https://github.com/kokkos/kokkos/issues/60) +- Kokkos View dimension\_\(\) for long returning unsigned int [\#20](https://github.com/kokkos/kokkos/issues/20) +- atomic test hangs with LLVM [\#6](https://github.com/kokkos/kokkos/issues/6) +- OpenMP Test should set omp\_set\_num\_threads to 1 [\#4](https://github.com/kokkos/kokkos/issues/4) + +**Closed issues:** + +- develop branch broken with CUDA 8 and --expt-extended-lambda [\#354](https://github.com/kokkos/kokkos/issues/354) +- --arch=KNL with Intel 2016 build failure [\#349](https://github.com/kokkos/kokkos/issues/349) +- Error building with Cuda when passing -DKOKKOS\_CUDA\_USE\_LAMBDA to generate\_makefile.bash [\#343](https://github.com/kokkos/kokkos/issues/343) +- Can I safely use int indices in a 2-D View with capacity \> 2B? 
[\#318](https://github.com/kokkos/kokkos/issues/318) +- Kokkos::ViewAllocateWithoutInitializing is not working [\#317](https://github.com/kokkos/kokkos/issues/317) +- Intel build on Mac OS X [\#277](https://github.com/kokkos/kokkos/issues/277) +- deleted [\#271](https://github.com/kokkos/kokkos/issues/271) +- Broken Mira build [\#268](https://github.com/kokkos/kokkos/issues/268) +- 32-bit build [\#246](https://github.com/kokkos/kokkos/issues/246) +- parallel\_reduce with RDC crashes linker [\#232](https://github.com/kokkos/kokkos/issues/232) +- build of Kokkos\_Sparse\_MV\_impl\_spmv\_Serial.cpp.o fails if you use nvcc and have cuda disabled [\#209](https://github.com/kokkos/kokkos/issues/209) +- Kokkos Serial execution space is not tested with TeamPolicy. [\#207](https://github.com/kokkos/kokkos/issues/207) +- Unit test failure on Hansen KokkosCore\_UnitTest\_Cuda\_MPI\_1 [\#200](https://github.com/kokkos/kokkos/issues/200) +- nvcc compiler warning: calling a \_\_host\_\_ function from a \_\_host\_\_ \_\_device\_\_ function is not allowed [\#180](https://github.com/kokkos/kokkos/issues/180) +- Intel 15 build error with defaulted "move" operators [\#171](https://github.com/kokkos/kokkos/issues/171) +- missing libkokkos.a during Trilinos 12.4.2 build, yet other libkokkos\*.a libs are there [\#165](https://github.com/kokkos/kokkos/issues/165) +- Tie atomic updates to execution space or even to thread team? 
\(speculation\) [\#144](https://github.com/kokkos/kokkos/issues/144) +- New View: Compiletime/size Test [\#137](https://github.com/kokkos/kokkos/issues/137) +- New View : Performance Test [\#136](https://github.com/kokkos/kokkos/issues/136) +- Signed/unsigned comparison warning in CUDA parallel [\#130](https://github.com/kokkos/kokkos/issues/130) +- Kokkos::complex: Need op\* w/ std::complex & real [\#126](https://github.com/kokkos/kokkos/issues/126) +- Use uintptr\_t for casting pointers [\#110](https://github.com/kokkos/kokkos/issues/110) +- Default thread mapping behavior between P and Q threads. [\#91](https://github.com/kokkos/kokkos/issues/91) +- Windows: Atomic\_Fetch\_Exchange\(\) return type [\#90](https://github.com/kokkos/kokkos/issues/90) +- Synchronic unit test is way too long [\#84](https://github.com/kokkos/kokkos/issues/84) +- nvcc\_wrapper -\> $\(NVCC\_WRAPPER\) [\#42](https://github.com/kokkos/kokkos/issues/42) +- Check compiler version and print helpful message [\#39](https://github.com/kokkos/kokkos/issues/39) +- Kokkos shared memory on Cuda uses a lot of registers [\#31](https://github.com/kokkos/kokkos/issues/31) +- Can not pass unit test `cuda.space` without a GT 720 [\#25](https://github.com/kokkos/kokkos/issues/25) +- Makefile.kokkos lacks bounds checking option that CMake has [\#24](https://github.com/kokkos/kokkos/issues/24) +- Kokkos can not complete unit tests with CUDA UVM enabled [\#23](https://github.com/kokkos/kokkos/issues/23) +- Simplify teams + shared memory histogram example to remove vectorization [\#21](https://github.com/kokkos/kokkos/issues/21) +- Kokkos needs to revert to ${PROJECT\_NAME}\_ENABLE\_CXX11 not Trilinos\_ENABLE\_CXX11 [\#17](https://github.com/kokkos/kokkos/issues/17) +- Kokkos Base Makefile adds AVX to KNC Build [\#16](https://github.com/kokkos/kokkos/issues/16) +- MS Visual Studio 2013 Build Errors [\#9](https://github.com/kokkos/kokkos/issues/9) +- subview\(X, ALL\(\), j\) for 2-D LayoutRight View X: should 
it view a column? [\#5](https://github.com/kokkos/kokkos/issues/5) + +## [End_C++98](https://github.com/kokkos/kokkos/tree/End_C++98) (2015-04-15) + + +\* *This Change Log was automatically generated by [github_changelog_generator](https://github.com/skywinder/Github-Changelog-Generator)* diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 1219352f73dc47360555639b1f4c3ddde410e9a5..2b2b9be6aa3b9bc7ae04a3c462a9b5ef7c986543 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -34,8 +34,8 @@ TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS) # for compatibility with Kokkos' Makefile build system. TRIBITS_ADD_OPTION_AND_DEFINE( - ${PACKAGE_NAME}_ENABLE_DEBUG - ${PACKAGE_NAME_UC}_HAVE_DEBUG + Kokkos_ENABLE_DEBUG + KOKKOS_HAVE_DEBUG "Enable run-time debug checks. These checks may be expensive, so they are disabled by default in a release build." ${${PROJECT_NAME}_ENABLE_DEBUG} ) @@ -57,7 +57,21 @@ TRIBITS_ADD_OPTION_AND_DEFINE( TRIBITS_ADD_OPTION_AND_DEFINE( Kokkos_ENABLE_Cuda_UVM KOKKOS_USE_CUDA_UVM - "Enable CUDA Unified Virtual Memory support in Kokkos." + "Enable CUDA Unified Virtual Memory as the default in Kokkos." + OFF + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Cuda_RDC + KOKKOS_HAVE_CUDA_RDC + "Enable CUDA Relocatable Device Code support in Kokkos." + OFF + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Cuda_Lambda + KOKKOS_HAVE_CUDA_LAMBDA + "Enable CUDA LAMBDA support in Kokkos." OFF ) @@ -72,6 +86,9 @@ ASSERT_DEFINED(TPL_ENABLE_Pthread) IF (Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread) MESSAGE(FATAL_ERROR "You set Kokkos_ENABLE_Pthread=ON, but Trilinos' support for Pthread(s) is not enabled (TPL_ENABLE_Pthread=OFF). This is not allowed. 
Please enable Pthreads in Trilinos before attempting to enable Kokkos' support for Pthreads.") ENDIF () +IF (NOT TPL_ENABLE_Pthread) + ADD_DEFINITIONS(-DGTEST_HAS_PTHREAD=0) +ENDIF() TRIBITS_ADD_OPTION_AND_DEFINE( Kokkos_ENABLE_OpenMP @@ -162,13 +179,28 @@ TRIBITS_ADD_OPTION_AND_DEFINE( #------------------------------------------------------------------------------ # -# C) Process the subpackages for Kokkos +# C) Install Kokkos' executable scripts +# + + +# nvcc_wrapper is Kokkos' wrapper for NVIDIA's NVCC CUDA compiler. +# Kokkos needs nvcc_wrapper in order to build. Other libraries and +# executables also need nvcc_wrapper. Thus, we need to install it. +# If the argument of DESTINATION is a relative path, CMake computes it +# as relative to ${CMAKE_INSTALL_PREFIX}. + +INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION bin) + + +#------------------------------------------------------------------------------ +# +# D) Process the subpackages for Kokkos # TRIBITS_PROCESS_SUBPACKAGES() # -# D) If Kokkos itself is enabled, process the Kokkos package +# E) If Kokkos itself is enabled, process the Kokkos package # TRIBITS_PACKAGE_DEF() diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 73a332ee1110a2bff148f08ca3d9cc758ecabff3..038c252cf034654abcc5a6b100f6f99bd46663ee 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -7,25 +7,26 @@ CXXFLAGS=$(CCFLAGS) #Options: OpenMP,Serial,Pthreads,Cuda KOKKOS_DEVICES ?= "OpenMP" #KOKKOS_DEVICES ?= "Pthreads" -#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv8,BGQ,Power7,Power8,KNL,BDW +#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,KNL,BDW,SKX KOKKOS_ARCH ?= "" #Options: yes,no KOKKOS_DEBUG ?= "no" #Options: hwloc,librt,experimental_memkind KOKKOS_USE_TPLS ?= "" -#Options: 
c++11,c++1z KOKKOS_CXX_STANDARD ?= "c++11" #Options: aggressive_vectorization,disable_profiling KOKKOS_OPTIONS ?= "" #Default settings specific options #Options: force_uvm,use_ldg,rdc,enable_lambda -KOKKOS_CUDA_OPTIONS ?= "" +KOKKOS_CUDA_OPTIONS ?= "enable_lambda" # Check for general settings KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l)) KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l)) +KOKKOS_INTERNAL_ENABLE_CXX1Z := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++1z" | wc -l)) # Check for external libraries KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l)) @@ -53,23 +54,71 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0) endif endif +# Check for other Execution Spaces + +KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l)) + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc) + CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=) + KOKKOS_INTERNAL_COMPILER_NVCC_VERSION := $(shell nvcc --version 2>&1 | grep release | cut -d' ' -f5 | cut -d',' -f1 | tr -d .) 
+endif + +# Check OS + +KOKKOS_OS := $(shell uname -s) +KOKKOS_INTERNAL_OS_CYGWIN := $(shell uname -s | grep CYGWIN | wc -l) +KOKKOS_INTERNAL_OS_LINUX := $(shell uname -s | grep Linux | wc -l) +KOKKOS_INTERNAL_OS_DARWIN := $(shell uname -s | grep Darwin | wc -l) + +# Check compiler + KOKKOS_INTERNAL_COMPILER_INTEL := $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l) KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PGI | wc -l) KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l) KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l) -KOKKOS_INTERNAL_OS_CYGWIN := $(shell uname | grep CYGWIN | wc -l) +KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(CXX) --version 2>&1 | grep "nvcc" | wc -l) +ifneq ($(OMPI_CXX),) + KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(OMPI_CXX) --version 2>&1 | grep "nvcc" | wc -l) +endif +ifneq ($(MPICH_CXX),) + KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(MPICH_CXX) --version 2>&1 | grep "nvcc" | wc -l) +endif +KOKKOS_INTERNAL_COMPILER_CLANG := $(shell $(CXX) --version 2>&1 | grep "clang" | wc -l) + +ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2) + KOKKOS_INTERNAL_COMPILER_CLANG = 1 +endif +ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 2) + KOKKOS_INTERNAL_COMPILER_XL = 1 +endif + +ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell clang --version | grep version | cut -d ' ' -f3 | tr -d '.') + ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_CLANG_VERSION) -lt 400; echo $$?),0) + $(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher) + endif + KOKKOS_INTERNAL_CUDA_USE_LAMBDA := 1 + endif +endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) KOKKOS_INTERNAL_OPENMP_FLAG := -mp else - ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp 
else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - # OpenMP is turned on by default in Cray compiler environment - KOKKOS_INTERNAL_OPENMP_FLAG := + ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) + KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp else - KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + # OpenMP is turned on by default in Cray compiler environment + KOKKOS_INTERNAL_OPENMP_FLAG := + else + KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp + endif endif endif endif @@ -84,13 +133,11 @@ else KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11 else KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11 + KOKKOS_INTERNAL_CXX1Z_FLAG := --std=c++1z endif endif endif -# Check for other Execution Spaces -KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l)) - # Check for Kokkos Architecture settings #Intel based @@ -98,6 +145,7 @@ KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l)) KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l)) KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_SKX := $(strip $(shell echo $(KOKKOS_ARCH) | grep SKX | wc -l)) KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l)) #NVIDIA based @@ -110,11 +158,13 @@ KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal61 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal60 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo 
$(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ + + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) @@ -127,13 +177,16 @@ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_AR + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ + + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) endif #ARM based -KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv80 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv81 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8-ThunderX | wc -l)) #IBM based KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l)) @@ -145,17 +198,18 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l)) #Any AVX? 
-KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc )) -KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc )) -KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc )) +KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc )) +KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc )) +KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc )) +KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) # Decide what ISA level we are able to support -KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc )) +KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc )) KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc )) #Incompatible flags? 
-KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)>1" | bc )) +KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)>1" | bc )) KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc)) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1) @@ -207,15 +261,21 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1) + tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1) + tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1) + tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) @@ -230,9 +290,15 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1) tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp ) endif +ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG) + tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp ) + tmp := 
$(shell echo "\#define KOKKOS_HAVE_CXX1Z 1" >> KokkosCore_config.tmp ) +endif + ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - KOKKOS_CXXFLAGS += -G +ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + KOKKOS_CXXFLAGS += -lineinfo endif KOKKOS_CXXFLAGS += -g KOKKOS_LDFLAGS += -g -ldl @@ -273,13 +339,14 @@ endif tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp) +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1) tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1) tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) @@ -289,27 +356,101 @@ ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) endif ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -expt-extended-lambda + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -expt-extended-lambda + else + $(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.) 
+ endif + endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) + endif +endif endif #Add Architecture flags -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp ) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp ) + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += + else + KOKKOS_CXXFLAGS += -march=armv8-a + KOKKOS_LDFLAGS += -march=armv8-a + endif + endif +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV81 1" >> KokkosCore_config.tmp ) + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += + else + KOKKOS_CXXFLAGS += -march=armv8.1-a + KOKKOS_LDFLAGS += -march=armv8.1-a + endif + endif +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV8_THUNDERX 1" >> KokkosCore_config.tmp ) ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += KOKKOS_LDFLAGS += - else - KOKKOS_CXXFLAGS += -mavx - KOKKOS_LDFLAGS += -mavx + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += + else + KOKKOS_CXXFLAGS += -march=armv8-a -mtune=thunderx + KOKKOS_LDFLAGS += -march=armv8-a -mtune=thunderx + endif endif endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp ) + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -mavx + KOKKOS_LDFLAGS += -mavx + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + + else 
+ ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += -tp=sandybridge + KOKKOS_LDFLAGS += -tp=sandybridge + else + # Assume that this is a really a GNU compiler + KOKKOS_CXXFLAGS += -mavx + KOKKOS_LDFLAGS += -mavx + endif + endif + endif +endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1) tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8 - KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8 + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + + else + # Assume that this is a really a GNU compiler or it could be XL on P8 + KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8 + KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8 + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1) @@ -322,7 +463,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1) else ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - + KOKKOS_CXXFLAGS += -tp=haswell + KOKKOS_LDFLAGS += -tp=haswell else # Assume that this is a really a GNU compiler KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 @@ -352,52 +494,85 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1) endif endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512XEON 1" >> KokkosCore_config.tmp ) + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xCORE-AVX512 + KOKKOS_LDFLAGS += -xCORE-AVX512 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + + else + # Nothing here yet + KOKKOS_CXXFLAGS += -march=skylake-avx512 + KOKKOS_LDFLAGS += -march=skylake-avx512 + endif + endif + endif +endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1) tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp ) KOKKOS_CXXFLAGS += -mmic KOKKOS_LDFLAGS += -mmic endif +#Figure out the architecture flag for Cuda ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) +ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-arch +endif +ifeq 
($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-x cuda --cuda-gpu-arch +endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -arch=sm_30 + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -arch=sm_32 + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -arch=sm_35 + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -arch=sm_37 + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -arch=sm_50 + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -arch=sm_52 + KOKKOS_CXXFLAGS += 
$(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -arch=sm_53 + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -arch=sm_61 + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60 endif endif @@ -424,6 +599,7 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) + KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 KOKKOS_LIBS += -lcudart -lcuda endif @@ -443,7 +619,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) - ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG) else KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) @@ -451,6 +627,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) endif +#Explicitly set the GCC Toolchain for Clang +ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_INTERNAL_GCC_PATH = $(shell 
which g++) + KOKKOS_INTERNAL_GCC_TOOLCHAIN = $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=) + KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) -DKOKKOS_CUDA_CLANG_WORKAROUND -DKOKKOS_CUDA_USE_LDG_INTRINSIC + KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) +endif + #With Cygwin functions such as fdopen and fileno are not defined #when strict ansi is enabled. strict ansi gets enabled with --std=c++11 #though. So we hard undefine it here. Not sure if that has any bad side effects @@ -471,7 +655,7 @@ KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ)) include $(KOKKOS_PATH)/Makefile.targets kokkos-clean: - -rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a + rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS) ar cr libkokkos.a $(KOKKOS_OBJ_LINK) diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets index 86929ea0fe6e9e2158923e6907c7b2a179e5af61..a48a5f6eb7ea78712b3f6caf695745b4ef18c043 100644 --- a/lib/kokkos/Makefile.targets +++ b/lib/kokkos/Makefile.targets @@ -14,20 +14,16 @@ Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc. 
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp -Kokkos_Serial_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp -Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp -Kokkos_Shape.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp +Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp -KokkosExp_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp 
+Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp @@ -38,8 +34,6 @@ Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp -Kokkos_Cuda_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp endif ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) @@ -47,8 +41,6 @@ Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp -Kokkos_Threads_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp endif ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) @@ -67,6 +59,4 @@ endif Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp -Kokkos_HBWAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp diff --git a/lib/kokkos/README b/lib/kokkos/README index b094578af631b179e9744f744a823a1800bd885b..ffc1fe53b5c6a3d555e61626768df727b51a34a2 100644 --- a/lib/kokkos/README +++ b/lib/kokkos/README @@ -45,31 +45,32 @@ Primary tested compilers on X86 are: Intel 14.0.4 Intel 15.0.2 Intel 16.0.1 + Intel 17.0.098 Clang 3.5.2 Clang 3.6.1 + Clang 3.9.0 Primary tested compilers on Power 8 are: - IBM XL 13.1.3 (OpenMP,Serial) - GCC 4.9.2 (OpenMP,Serial) - GCC 5.3.0 (OpenMP,Serial) + GCC 5.4.0 (OpenMP,Serial) + IBM XL 13.1.3 (OpenMP, Serial) (There is a workaround in place to avoid a compiler bug) + +Primary tested compilers on Intel KNL are: + Intel 16.2.181 (with gcc 4.7.2) + Intel 17.0.098 (with gcc 4.7.2) Secondary tested compilers are: - CUDA 6.5 (with gcc 4.7.2) CUDA 7.0 (with gcc 4.7.2) - CUDA 7.5 (with gcc 4.8.4) + CUDA 7.5 (with gcc 4.7.2) + CUDA 8.0 (with gcc 5.3.0 on X86 and gcc 5.4.0 on Power8) + CUDA/Clang 8.0 using Clang/Trunk compiler Other compilers working: X86: - Intel 17.0.042 (the FENL example causes internal compiler error) PGI 15.4 Cygwin 2.1.0 64bit with gcc 4.9.3 - KNL: - Intel 16.2.181 (the FENL example causes internal compiler error) - Intel 17.0.042 (the FENL example causes internal compiler error) Known non-working combinations: Power8: - GCC 6.1.0 Pthreads backend @@ -92,9 +93,10 @@ master branch, without -Werror and only for a select set of backends. In the 'example/tutorial' directory you will find step by step tutorial examples which explain many of the features of Kokkos. They work with -simple Makefiles. To build with g++ and OpenMP simply type 'make openmp' +simple Makefiles. To build with g++ and OpenMP simply type 'make' in the 'example/tutorial' directory. 
This will build all examples in the -subfolders. +subfolders. To change the build options refer to the Programming Guide +in the compilation section. ============================================================================ ====Running Unit Tests====================================================== diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index d7c06dc14be99bc63b8f0170843d81067577771e..78cddeeaecb2e1f8b748b70dcb848e2778fdc7e1 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -476,54 +476,54 @@ namespace Kokkos { }; template<class Generator> - struct rand<Generator, ::Kokkos::complex<float> > { + struct rand<Generator, Kokkos::complex<float> > { KOKKOS_INLINE_FUNCTION - static ::Kokkos::complex<float> max () { - return ::Kokkos::complex<float> (1.0, 1.0); + static Kokkos::complex<float> max () { + return Kokkos::complex<float> (1.0, 1.0); } KOKKOS_INLINE_FUNCTION - static ::Kokkos::complex<float> draw (Generator& gen) { + static Kokkos::complex<float> draw (Generator& gen) { const float re = gen.frand (); const float im = gen.frand (); - return ::Kokkos::complex<float> (re, im); + return Kokkos::complex<float> (re, im); } KOKKOS_INLINE_FUNCTION - static ::Kokkos::complex<float> draw (Generator& gen, const ::Kokkos::complex<float>& range) { + static Kokkos::complex<float> draw (Generator& gen, const Kokkos::complex<float>& range) { const float re = gen.frand (real (range)); const float im = gen.frand (imag (range)); - return ::Kokkos::complex<float> (re, im); + return Kokkos::complex<float> (re, im); } KOKKOS_INLINE_FUNCTION - static ::Kokkos::complex<float> draw (Generator& gen, const ::Kokkos::complex<float>& start, const ::Kokkos::complex<float>& end) { + static Kokkos::complex<float> draw (Generator& gen, const Kokkos::complex<float>& start, const Kokkos::complex<float>& end) { const float re = gen.frand (real (start), real (end)); const 
float im = gen.frand (imag (start), imag (end)); - return ::Kokkos::complex<float> (re, im); + return Kokkos::complex<float> (re, im); } }; template<class Generator> - struct rand<Generator, ::Kokkos::complex<double> > { + struct rand<Generator, Kokkos::complex<double> > { KOKKOS_INLINE_FUNCTION - static ::Kokkos::complex<double> max () { - return ::Kokkos::complex<double> (1.0, 1.0); + static Kokkos::complex<double> max () { + return Kokkos::complex<double> (1.0, 1.0); } KOKKOS_INLINE_FUNCTION - static ::Kokkos::complex<double> draw (Generator& gen) { + static Kokkos::complex<double> draw (Generator& gen) { const double re = gen.drand (); const double im = gen.drand (); - return ::Kokkos::complex<double> (re, im); + return Kokkos::complex<double> (re, im); } KOKKOS_INLINE_FUNCTION - static ::Kokkos::complex<double> draw (Generator& gen, const ::Kokkos::complex<double>& range) { + static Kokkos::complex<double> draw (Generator& gen, const Kokkos::complex<double>& range) { const double re = gen.drand (real (range)); const double im = gen.drand (imag (range)); - return ::Kokkos::complex<double> (re, im); + return Kokkos::complex<double> (re, im); } KOKKOS_INLINE_FUNCTION - static ::Kokkos::complex<double> draw (Generator& gen, const ::Kokkos::complex<double>& start, const ::Kokkos::complex<double>& end) { + static Kokkos::complex<double> draw (Generator& gen, const Kokkos::complex<double>& start, const Kokkos::complex<double>& end) { const double re = gen.drand (real (start), real (end)); const double im = gen.drand (imag (start), imag (end)); - return ::Kokkos::complex<double> (re, im); + return Kokkos::complex<double> (re, im); } }; @@ -670,8 +670,8 @@ namespace Kokkos { double S = 2.0; double U; while(S>=1.0) { - U = drand(); - const double V = drand(); + U = 2.0*drand() - 1.0; + const double V = 2.0*drand() - 1.0; S = U*U+V*V; } return U*sqrt(-2.0*log(S)/S); @@ -910,8 +910,8 @@ namespace Kokkos { double S = 2.0; double U; while(S>=1.0) { - U = drand(); - const 
double V = drand(); + U = 2.0*drand() - 1.0; + const double V = 2.0*drand() - 1.0; S = U*U+V*V; } return U*sqrt(-2.0*log(S)/S); @@ -1163,8 +1163,8 @@ namespace Kokkos { double S = 2.0; double U; while(S>=1.0) { - U = drand(); - const double V = drand(); + U = 2.0*drand() - 1.0; + const double V = 2.0*drand() - 1.0; S = U*U+V*V; } return U*sqrt(-2.0*log(S)/S); diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp index 6123ce978c8a385a87ac57bdca45a9ff8517757f..5b8c65fee1869c25681567036314d25beab9a5f2 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -51,7 +51,7 @@ namespace Kokkos { - namespace SortImpl { + namespace Impl { template<class ValuesViewType, int Rank=ValuesViewType::Rank> struct CopyOp; @@ -199,7 +199,7 @@ public: parallel_for(values.dimension_0(), bin_sort_sort_functor<ValuesViewType, offset_type, - SortImpl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order)); + Impl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order)); deep_copy(values,sorted_values); } @@ -262,17 +262,15 @@ public: } }; -namespace SortImpl { - template<class KeyViewType> -struct DefaultBinOp1D { +struct BinOp1D { const int max_bins_; const double mul_; typename KeyViewType::const_value_type range_; typename KeyViewType::const_value_type min_; //Construct BinOp with number of bins, minimum value and maxuimum value - DefaultBinOp1D(int max_bins__, typename KeyViewType::const_value_type min, + BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, typename KeyViewType::const_value_type max ) :max_bins_(max_bins__+1),mul_(1.0*max_bins__/(max-min)),range_(max-min),min_(min) {} @@ -298,13 +296,13 @@ struct DefaultBinOp1D { }; template<class KeyViewType> -struct DefaultBinOp3D { +struct BinOp3D { int max_bins_[3]; double mul_[3]; typename KeyViewType::non_const_value_type range_[3]; typename KeyViewType::non_const_value_type min_[3]; - DefaultBinOp3D(int 
max_bins__[], typename KeyViewType::const_value_type min[], + BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], typename KeyViewType::const_value_type max[] ) { max_bins_[0] = max_bins__[0]+1; @@ -348,109 +346,11 @@ struct DefaultBinOp3D { } }; -template<typename Scalar> -struct min_max { - Scalar min; - Scalar max; - bool init; - - KOKKOS_INLINE_FUNCTION - min_max() { - min = 0; - max = 0; - init = 0; - } - - KOKKOS_INLINE_FUNCTION - min_max (const min_max& val) { - min = val.min; - max = val.max; - init = val.init; - } - - KOKKOS_INLINE_FUNCTION - min_max operator = (const min_max& val) { - min = val.min; - max = val.max; - init = val.init; - return *this; - } - - KOKKOS_INLINE_FUNCTION - void operator+= (const Scalar& val) { - if(init) { - min = min<val?min:val; - max = max>val?max:val; - } else { - min = val; - max = val; - init = 1; - } - } - - KOKKOS_INLINE_FUNCTION - void operator+= (const min_max& val) { - if(init && val.init) { - min = min<val.min?min:val.min; - max = max>val.max?max:val.max; - } else { - if(val.init) { - min = val.min; - max = val.max; - init = 1; - } - } - } - - KOKKOS_INLINE_FUNCTION - void operator+= (volatile const Scalar& val) volatile { - if(init) { - min = min<val?min:val; - max = max>val?max:val; - } else { - min = val; - max = val; - init = 1; - } - } - - KOKKOS_INLINE_FUNCTION - void operator+= (volatile const min_max& val) volatile { - if(init && val.init) { - min = min<val.min?min:val.min; - max = max>val.max?max:val.max; - } else { - if(val.init) { - min = val.min; - max = val.max; - init = 1; - } - } - } -}; - - -template<class ViewType> -struct min_max_functor { - typedef typename ViewType::execution_space execution_space; - ViewType view; - typedef min_max<typename ViewType::non_const_value_type> value_type; - min_max_functor (const ViewType view_):view(view_) { - } - - KOKKOS_INLINE_FUNCTION - void operator()(const size_t& i, value_type& val) const { - val += view(i); - } -}; +namespace Impl { 
template<class ViewType> bool try_std_sort(ViewType view) { bool possible = true; -#if ! KOKKOS_USING_EXP_VIEW - size_t stride[8]; - view.stride(stride); -#else size_t stride[8] = { view.stride_0() , view.stride_1() , view.stride_2() @@ -460,8 +360,7 @@ bool try_std_sort(ViewType view) { , view.stride_6() , view.stride_7() }; -#endif - possible = possible && Impl::is_same<typename ViewType::memory_space, HostSpace>::value; + possible = possible && std::is_same<typename ViewType::memory_space, HostSpace>::value; possible = possible && (ViewType::Rank == 1); possible = possible && (stride[0] == 1); if(possible) { @@ -470,27 +369,39 @@ bool try_std_sort(ViewType view) { return possible; } +template<class ViewType> +struct min_max_functor { + typedef Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> minmax_scalar; + + ViewType view; + min_max_functor(const ViewType& view_):view(view_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const size_t& i, minmax_scalar& minmax) const { + if(view(i) < minmax.min_val) minmax.min_val = view(i); + if(view(i) > minmax.max_val) minmax.max_val = view(i); + } +}; + } template<class ViewType> void sort(ViewType view, bool always_use_kokkos_sort = false) { if(!always_use_kokkos_sort) { - if(SortImpl::try_std_sort(view)) return; + if(Impl::try_std_sort(view)) return; } - - typedef SortImpl::DefaultBinOp1D<ViewType> CompType; - SortImpl::min_max<typename ViewType::non_const_value_type> val; - parallel_reduce(view.dimension_0(),SortImpl::min_max_functor<ViewType>(view),val); - BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,val.min,val.max),true); + typedef BinOp1D<ViewType> CompType; + + Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> result; + Kokkos::Experimental::MinMax<typename ViewType::non_const_value_type> reducer(result); + parallel_reduce(Kokkos::RangePolicy<typename ViewType::execution_space>(0,view.dimension_0()), + 
Impl::min_max_functor<ViewType>(view),reducer); + if(result.min_val == result.max_val) return; + BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,result.min_val,result.max_val),true); bin_sort.create_permute_vector(); bin_sort.sort(view); } -/*template<class ViewType, class Comparator> -void sort(ViewType view, Comparator comp, bool always_use_kokkos_sort = false) { - -}*/ - } #endif diff --git a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt index 654104b44e7b395c6937f4c1dc35b4933018268e..fde6b967e06931ad5fd04f38ac2eba20ca654a9e 100644 --- a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt +++ b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt @@ -1,6 +1,6 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) SET(SOURCES diff --git a/lib/kokkos/algorithms/unit_tests/Makefile b/lib/kokkos/algorithms/unit_tests/Makefile index 5d79364c52abc7a8a61769d187fc06e5612e203b..3027c6a94b9826ba9da11648539dc5c83bebaa77 100644 --- a/lib/kokkos/algorithms/unit_tests/Makefile +++ b/lib/kokkos/algorithms/unit_tests/Makefile @@ -7,21 +7,18 @@ vpath %.cpp ${KOKKOS_PATH}/algorithms/unit_tests default: build_all echo "End Build" - -include $(KOKKOS_PATH)/Makefile.kokkos - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - CXX = $(NVCC_WRAPPER) - CXXFLAGS ?= -O3 - LINK = $(CXX) - LDFLAGS ?= -lpthread +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) + CXX = $(KOKKOS_PATH)/config/nvcc_wrapper else - CXX ?= g++ - CXXFLAGS ?= -O3 - LINK ?= $(CXX) - LDFLAGS ?= -lpthread + CXX = g++ endif +CXXFLAGS = -O3 +LINK ?= $(CXX) +LDFLAGS ?= -lpthread + +include $(KOKKOS_PATH)/Makefile.kokkos + KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests TEST_TARGETS = diff --git a/lib/kokkos/algorithms/unit_tests/TestSort.hpp 
b/lib/kokkos/algorithms/unit_tests/TestSort.hpp index ccbcbdd0011bbc577ac8c39b2f593ed35f2546ac..03e4fb691ef1a4ae6a7bed6471ccba4e3fd53762 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSort.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp @@ -131,6 +131,10 @@ void test_1D_sort(unsigned int n,bool force_kokkos) { typedef Kokkos::View<KeyType*,ExecutionSpace> KeyViewType; KeyViewType keys("Keys",n); + // Test sorting array with all numbers equal + Kokkos::deep_copy(keys,KeyType(1)); + Kokkos::sort(keys,force_kokkos); + Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931); Kokkos::fill_random(keys,g,Kokkos::Random_XorShift64_Pool<ExecutionSpace>::generator_type::MAX_URAND); @@ -174,7 +178,7 @@ void test_3D_sort(unsigned int n) { typename KeyViewType::value_type min[3] = {0,0,0}; typename KeyViewType::value_type max[3] = {100,100,100}; - typedef Kokkos::SortImpl::DefaultBinOp3D< KeyViewType > BinOp; + typedef Kokkos::BinOp3D< KeyViewType > BinOp; BinOp bin_op(bin_max,min,max); Kokkos::BinSort< KeyViewType , BinOp > Sorter(keys,bin_op,false); diff --git a/lib/kokkos/benchmarks/bytes_and_flops/Makefile b/lib/kokkos/benchmarks/bytes_and_flops/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..6a1917a523170bb392c6e81855e60489085bf113 --- /dev/null +++ b/lib/kokkos/benchmarks/bytes_and_flops/Makefile @@ -0,0 +1,43 @@ +KOKKOS_PATH = ${HOME}/kokkos +SRC = $(wildcard *.cpp) +KOKKOS_DEVICES=Cuda +KOKKOS_CUDA_OPTIONS=enable_lambda + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ${KOKKOS_PATH}/config/nvcc_wrapper +EXE = bytes_and_flops.cuda +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +EXE = bytes_and_flops.host +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +CXXFLAGS = -O3 -g + +DEPFLAGS = -M +LINK = ${CXX} +LINKFLAGS = + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + 
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) bench.hpp bench_unroll_stride.hpp bench_stride.hpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp b/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e3fe42a652dfaa963578052664a8df71e03afce1 --- /dev/null +++ b/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp @@ -0,0 +1,99 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<Kokkos_Core.hpp> +#include<impl/Kokkos_Timer.hpp> + +template<class Scalar, int Unroll,int Stride> +struct Run { +static void run(int N, int K, int R, int F, int T, int S); +}; + +template<class Scalar, int Stride> +struct RunStride { +static void run_1(int N, int K, int R, int F, int T, int S); +static void run_2(int N, int K, int R, int F, int T, int S); +static void run_3(int N, int K, int R, int F, int T, int S); +static void run_4(int N, int K, int R, int F, int T, int S); +static void run_5(int N, int K, int R, int F, int T, int S); +static void run_6(int N, int K, int R, int F, int T, int S); +static void run_7(int N, int K, int R, int F, int T, int S); +static void run_8(int N, int K, int R, int F, int T, int S); +static void run(int N, int K, int R, int U, int F, int T, int S); +}; + +#define STRIDE 1 +#include<bench_stride.hpp> +#undef STRIDE +#define STRIDE 2 +#include<bench_stride.hpp> +#undef STRIDE +#define STRIDE 4 +#include<bench_stride.hpp> +#undef STRIDE +#define STRIDE 8 +#include<bench_stride.hpp> +#undef STRIDE +#define STRIDE 16 +#include<bench_stride.hpp> +#undef STRIDE +#define STRIDE 32 +#include<bench_stride.hpp> +#undef STRIDE + +template<class Scalar> +void run_stride_unroll(int N, int K, int R, int D, int U, int F, int T, int S) { 
+ if(D == 1) + RunStride<Scalar,1>::run(N,K,R,U,F,T,S); + if(D == 2) + RunStride<Scalar,2>::run(N,K,R,U,F,T,S); + if(D == 4) + RunStride<Scalar,4>::run(N,K,R,U,F,T,S); + if(D == 8) + RunStride<Scalar,8>::run(N,K,R,U,F,T,S); + if(D == 16) + RunStride<Scalar,16>::run(N,K,R,U,F,T,S); + if(D == 32) + RunStride<Scalar,32>::run(N,K,R,U,F,T,S); +} + diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp b/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b60ec849944b9415d4ad6e1cd3a627cafdc55854 --- /dev/null +++ b/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp @@ -0,0 +1,124 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#define UNROLL 1 +#include<bench_unroll_stride.hpp> +#undef UNROLL +#define UNROLL 2 +#include<bench_unroll_stride.hpp> +#undef UNROLL +#define UNROLL 3 +#include<bench_unroll_stride.hpp> +#undef UNROLL +#define UNROLL 4 +#include<bench_unroll_stride.hpp> +#undef UNROLL +#define UNROLL 5 +#include<bench_unroll_stride.hpp> +#undef UNROLL +#define UNROLL 6 +#include<bench_unroll_stride.hpp> +#undef UNROLL +#define UNROLL 7 +#include<bench_unroll_stride.hpp> +#undef UNROLL +#define UNROLL 8 +#include<bench_unroll_stride.hpp> +#undef UNROLL + +template<class Scalar> +struct RunStride<Scalar,STRIDE> { +static void run_1(int N, int K, int R, int F, int T, int S) { + Run<Scalar,1,STRIDE>::run(N,K,R,F,T,S); +} +static void run_2(int N, int K, int R, int F, int T, int S) { + Run<Scalar,2,STRIDE>::run(N,K,R,F,T,S); +} +static void run_3(int N, int K, int R, int F, int T, int S) { + Run<Scalar,3,STRIDE>::run(N,K,R,F,T,S); +} +static void run_4(int N, int K, int R, int F, int T, int S) { + Run<Scalar,4,STRIDE>::run(N,K,R,F,T,S); +} +static void run_5(int N, int K, int R, int F, int T, int S) { + Run<Scalar,5,STRIDE>::run(N,K,R,F,T,S); +} +static void run_6(int N, int K, int R, int F, int T, int S) { + Run<Scalar,6,STRIDE>::run(N,K,R,F,T,S); +} +static void run_7(int N, int K, 
int R, int F, int T, int S) { + Run<Scalar,7,STRIDE>::run(N,K,R,F,T,S); +} +static void run_8(int N, int K, int R, int F, int T, int S) { + Run<Scalar,8,STRIDE>::run(N,K,R,F,T,S); +} + +static void run(int N, int K, int R, int U, int F, int T, int S) { + if(U==1) { + run_1(N,K,R,F,T,S); + } + if(U==2) { + run_2(N,K,R,F,T,S); + } + if(U==3) { + run_3(N,K,R,F,T,S); + } + if(U==4) { + run_4(N,K,R,F,T,S); + } + if(U==5) { + run_5(N,K,R,F,T,S); + } + if(U==6) { + run_6(N,K,R,F,T,S); + } + if(U==7) { + run_7(N,K,R,F,T,S); + } + if(U==8) { + run_8(N,K,R,F,T,S); + } +} +}; + diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp b/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0992c5b54b6277f99d728710a37182695d3a6f92 --- /dev/null +++ b/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp @@ -0,0 +1,148 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +template<class Scalar> +struct Run<Scalar,UNROLL,STRIDE> { +static void run(int N, int K, int R, int F, int T, int S) { + Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> A("A",N,K); + Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> B("B",N,K); + Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> C("C",N,K); + + Kokkos::deep_copy(A,Scalar(1.5)); + Kokkos::deep_copy(B,Scalar(2.5)); + Kokkos::deep_copy(C,Scalar(3.5)); + + Kokkos::Timer timer; + Kokkos::parallel_for("BenchmarkKernel",Kokkos::TeamPolicy<>(N,T).set_scratch_size(0,Kokkos::PerTeam(S)), + KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type& team) { + const int n = team.league_rank(); + for(int r=0; r<R; r++) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,K), [&] (const int& i) { + Scalar a1 = A(n,i,0); + const Scalar b = B(n,i,0); +#if(UNROLL>1) + Scalar a2 = a1*1.3; +#endif +#if(UNROLL>2) + Scalar a3 = a2*1.1; +#endif +#if(UNROLL>3) + Scalar a4 = a3*1.1; +#endif +#if(UNROLL>4) + Scalar a5 = a4*1.3; +#endif +#if(UNROLL>5) + Scalar a6 = a5*1.1; +#endif 
+#if(UNROLL>6) + Scalar a7 = a6*1.1; +#endif +#if(UNROLL>7) + Scalar a8 = a7*1.1; +#endif + + + for(int f = 0; f<F; f++) { + a1 += b*a1; +#if(UNROLL>1) + a2 += b*a2; +#endif +#if(UNROLL>2) + a3 += b*a3; +#endif +#if(UNROLL>3) + a4 += b*a4; +#endif +#if(UNROLL>4) + a5 += b*a5; +#endif +#if(UNROLL>5) + a6 += b*a6; +#endif +#if(UNROLL>6) + a7 += b*a7; +#endif +#if(UNROLL>7) + a8 += b*a8; +#endif + + + } +#if(UNROLL==1) + C(n,i,0) = a1; +#endif +#if(UNROLL==2) + C(n,i,0) = a1+a2; +#endif +#if(UNROLL==3) + C(n,i,0) = a1+a2+a3; +#endif +#if(UNROLL==4) + C(n,i,0) = a1+a2+a3+a4; +#endif +#if(UNROLL==5) + C(n,i,0) = a1+a2+a3+a4+a5; +#endif +#if(UNROLL==6) + C(n,i,0) = a1+a2+a3+a4+a5+a6; +#endif +#if(UNROLL==7) + C(n,i,0) = a1+a2+a3+a4+a5+a6+a7; +#endif +#if(UNROLL==8) + C(n,i,0) = a1+a2+a3+a4+a5+a6+a7+a8; +#endif + + }); + } + }); + Kokkos::fence(); + double seconds = timer.seconds(); + + double bytes = 1.0*N*K*R*3*sizeof(Scalar); + double flops = 1.0*N*K*R*(F*2*UNROLL + 2*(UNROLL-1)); + printf("NKRUFTS: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: %lf\n",N,K,R,UNROLL,F,T,S,seconds,1.0*bytes/seconds/1024/1024/1024,1.e-9*flops/seconds); +} +}; + diff --git a/lib/kokkos/benchmarks/bytes_and_flops/main.cpp b/lib/kokkos/benchmarks/bytes_and_flops/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f545247212ab6057baca8bfb39463daa760747db --- /dev/null +++ b/lib/kokkos/benchmarks/bytes_and_flops/main.cpp @@ -0,0 +1,96 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<Kokkos_Core.hpp> +#include<impl/Kokkos_Timer.hpp> +#include<bench.hpp> + +int main(int argc, char* argv[]) { + Kokkos::initialize(); + + + if(argc<10) { + printf("Arguments: N K R D U F T S\n"); + printf(" P: Precision (1==float, 2==double)\n"); + printf(" N,K: dimensions of the 2D array to allocate\n"); + printf(" R: how often to loop through the K dimension with each team\n"); + printf(" D: distance between loaded elements (stride)\n"); + printf(" U: how many independent flops to do per load\n"); + printf(" F: how many times to repeat the U unrolled operations before reading next element\n"); + printf(" T: team size\n"); + printf(" S: shared memory per team (used to control occupancy on GPUs)\n"); + printf("Example Input GPU:\n"); + printf(" Bandwidth Bound : 2 100000 1024 1 1 1 1 256 6000\n"); + printf(" Cache Bound : 2 100000 1024 64 1 1 1 512 20000\n"); + printf(" Compute Bound : 2 100000 1024 1 1 8 64 256 6000\n"); + printf(" Load Slots Used : 2 20000 256 32 16 1 1 256 6000\n"); + printf(" Inefficient Load: 2 20000 256 32 2 1 1 256 20000\n"); + Kokkos::finalize(); + return 0; + } + + + int P = atoi(argv[1]); + int N = atoi(argv[2]); + int K = atoi(argv[3]); + int R = atoi(argv[4]); + int D = atoi(argv[5]); + int U = atoi(argv[6]); + int F = atoi(argv[7]); + int T = atoi(argv[8]); + int S = atoi(argv[9]); + + if(U>8) {printf("U must be 1-8\n"); return 0;} + if( (D!=1) && (D!=2) && (D!=4) && (D!=8) && (D!=16) && (D!=32)) {printf("D must be one of 1,2,4,8,16,32\n"); return 0;} + if( (P!=1) && (P!=2) ) {printf("P must be one of 1,2\n"); return 0;} + + if(P==1) { + run_stride_unroll<float>(N,K,R,D,U,F,T,S); + } + if(P==2) { + run_stride_unroll<double>(N,K,R,D,U,F,T,S); + } + + Kokkos::finalize(); +} + diff --git a/lib/kokkos/benchmarks/gather/Makefile b/lib/kokkos/benchmarks/gather/Makefile new file mode 100644 index 
0000000000000000000000000000000000000000..fd1feab6fa8c22d13c726dad7312e769bbdffc37 --- /dev/null +++ b/lib/kokkos/benchmarks/gather/Makefile @@ -0,0 +1,44 @@ +KOKKOS_PATH = ${HOME}/kokkos +SRC = $(wildcard *.cpp) +KOKKOS_DEVICES=Cuda +KOKKOS_CUDA_OPTIONS=enable_lambda + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ${KOKKOS_PATH}/config/nvcc_wrapper +EXE = gather.cuda +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +EXE = gather.host +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +CXXFLAGS = -O3 -g + +DEPFLAGS = -M +LINK = ${CXX} +LINKFLAGS = + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +$(warning ${KOKKOS_CPPFLAGS}) +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) gather_unroll.hpp gather.hpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/benchmarks/gather/gather.hpp b/lib/kokkos/benchmarks/gather/gather.hpp new file mode 100644 index 0000000000000000000000000000000000000000..406bd28983bd696e12cb82aeea388f94b6e80047 --- /dev/null +++ b/lib/kokkos/benchmarks/gather/gather.hpp @@ -0,0 +1,92 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +template<class Scalar, int UNROLL> +struct RunGather { + static void run(int N, int K, int D, int R, int F); +}; + +#define UNROLL 1 +#include<gather_unroll.hpp> +#undef UNROLL +#define UNROLL 2 +#include<gather_unroll.hpp> +#undef UNROLL +#define UNROLL 3 +#include<gather_unroll.hpp> +#undef UNROLL +#define UNROLL 4 +#include<gather_unroll.hpp> +#undef UNROLL +#define UNROLL 5 +#include<gather_unroll.hpp> +#undef UNROLL +#define UNROLL 6 +#include<gather_unroll.hpp> +#undef UNROLL +#define UNROLL 7 +#include<gather_unroll.hpp> +#undef UNROLL +#define UNROLL 8 +#include<gather_unroll.hpp> +#undef UNROLL + +template<class Scalar> +void run_gather_test(int N, int K, int D, int R, int U, int F) { + if(U == 1) + RunGather<Scalar,1>::run(N,K,D,R,F); + if(U == 2) + RunGather<Scalar,2>::run(N,K,D,R,F); + if(U == 3) + RunGather<Scalar,3>::run(N,K,D,R,F); + if(U == 4) + RunGather<Scalar,4>::run(N,K,D,R,F); + if(U == 5) + RunGather<Scalar,5>::run(N,K,D,R,F); + if(U == 6) + RunGather<Scalar,6>::run(N,K,D,R,F); + if(U == 7) + RunGather<Scalar,7>::run(N,K,D,R,F); + if(U == 8) + RunGather<Scalar,8>::run(N,K,D,R,F); +} diff --git a/lib/kokkos/benchmarks/gather/gather_unroll.hpp b/lib/kokkos/benchmarks/gather/gather_unroll.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1d01b26ca77f2b7e58f5a1cbd5bbe251c3d558d3 --- /dev/null +++ b/lib/kokkos/benchmarks/gather/gather_unroll.hpp @@ -0,0 +1,169 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<Kokkos_Core.hpp> +#include<Kokkos_Random.hpp> + +template<class Scalar> +struct RunGather<Scalar,UNROLL> { +static void run(int N, int K, int D, int R, int F) { + Kokkos::View<int**> connectivity("Connectivity",N,K); + Kokkos::View<Scalar*> A_in("Input",N); + Kokkos::View<Scalar*> B_in("Input",N); + Kokkos::View<Scalar*> C("Output",N); + + Kokkos::Random_XorShift64_Pool<> rand_pool(12313); + + Kokkos::deep_copy(A_in,1.5); + Kokkos::deep_copy(B_in,2.0); + + Kokkos::View<const Scalar*, Kokkos::MemoryTraits<Kokkos::RandomAccess> > A(A_in); + Kokkos::View<const Scalar*, Kokkos::MemoryTraits<Kokkos::RandomAccess> > B(B_in); + + Kokkos::parallel_for("InitKernel",N, + KOKKOS_LAMBDA (const int& i) { + auto rand_gen = rand_pool.get_state(); + for( int jj=0; jj<K; jj++) { + connectivity(i,jj) = (rand_gen.rand(D) + i - D/2 + N)%N; + } + rand_pool.free_state(rand_gen); + }); + Kokkos::fence(); + + + Kokkos::Timer timer; + for(int r = 0; r<R; r++) { + Kokkos::parallel_for("BenchmarkKernel",N, + KOKKOS_LAMBDA (const int& i) { + Scalar c = Scalar(0.0); + for( int jj=0; jj<K; jj++) { + const int j = connectivity(i,jj); + Scalar a1 = A(j); + const Scalar b = B(j); +#if(UNROLL>1) + Scalar a2 = a1*Scalar(1.3); +#endif +#if(UNROLL>2) + Scalar a3 = a2*Scalar(1.1); +#endif +#if(UNROLL>3) + Scalar a4 = a3*Scalar(1.1); +#endif +#if(UNROLL>4) + Scalar a5 = a4*Scalar(1.3); +#endif +#if(UNROLL>5) + Scalar a6 = a5*Scalar(1.1); +#endif +#if(UNROLL>6) + Scalar a7 = a6*Scalar(1.1); +#endif +#if(UNROLL>7) + Scalar a8 = a7*Scalar(1.1); +#endif + + + for(int f = 0; f<F; f++) { + a1 += b*a1; +#if(UNROLL>1) + a2 += b*a2; +#endif +#if(UNROLL>2) + a3 += b*a3; +#endif +#if(UNROLL>3) + a4 += b*a4; +#endif +#if(UNROLL>4) + a5 += b*a5; +#endif +#if(UNROLL>5) + a6 += b*a6; +#endif +#if(UNROLL>6) + a7 += b*a7; +#endif +#if(UNROLL>7) + a8 += b*a8; +#endif + + + } 
+#if(UNROLL==1) + c += a1; +#endif +#if(UNROLL==2) + c += a1+a2; +#endif +#if(UNROLL==3) + c += a1+a2+a3; +#endif +#if(UNROLL==4) + c += a1+a2+a3+a4; +#endif +#if(UNROLL==5) + c += a1+a2+a3+a4+a5; +#endif +#if(UNROLL==6) + c += a1+a2+a3+a4+a5+a6; +#endif +#if(UNROLL==7) + c += a1+a2+a3+a4+a5+a6+a7; +#endif +#if(UNROLL==8) + c += a1+a2+a3+a4+a5+a6+a7+a8; +#endif + + } + C(i) = c ; + }); + Kokkos::fence(); + } + double seconds = timer.seconds(); + + double bytes = 1.0*N*K*R*(2*sizeof(Scalar)+sizeof(int)) + 1.0*N*R*sizeof(Scalar); + double flops = 1.0*N*K*R*(F*2*UNROLL + 2*(UNROLL-1)); + double gather_ops = 1.0*N*K*R*2; + printf("SNKDRUF: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: %lf GGather/s: %lf\n",sizeof(Scalar)/4,N,K,D,R,UNROLL,F,seconds,1.0*bytes/seconds/1024/1024/1024,1.e-9*flops/seconds,1.e-9*gather_ops/seconds); +} +}; diff --git a/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.cpp b/lib/kokkos/benchmarks/gather/main.cpp similarity index 54% rename from lib/kokkos/core/src/impl/Kokkos_HBWAllocators.cpp rename to lib/kokkos/benchmarks/gather/main.cpp index 4eb80d03f1fa0c26a2ba9524b16719dcf2a72e99..161c6f20919639845adecd96d74d978c65ea952f 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.cpp +++ b/lib/kokkos/benchmarks/gather/main.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,73 +36,58 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -#include <Kokkos_HostSpace.hpp> - -#include <impl/Kokkos_HBWAllocators.hpp> -#include <impl/Kokkos_Error.hpp> - - -#include <stdint.h> // uintptr_t -#include <cstdlib> // for malloc, realloc, and free -#include <cstring> // for memcpy +#include<Kokkos_Core.hpp> +#include<impl/Kokkos_Timer.hpp> +#include<gather.hpp> -#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE) -#include <sys/mman.h> // for mmap, munmap, MAP_ANON, etc -#include <unistd.h> // for sysconf, _SC_PAGE_SIZE, _SC_PHYS_PAGES -#endif +int main(int argc, char* argv[]) { + Kokkos::initialize(argc,argv); -#include <sstream> -#include <iostream> -#ifdef KOKKOS_HAVE_HBWSPACE -#include <memkind.h> + if(argc<8) { + printf("Arguments: S N K D\n"); + printf(" S: Scalar Type Size (1==float, 2==double, 4=complex<double>)\n"); + printf(" N: Number of entities\n"); + printf(" K: Number of things to gather per entity\n"); + printf(" D: Max distance of gathered things of an entity\n"); + printf(" R: how often to loop through the K dimension with each team\n"); + printf(" U: how many independent flops to do per load\n"); + printf(" F: how many times to repeat the U unrolled operations before reading next element\n"); + printf("Example Input GPU:\n"); + printf(" Bandwidth Bound : 2 10000000 1 1 10 1 1\n"); + printf(" Cache Bound : 2 10000000 64 1 10 1 1\n"); + printf(" Cache Gather : 2 10000000 64 256 10 1 1\n"); + printf(" Global Gather : 2 100000000 16 100000000 1 1 1\n"); + printf(" Typical MD : 2 100000 32 512 1000 8 2\n"); + Kokkos::finalize(); + return 0; + } -namespace Kokkos { -namespace Experimental { -namespace Impl { -#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB) -/*--------------------------------------------------------------------------*/ -void* HBWMallocAllocator::allocate( size_t size ) -{ - std::cout<< "Allocate HBW: " << 1.0e-6*size << "MB" << std::endl; - 
void * ptr = NULL; - if (size) { - ptr = memkind_malloc(MEMKIND_TYPE,size); + int S = atoi(argv[1]); + int N = atoi(argv[2]); + int K = atoi(argv[3]); + int D = atoi(argv[4]); + int R = atoi(argv[5]); + int U = atoi(argv[6]); + int F = atoi(argv[7]); - if (!ptr) - { - std::ostringstream msg ; - msg << name() << ": allocate(" << size << ") FAILED"; - Kokkos::Impl::throw_runtime_exception( msg.str() ); - } + if( (S!=1) && (S!=2) && (S!=4)) {printf("S must be one of 1,2,4\n"); return 0;} + if( N<D ) {printf("N must be larger or equal to D\n"); return 0; } + if(S==1) { + run_gather_test<float>(N,K,D,R,U,F); } - return ptr; -} - -void HBWMallocAllocator::deallocate( void * ptr, size_t /*size*/ ) -{ - if (ptr) { - memkind_free(MEMKIND_TYPE,ptr); + if(S==2) { + run_gather_test<double>(N,K,D,R,U,F); } -} - -void * HBWMallocAllocator::reallocate(void * old_ptr, size_t /*old_size*/, size_t new_size) -{ - void * ptr = memkind_realloc(MEMKIND_TYPE, old_ptr, new_size); - - if (new_size > 0u && ptr == NULL) { - Kokkos::Impl::throw_runtime_exception("Error: Malloc Allocator could not reallocate memory"); + if(S==4) { + run_gather_test<Kokkos::complex<double> >(N,K,D,R,U,F); } - return ptr; + Kokkos::finalize(); } -} // namespace Impl -} // namespace Experimental -} // namespace Kokkos -#endif diff --git a/lib/kokkos/bin/nvcc_wrapper b/lib/kokkos/bin/nvcc_wrapper new file mode 100755 index 0000000000000000000000000000000000000000..cb206cf88b2c4e3a4f289bc919cc272e22749f36 --- /dev/null +++ b/lib/kokkos/bin/nvcc_wrapper @@ -0,0 +1,284 @@ +#!/bin/bash +# +# This shell script (nvcc_wrapper) wraps both the host compiler and +# NVCC, if you are building legacy C or C++ code with CUDA enabled. +# The script remedies some differences between the interface of NVCC +# and that of the host compiler, in particular for linking. +# It also means that a legacy code doesn't need separate .cu files; +# it can just use .cpp files. +# +# Default settings: change those according to your machine. 
For +# example, you may have have two different wrappers with either icpc +# or g++ as their back-end compiler. The defaults can be overwritten +# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc). + +default_arch="sm_35" +#default_arch="sm_50" + +# +# The default C++ compiler. +# +host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"} +#host_compiler="icpc" +#host_compiler="/usr/local/gcc/4.8.3/bin/g++" +#host_compiler="/usr/local/gcc/4.9.1/bin/g++" + +# +# Internal variables +# + +# C++ files +cpp_files="" + +# Host compiler arguments +xcompiler_args="" + +# Cuda (NVCC) only arguments +cuda_args="" + +# Arguments for both NVCC and Host compiler +shared_args="" + +# Linker arguments +xlinker_args="" + +# Object files passable to NVCC +object_files="" + +# Link objects for the host linker only +object_files_xlinker="" + +# Shared libraries with version numbers are not handled correctly by NVCC +shared_versioned_libraries_host="" +shared_versioned_libraries="" + +# Does the User set the architecture +arch_set=0 + +# Does the user overwrite the host compiler +ccbin_set=0 + +#Error code of compilation +error_code=0 + +# Do a dry run without actually compiling +dry_run=0 + +# Skip NVCC compilation and use host compiler directly +host_only=0 + +# Enable workaround for CUDA 6.5 for pragma ident +replace_pragma_ident=0 + +# Mark first host compiler argument +first_xcompiler_arg=1 + +temp_dir=${TMPDIR:-/tmp} + +# Check if we have an optimization argument already +optimization_applied=0 + +#echo "Arguments: $# $@" + +while [ $# -gt 0 ] +do + case $1 in + #show the executed command + --show|--nvcc-wrapper-show) + dry_run=1 + ;; + #run host compilation only + --host-only) + host_only=1 + ;; + #replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros + --replace-pragma-ident) + replace_pragma_ident=1 + ;; + #handle source files to be compiled as cuda files + 
*.cpp|*.cxx|*.cc|*.C|*.c++|*.cu) + cpp_files="$cpp_files $1" + ;; + # Ensure we only have one optimization flag because NVCC doesn't allow muliple + -O*) + if [ $optimization_applied -eq 1 ]; then + echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used because nvcc can only accept a single optimization setting." + else + shared_args="$shared_args $1" + optimization_applied=1 + fi + ;; + #Handle shared args (valid for both nvcc and the host compiler) + -D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared) + shared_args="$shared_args $1" + ;; + #Handle shared args that have an argument + -o|-MT) + shared_args="$shared_args $1 $2" + shift + ;; + #Handle known nvcc args + -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*) + cuda_args="$cuda_args $1" + ;; + #Handle more known nvcc args + --expt-extended-lambda|--expt-relaxed-constexpr) + cuda_args="$cuda_args $1" + ;; + #Handle known nvcc args that have an argument + -rdc|-maxrregcount|--default-stream) + cuda_args="$cuda_args $1 $2" + shift + ;; + #Handle c++11 setting + --std=c++11|-std=c++11) + shared_args="$shared_args $1" + ;; + #strip of -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98 + -std=c++98|--std=c++98) + ;; + #strip of pedantic because it produces endless warnings about #LINE added by the preprocessor + -pedantic|-Wpedantic|-ansi) + ;; + #strip -Xcompiler because we add it + -Xcompiler) + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args="$2" + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,$2" + fi + shift + ;; + #strip of "-x cu" because we add that + -x) + if [[ $2 != "cu" ]]; then + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args="-x,$2" + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,-x,$2" + fi + fi + shift + ;; + #Handle -ccbin (if its not set we can set it to a default value) + 
-ccbin) + cuda_args="$cuda_args $1 $2" + ccbin_set=1 + host_compiler=$2 + shift + ;; + #Handle -arch argument (if its not set use a default + -arch*) + cuda_args="$cuda_args $1" + arch_set=1 + ;; + #Handle -Xcudafe argument + -Xcudafe) + cuda_args="$cuda_args -Xcudafe $2" + shift + ;; + #Handle args that should be sent to the linker + -Wl*) + xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}" + host_linker_args="$host_linker_args ${1:4:${#1}}" + ;; + #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking + *.a|*.so|*.o|*.obj) + object_files="$object_files $1" + object_files_xlinker="$object_files_xlinker -Xlinker $1" + ;; + #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking + *.dylib) + object_files="$object_files -Xlinker $1" + object_files_xlinker="$object_files_xlinker -Xlinker $1" + ;; + #Handle shared libraries with *.so.* names which nvcc can't do. 
+ *.so.*) + shared_versioned_libraries_host="$shared_versioned_libraries_host $1" + shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1" + ;; + #All other args are sent to the host compiler + *) + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args=$1 + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,$1" + fi + ;; + esac + + shift +done + +#Add default host compiler if necessary +if [ $ccbin_set -ne 1 ]; then + cuda_args="$cuda_args -ccbin $host_compiler" +fi + +#Add architecture command +if [ $arch_set -ne 1 ]; then + cuda_args="$cuda_args -arch=$default_arch" +fi + +#Compose compilation command +nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries" +if [ $first_xcompiler_arg -eq 0 ]; then + nvcc_command="$nvcc_command -Xcompiler $xcompiler_args" +fi + +#Compose host only command +host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host" + +#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING' +if [ $replace_pragma_ident -eq 1 ]; then + cpp_files2="" + for file in $cpp_files + do + var=`grep pragma ${file} | grep ident | grep "#"` + if [ "${#var}" -gt 0 ] + then + sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > $temp_dir/nvcc_wrapper_tmp_$file + cpp_files2="$cpp_files2 $temp_dir/nvcc_wrapper_tmp_$file" + else + cpp_files2="$cpp_files2 $file" + fi + done + cpp_files=$cpp_files2 + #echo $cpp_files +fi + +if [ "$cpp_files" ]; then + nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files" +else + nvcc_command="$nvcc_command $object_files" +fi + +if [ "$cpp_files" ]; then + host_command="$host_command $object_files $cpp_files" +else + host_command="$host_command $object_files" +fi + +#Print command for dryrun +if [ $dry_run -eq 1 ]; then + if [ $host_only -eq 1 ]; then + echo $host_command + else + echo $nvcc_command + fi + exit 0 +fi + +#Run compilation command +if [ $host_only -eq 1 ]; 
then + $host_command +else + $nvcc_command +fi +error_code=$? + +#Report error code +exit $error_code diff --git a/lib/kokkos/cmake/deps/CUSPARSE.cmake b/lib/kokkos/cmake/deps/CUSPARSE.cmake index 205f5e2a98898b8247b0f199afcc2e3ac4bc97b4..6f26d857c09acf7bb24c2c5449a54f5d507deae8 100644 --- a/lib/kokkos/cmake/deps/CUSPARSE.cmake +++ b/lib/kokkos/cmake/deps/CUSPARSE.cmake @@ -53,12 +53,12 @@ # ************************************************************************ # @HEADER -include(${TRIBITS_DEPS_DIR}/CUDA.cmake) +#include(${TRIBITS_DEPS_DIR}/CUDA.cmake) -IF (TPL_ENABLE_CUDA) - GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS) - GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS}) - GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY}) - TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) -ENDIF() +#IF (TPL_ENABLE_CUDA) +# GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS) +# GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS}) +# GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY}) +# TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) +#ENDIF() diff --git a/lib/kokkos/cmake/tribits.cmake b/lib/kokkos/cmake/tribits.cmake index 34cd216f810c9a829dbcdc13ed5e9c3be81752ac..879d80172068db6a4afe62f9687dad9219859e2d 100644 --- a/lib/kokkos/cmake/tribits.cmake +++ b/lib/kokkos/cmake/tribits.cmake @@ -1,6 +1,16 @@ INCLUDE(CMakeParseArguments) INCLUDE(CTest) +cmake_policy(SET CMP0054 NEW) + +IF(NOT DEFINED ${PROJECT_NAME}) + project(Kokkos) +ENDIF() + +IF(NOT DEFINED ${${PROJECT_NAME}_ENABLE_DEBUG}}) + SET(${PROJECT_NAME}_ENABLE_DEBUG OFF) +ENDIF() + FUNCTION(ASSERT_DEFINED VARS) FOREACH(VAR ${VARS}) IF(NOT DEFINED ${VAR}) @@ -75,6 +85,13 @@ MACRO(TRIBITS_ADD_EXAMPLE_DIRECTORIES) ENDMACRO() + +function(INCLUDE_DIRECTORIES) + cmake_parse_arguments(INCLUDE_DIRECTORIES "REQUIRED_DURING_INSTALLATION_TESTING" "" "" ${ARGN}) + _INCLUDE_DIRECTORIES(${INCLUDE_DIRECTORIES_UNPARSED_ARGUMENTS}) +endfunction() + + MACRO(TARGET_TRANSFER_PROPERTY TARGET_NAME PROP_IN PROP_OUT) SET(PROP_VALUES) 
FOREACH(TARGET_X ${ARGN}) @@ -271,6 +288,11 @@ ENDFUNCTION() ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR}) +FUNCTION(TRIBITS_ADD_TEST) +ENDFUNCTION() +FUNCTION(TRIBITS_TPL_TENTATIVELY_ENABLE) +ENDFUNCTION() + FUNCTION(TRIBITS_ADD_EXECUTABLE_AND_TEST EXE_NAME) SET(options STANDARD_PASS_OUTPUT WILL_FAIL) diff --git a/lib/kokkos/config/configure_compton_cpu.sh b/lib/kokkos/config/configure_compton_cpu.sh old mode 100755 new mode 100644 diff --git a/lib/kokkos/config/configure_compton_mic.sh b/lib/kokkos/config/configure_compton_mic.sh old mode 100755 new mode 100644 diff --git a/lib/kokkos/config/configure_kokkos.sh b/lib/kokkos/config/configure_kokkos.sh old mode 100755 new mode 100644 diff --git a/lib/kokkos/config/configure_kokkos_nvidia.sh b/lib/kokkos/config/configure_kokkos_nvidia.sh old mode 100755 new mode 100644 diff --git a/lib/kokkos/config/configure_shannon.sh b/lib/kokkos/config/configure_shannon.sh old mode 100755 new mode 100644 diff --git a/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt b/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt index 9f56f2fd48d30da63f28662431711c8b20d1f4a5..961e4186ec6e0fd24c3b71bddcbcbaa2873a41ca 100644 --- a/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt +++ b/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt @@ -91,9 +91,20 @@ Step 3: // -------------------------------------------------------------------------------- // -Step 4: - 4.1. Once all Trilinos tests pass promote Kokkos develop branch to master on Github +Step 4: Once all Trilinos tests pass promote Kokkos develop branch to master on Github + 4.1. 
Generate Changelog (You need a github API token) + + Close all Open issues with "InDevelop" tag on github + + (Not from kokkos directory) + gitthub_changelog_generator kokkos/kokkos --token TOKEN --no-pull-requests --include-labels 'InDevelop' --enhancement-labels 'enhancement,Feature Request' --future-release 'NEWTAG' --between-tags 'NEWTAG,OLDTAG' + + (Copy the new section from the generated CHANGELOG.md to the kokkos/CHANGELOG.md) + (Make desired changes to CHANGELOG.md to enhance clarity) + (Commit and push the CHANGELOG to develop) + 4.2 Merge develop into Master + - DO NOT fast-forward the merge!!!! (From kokkos directory): @@ -103,7 +114,7 @@ Step 4: git reset --hard origin/master git merge --no-ff origin/develop - 4.2. Update the tag in kokkos/config/master_history.txt + 4.3. Update the tag in kokkos/config/master_history.txt Tag description: MajorNumber.MinorNumber.WeeksSinceMinorNumberUpdate Tag format: #.#.## diff --git a/lib/kokkos/config/master_history.txt b/lib/kokkos/config/master_history.txt index f2eb674578f2c14442376210dfd1080050fe3917..78c512ccea4d5d1acf2c19c7157104c384be7a61 100644 --- a/lib/kokkos/config/master_history.txt +++ b/lib/kokkos/config/master_history.txt @@ -1,3 +1,6 @@ tag: 2.01.00 date: 07:21:2016 master: xxxxxxxx develop: fa6dfcc4 tag: 2.01.06 date: 09:02:2016 master: 9afaa87f develop: 555f1a3a - +tag: 2.01.10 date: 09:27:2016 master: e4119325 develop: e6cda11e +tag: 2.02.00 date: 10:30:2016 master: 6c90a581 develop: ca3dd56e +tag: 2.02.01 date: 11:01:2016 master: 9c698c86 develop: b0072304 +tag: 2.02.07 date: 12:16:2016 master: 4b4cc4ba develop: 382c0966 diff --git a/lib/kokkos/config/nvcc_wrapper b/lib/kokkos/config/nvcc_wrapper index 6093cb61bdaf5a3f030406b8e149580b818920d0..cb206cf88b2c4e3a4f289bc919cc272e22749f36 100755 --- a/lib/kokkos/config/nvcc_wrapper +++ b/lib/kokkos/config/nvcc_wrapper @@ -121,6 +121,10 @@ do 
-gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*) cuda_args="$cuda_args $1" ;; + #Handle more known nvcc args + --expt-extended-lambda|--expt-relaxed-constexpr) + cuda_args="$cuda_args $1" + ;; #Handle known nvcc args that have an argument -rdc|-maxrregcount|--default-stream) cuda_args="$cuda_args $1 $2" diff --git a/lib/kokkos/config/test_all_sandia b/lib/kokkos/config/test_all_sandia index aac036a8f37abfedabac7a4849289ecb3cbdfcd0..21b8bbff657700b9a6439cc975ea8a68b2c1e8e2 100755 --- a/lib/kokkos/config/test_all_sandia +++ b/lib/kokkos/config/test_all_sandia @@ -16,6 +16,8 @@ elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then MACHINE=bowman elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name MACHINE=shepard +elif [[ "$HOSTNAME" =~ apollo ]]; then + MACHINE=apollo elif [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then MACHINE=sems else @@ -28,6 +30,7 @@ IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial" CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial" CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial" +CUDA_IBM_BUILD_LIST="Cuda_OpenMP,Cuda_Serial" GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized" IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" @@ -44,20 +47,102 @@ BUILD_ONLY=False declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=3 TEST_SCRIPT=False SKIP_HWLOC=False +SPOT_CHECK=False -ARCH_FLAG="" +PRINT_HELP=False +OPT_FLAG="" +KOKKOS_OPTIONS="" + + +# +# Handle arguments +# + +while [[ $# > 0 ]] +do +key="$1" +case $key in +--kokkos-path*) +KOKKOS_PATH="${key#*=}" +;; +--build-list*) +CUSTOM_BUILD_LIST="${key#*=}" +;; +--debug*) +DEBUG=True +;; +--build-only*) +BUILD_ONLY=True +;; +--test-script*) +TEST_SCRIPT=True +;; +--skip-hwloc*) +SKIP_HWLOC=True +;; +--num*) 
+NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}" +;; +--dry-run*) +DRYRUN=True +;; +--spot-check*) +SPOT_CHECK=True +;; +--arch*) +ARCH_FLAG="--arch=${key#*=}" +;; +--opt-flag*) +OPT_FLAG="${key#*=}" +;; +--with-cuda-options*) +KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}" +;; +--help*) +PRINT_HELP=True +;; +*) +# args, just append +ARGS="$ARGS $1" +;; +esac +shift +done + +SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd ) + +# set kokkos path +if [ -z "$KOKKOS_PATH" ]; then + KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT +else + # Ensure KOKKOS_PATH is abs path + KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd ) +fi # # Machine specific config # if [ "$MACHINE" = "sems" ]; then - source /projects/modulefiles/utils/sems-modules-init.sh - source /projects/modulefiles/utils/kokkos-modules-init.sh + source /projects/sems/modulefiles/utils/sems-modules-init.sh + + BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base" + CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" + CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" - BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>/base,hwloc/1.10.1/<COMPILER_NAME>/<COMPILER_VERSION>/base" - CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.7.2/base" + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="" + fi + if [ "$SPOT_CHECK" = "True" ]; then + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS" + "gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS" + "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" + "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" + "cuda/8.0.44 $CUDA8_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + ) + else # Format: (compiler module-list 
build-list exe-name warning-flag) COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" @@ -66,12 +151,15 @@ if [ "$MACHINE" = "sems" ]; then "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" - "cuda/6.5.14 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" ) + fi elif [ "$MACHINE" = "white" ]; then source /etc/profile.d/modules.sh @@ -80,19 +168,20 @@ elif [ "$MACHINE" = "white" ]; then BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>" IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>" - CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.9.2" + CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/5.4.0" # Don't do pthread on white GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("gcc/4.9.2 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/5.3.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" + COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST 
$IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" "ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" + "cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/config/nvcc_wrapper $CUDA_WARNING_FLAGS" ) - - ARCH_FLAG="--arch=Power8" - NUM_JOBS_TO_RUN_IN_PARALLEL=8 + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=Power8,Kepler37" + fi + NUM_JOBS_TO_RUN_IN_PARALLEL=2 elif [ "$MACHINE" = "bowman" ]; then source /etc/profile.d/modules.sh @@ -105,11 +194,13 @@ elif [ "$MACHINE" = "bowman" ]; then # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" ) - ARCH_FLAG="--arch=KNL" - NUM_JOBS_TO_RUN_IN_PARALLEL=8 + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=KNL" + fi + NUM_JOBS_TO_RUN_IN_PARALLEL=2 elif [ "$MACHINE" = "shepard" ]; then source /etc/profile.d/modules.sh @@ -122,58 +213,84 @@ elif [ "$MACHINE" = "shepard" ]; then # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" ) - ARCH_FLAG="--arch=HSW" - NUM_JOBS_TO_RUN_IN_PARALLEL=8 + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=HSW" + fi + NUM_JOBS_TO_RUN_IN_PARALLEL=2 + +elif [ "$MACHINE" = "apollo" ]; then + source /projects/sems/modulefiles/utils/sems-modules-init.sh + module use /home/projects/modulefiles/local/x86-64 + module load kokkos-env + + module load sems-git + module load sems-tex + module load sems-cmake/3.5.2 + module load sems-gdb + + SKIP_HWLOC=True + + 
BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base" + CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" + CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" + + CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/8.0.44" + NVCC_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0" + + BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP" + BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_Pthread" + BUILD_LIST_CLANG="Serial,Pthread,OpenMP" + if [ "$SPOT_CHECK" = "True" ]; then + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS" + "gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS" + "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" + "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" + "clang/head $CLANG_MODULE_LIST "Cuda_Pthread" clang++ $CUDA_WARNING_FLAGS" + "cuda/8.0.44 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + ) + else + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("cuda/8.0.44 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "clang/head $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS" + "clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS" + "gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/6.1.0 $BASE_MODULE_LIST 
$GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + ) + fi + + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=SNB,Kepler35" + fi + NUM_JOBS_TO_RUN_IN_PARALLEL=2 else echo "Unhandled machine $MACHINE" >&2 exit 1 fi + + export OMP_NUM_THREADS=4 declare -i NUM_RESULTS_TO_KEEP=7 RESULT_ROOT_PREFIX=TestAll -SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd ) - -# -# Handle arguments -# - -while [[ $# > 0 ]] -do -key="$1" -case $key in ---kokkos-path*) -KOKKOS_PATH="${key#*=}" -;; ---build-list*) -CUSTOM_BUILD_LIST="${key#*=}" -;; ---debug*) -DEBUG=True -;; ---build-only*) -BUILD_ONLY=True -;; ---test-script*) -TEST_SCRIPT=True -;; ---skip-hwloc*) -SKIP_HWLOC=True -;; ---num*) -NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}" -;; ---dry-run*) -DRYRUN=True -;; ---help) +if [ "$PRINT_HELP" = "True" ]; then echo "test_all_sandia <ARGS> <OPTIONS>:" echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" echo " Defaults to root repo containing this script" @@ -183,6 +300,9 @@ echo "--skip-hwloc: Do not do hwloc tests" echo "--num=N: Number of jobs to run in parallel " echo "--dry-run: Just print what would be executed" echo "--build-only: Just do builds, don't run anything" +echo "--opt-flag=FLAG: Optimization flag (default: -O3)" +echo "--arch=ARCHITECTURE: overwrite architecture flags" +echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS" echo 
"--build-list=BUILD,BUILD,BUILD..." echo " Provide a comma-separated list of builds instead of running all builds" echo " Valid items:" @@ -220,21 +340,6 @@ echo " hit ctrl-z" echo " % kill -9 %1" echo exit 0 -;; -*) -# args, just append -ARGS="$ARGS $1" -;; -esac -shift -done - -# set kokkos path -if [ -z "$KOKKOS_PATH" ]; then - KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT -else - # Ensure KOKKOS_PATH is abs path - KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd ) fi # set build type @@ -381,11 +486,15 @@ single_build_and_test() { local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info))) fi + if [[ "$OPT_FLAG" = "" ]]; then + OPT_FLAG="-O3" + fi + if [[ "$build_type" = *debug* ]]; then local extra_args="$extra_args --debug" local cxxflags="-g $compiler_warning_flags" else - local cxxflags="-O3 $compiler_warning_flags" + local cxxflags="$OPT_FLAG $compiler_warning_flags" fi if [[ "$compiler" == cuda* ]]; then @@ -393,7 +502,9 @@ single_build_and_test() { export TMPDIR=$(pwd) fi - # cxxflags="-DKOKKOS_USING_EXP_VIEW=1 $cxxflags" + if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then + local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS" + fi echo " Starting job $desc" @@ -440,13 +551,14 @@ run_in_background() { local compiler=$1 local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL - if [[ "$BUILD_ONLY" == True ]]; then - num_jobs=8 - else + # don't override command line input + # if [[ "$BUILD_ONLY" == True ]]; then + # num_jobs=8 + # else if [[ "$compiler" == cuda* ]]; then num_jobs=1 fi - fi + # fi wait_for_jobs $num_jobs single_build_and_test $* & diff --git a/lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh b/lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh new file mode 100755 index 0000000000000000000000000000000000000000..d2a7a533d5b34c044edc017605c85e93aaa13161 --- /dev/null +++ b/lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh @@ -0,0 +1,50 @@ +#!/bin/bash -le + +export TRILINOS_UPDATED_PATH=${PWD}/trilinos-update +export 
TRILINOS_PRISTINE_PATH=${PWD}/trilinos-pristine + +#rm -rf ${KOKKOS_PATH} +#rm -rf ${TRILINOS_UPDATED_PATH} +#rm -rf ${TRILINOS_PRISTINE_PATH} + +#Already done: +if [ ! -d "${TRILINOS_UPDATED_PATH}" ]; then + git clone https://github.com/trilinos/trilinos ${TRILINOS_UPDATED_PATH} +fi +if [ ! -d "${TRILINOS_PRISTINE_PATH}" ]; then + git clone https://github.com/trilinos/trilinos ${TRILINOS_PRISTINE_PATH} +fi + +cd ${TRILINOS_UPDATED_PATH} +git checkout develop +git reset --hard origin/develop +git pull +cd .. + +python kokkos/config/snapshot.py ${KOKKOS_PATH} ${TRILINOS_UPDATED_PATH}/packages + +cd ${TRILINOS_UPDATED_PATH} +echo "" +echo "" +echo "Trilinos State:" +git log --pretty=oneline --since=2.days +SHA=`git log --pretty=oneline --since=2.days | head -n 2 | tail -n 1 | awk '{print $1}'` +cd .. + +cd ${TRILINOS_PRISTINE_PATH} +git status +git log --pretty=oneline --since=2.days +echo "Checkout develop" +git checkout develop +echo "Pull" +git pull +echo "Checkout SHA" +git checkout ${SHA} +cd .. + +cd ${TRILINOS_PRISTINE_PATH} +echo "" +echo "" +echo "Trilinos Pristine State:" +git log --pretty=oneline --since=2.days +cd .. 
diff --git a/lib/kokkos/containers/performance_tests/CMakeLists.txt b/lib/kokkos/containers/performance_tests/CMakeLists.txt index 726d403452bab92dfaab0a3275d9be42af6afa4f..403ac746f6d6109a40d117b549235eae76965119 100644 --- a/lib/kokkos/containers/performance_tests/CMakeLists.txt +++ b/lib/kokkos/containers/performance_tests/CMakeLists.txt @@ -1,6 +1,6 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) SET(SOURCES diff --git a/lib/kokkos/containers/performance_tests/Makefile b/lib/kokkos/containers/performance_tests/Makefile index e7abaf44ce07fb725bb1947d86b573ac6a15dae4..fa3bc777013fd5148a2a49c26c00df4aba9786e7 100644 --- a/lib/kokkos/containers/performance_tests/Makefile +++ b/lib/kokkos/containers/performance_tests/Makefile @@ -7,21 +7,18 @@ vpath %.cpp ${KOKKOS_PATH}/containers/performance_tests default: build_all echo "End Build" - -include $(KOKKOS_PATH)/Makefile.kokkos - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - CXX = $(NVCC_WRAPPER) - CXXFLAGS ?= -O3 - LINK = $(CXX) - LDFLAGS ?= -lpthread +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) + CXX = $(KOKKOS_PATH)/config/nvcc_wrapper else - CXX ?= g++ - CXXFLAGS ?= -O3 - LINK ?= $(CXX) - LDFLAGS ?= -lpthread + CXX = g++ endif +CXXFLAGS = -O3 +LINK ?= $(CXX) +LDFLAGS ?= -lpthread + +include $(KOKKOS_PATH)/Makefile.kokkos + KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests TEST_TARGETS = diff --git a/lib/kokkos/containers/performance_tests/TestCuda.cpp b/lib/kokkos/containers/performance_tests/TestCuda.cpp index 8183adaa60b8226fdd5979253cc619ff90e701ba..e7afad905bff4a83859e005944f0904a9edc0699 100644 --- a/lib/kokkos/containers/performance_tests/TestCuda.cpp +++ b/lib/kokkos/containers/performance_tests/TestCuda.cpp @@ -83,7 +83,7 @@ TEST_F( cuda, dynrankview_perf ) { std::cout << "Cuda" << 
std::endl; std::cout << " DynRankView vs View: Initialization Only " << std::endl; - test_dynrankview_op_perf<Kokkos::Cuda>( 4096 ); + test_dynrankview_op_perf<Kokkos::Cuda>( 40960 ); } TEST_F( cuda, global_2_local) diff --git a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp index aab6e6988fc847360f02474daab52110a18ef8ef..d96a3f74324046862b4740c4d9c3ae7a178937d8 100644 --- a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp +++ b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp @@ -180,8 +180,8 @@ void test_dynrankview_op_perf( const int par_size ) typedef DeviceType execution_space; typedef typename execution_space::size_type size_type; - const size_type dim2 = 900; - const size_type dim3 = 300; + const size_type dim2 = 90; + const size_type dim3 = 30; double elapsed_time_view = 0; double elapsed_time_compview = 0; diff --git a/lib/kokkos/containers/src/Kokkos_DualView.hpp b/lib/kokkos/containers/src/Kokkos_DualView.hpp index 1230df4d97741123f2be0a011fd8fd7a40fbd35f..3a0196ee4c5ea48fcd9e1895212f655c6b81e6a1 100644 --- a/lib/kokkos/containers/src/Kokkos_DualView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DualView.hpp @@ -261,9 +261,6 @@ public: modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")), modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host")) { -#if ! 
KOKKOS_USING_EXP_VIEW - Impl::assert_shapes_are_equal (d_view.shape (), h_view.shape ()); -#else if ( int(d_view.rank) != int(h_view.rank) || d_view.dimension_0() != h_view.dimension_0() || d_view.dimension_1() != h_view.dimension_1() || @@ -284,7 +281,6 @@ public: d_view.span() != h_view.span() ) { Kokkos::Impl::throw_runtime_exception("DualView constructed with incompatible views"); } -#endif } //@} @@ -315,13 +311,13 @@ public: template< class Device > KOKKOS_INLINE_FUNCTION const typename Impl::if_c< - Impl::is_same<typename t_dev::memory_space, + std::is_same<typename t_dev::memory_space, typename Device::memory_space>::value, t_dev, t_host>::type& view () const { return Impl::if_c< - Impl::is_same< + std::is_same< typename t_dev::memory_space, typename Device::memory_space>::value, t_dev, @@ -347,13 +343,13 @@ public: /// appropriate template parameter. template<class Device> void sync( const typename Impl::enable_if< - ( Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) || - ( Impl::is_same< Device , int>::value) + ( std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) || + ( std::is_same< Device , int>::value) , int >::type& = 0) { const unsigned int dev = Impl::if_c< - Impl::is_same< + std::is_same< typename t_dev::memory_space, typename Device::memory_space>::value , unsigned int, @@ -370,7 +366,7 @@ public: modified_host() = modified_device() = 0; } } - if(Impl::is_same<typename t_host::memory_space,typename t_dev::memory_space>::value) { + if(std::is_same<typename t_host::memory_space,typename t_dev::memory_space>::value) { t_dev::execution_space::fence(); t_host::execution_space::fence(); } @@ -378,13 +374,13 @@ public: template<class Device> void sync ( const typename Impl::enable_if< - ( ! Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) || - ( Impl::is_same< Device , int>::value) + ( ! 
std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) || + ( std::is_same< Device , int>::value) , int >::type& = 0 ) { const unsigned int dev = Impl::if_c< - Impl::is_same< + std::is_same< typename t_dev::memory_space, typename Device::memory_space>::value, unsigned int, @@ -405,7 +401,7 @@ public: { const unsigned int dev = Impl::if_c< - Impl::is_same< + std::is_same< typename t_dev::memory_space, typename Device::memory_space>::value , unsigned int, @@ -431,7 +427,7 @@ public: void modify () { const unsigned int dev = Impl::if_c< - Impl::is_same< + std::is_same< typename t_dev::memory_space, typename Device::memory_space>::value, unsigned int, @@ -514,11 +510,7 @@ public: //! The allocation size (same as Kokkos::View::capacity). size_t capacity() const { -#if KOKKOS_USING_EXP_VIEW return d_view.span(); -#else - return d_view.capacity(); -#endif } //! Get stride(s) for each dimension. @@ -555,8 +547,6 @@ public: // Partial specializations of Kokkos::subview() for DualView objects. // -#if KOKKOS_USING_EXP_VIEW - namespace Kokkos { namespace Impl { @@ -590,352 +580,6 @@ subview( const DualView<D,A1,A2,A3> & src , Args ... args ) } /* namespace Kokkos */ -#else - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- -// -// Partial specializations of Kokkos::subview() for DualView objects. 
-// - -namespace Kokkos { -namespace Impl { - -template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type - , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type - , class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type - > -struct ViewSubview< DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type > - , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type - , SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type > -{ -private: - - typedef DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type > SrcViewType ; - - enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 }; - enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 }; - enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 }; - enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 }; - enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 }; - enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 }; - enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 }; - enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 }; - - // The source view rank must be equal to the input argument rank - // Once a void argument is encountered all subsequent arguments must be void. - enum { InputRank = - Impl::StaticAssert<( SrcViewType::rank == - ( V0 ? 0 : ( - V1 ? 1 : ( - V2 ? 2 : ( - V3 ? 3 : ( - V4 ? 4 : ( - V5 ? 5 : ( - V6 ? 6 : ( - V7 ? 7 : 8 ))))))) )) - && - ( SrcViewType::rank == - ( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) ) - >::value ? SrcViewType::rank : 0 }; - - enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 }; - enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 }; - enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 }; - enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 }; - enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 
1 : 0 }; - enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 }; - enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 }; - enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 }; - - enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3) - + unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) }; - - // Reverse - enum { R0_rev = 0 == InputRank ? 0u : ( - 1 == InputRank ? unsigned(R0) : ( - 2 == InputRank ? unsigned(R1) : ( - 3 == InputRank ? unsigned(R2) : ( - 4 == InputRank ? unsigned(R3) : ( - 5 == InputRank ? unsigned(R4) : ( - 6 == InputRank ? unsigned(R5) : ( - 7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) }; - - typedef typename SrcViewType::array_layout SrcViewLayout ; - - // Choose array layout, attempting to preserve original layout if at all possible. - typedef typename Impl::if_c< - ( // Same Layout IF - // OutputRank 0 - ( OutputRank == 0 ) - || - // OutputRank 1 or 2, InputLayout Left, Interval 0 - // because single stride one or second index has a stride. - ( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value ) - || - // OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1] - // because single stride one or second index has a stride. - ( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value ) - ), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ; - - // Choose data type as a purely dynamic rank array to accomodate a runtime range. 
- typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type , - typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *, - typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **, - typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***, - typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****, - typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****, - typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******, - typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******, - typename SrcViewType::value_type ******** - >::type >::type >::type >::type >::type >::type >::type >::type OutputData ; - - // Choose space. - // If the source view's template arg1 or arg2 is a space then use it, - // otherwise use the source view's execution space. - - typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type , - typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::execution_space - >::type >::type OutputSpace ; - -public: - - // If keeping the layout then match non-data type arguments - // else keep execution space and memory traits. 
- typedef typename - Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value - , Kokkos::DualView< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type > - , Kokkos::DualView< OutputData , OutputViewLayout , OutputSpace - , typename SrcViewType::memory_traits > - >::type type ; -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -namespace Kokkos { - -template< class D , class A1 , class A2 , class A3 , - class ArgType0 > -typename Impl::ViewSubview< DualView<D,A1,A2,A3> - , ArgType0 , void , void , void - , void , void , void , void - >::type -subview( const DualView<D,A1,A2,A3> & src , - const ArgType0 & arg0 ) -{ - typedef typename - Impl::ViewSubview< DualView<D,A1,A2,A3> - , ArgType0 , void , void , void - , void , void , void , void - >::type - DstViewType ; - DstViewType sub_view; - sub_view.d_view = subview(src.d_view,arg0); - sub_view.h_view = subview(src.h_view,arg0); - sub_view.modified_device = src.modified_device; - sub_view.modified_host = src.modified_host; - return sub_view; -} - - -template< class D , class A1 , class A2 , class A3 , - class ArgType0 , class ArgType1 > -typename Impl::ViewSubview< DualView<D,A1,A2,A3> - , ArgType0 , ArgType1 , void , void - , void , void , void , void - >::type -subview( const DualView<D,A1,A2,A3> & src , - const ArgType0 & arg0 , - const ArgType1 & arg1 ) -{ - typedef typename - Impl::ViewSubview< DualView<D,A1,A2,A3> - , ArgType0 , ArgType1 , void , void - , void , void , void , void - >::type - DstViewType ; - DstViewType sub_view; - sub_view.d_view = subview(src.d_view,arg0,arg1); - sub_view.h_view = subview(src.h_view,arg0,arg1); - sub_view.modified_device = src.modified_device; - sub_view.modified_host = src.modified_host; - return sub_view; -} - -template< class D , class A1 , class A2 , class A3 , - class ArgType0 , class ArgType1 , class ArgType2 > -typename Impl::ViewSubview< DualView<D,A1,A2,A3> - , ArgType0 , ArgType1 , ArgType2 , void - , void , void , void , void - >::type -subview( 
const DualView<D,A1,A2,A3> & src , - const ArgType0 & arg0 , - const ArgType1 & arg1 , - const ArgType2 & arg2 ) -{ - typedef typename - Impl::ViewSubview< DualView<D,A1,A2,A3> - , ArgType0 , ArgType1 , ArgType2 , void - , void , void , void , void - >::type - DstViewType ; - DstViewType sub_view; - sub_view.d_view = subview(src.d_view,arg0,arg1,arg2); - sub_view.h_view = subview(src.h_view,arg0,arg1,arg2); - sub_view.modified_device = src.modified_device; - sub_view.modified_host = src.modified_host; - return sub_view; -} - -template< class D , class A1 , class A2 , class A3 , - class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 > -typename Impl::ViewSubview< DualView<D,A1,A2,A3> - , ArgType0 , ArgType1 , ArgType2 , ArgType3 - , void , void , void , void - >::type -subview( const DualView<D,A1,A2,A3> & src , - const ArgType0 & arg0 , - const ArgType1 & arg1 , - const ArgType2 & arg2 , - const ArgType3 & arg3 ) -{ - typedef typename - Impl::ViewSubview< DualView<D,A1,A2,A3> - , ArgType0 , ArgType1 , ArgType2 , ArgType3 - , void , void , void , void - >::type - DstViewType ; - DstViewType sub_view; - sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3); - sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3); - sub_view.modified_device = src.modified_device; - sub_view.modified_host = src.modified_host; - return sub_view; -} - -template< class D , class A1 , class A2 , class A3 , - class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , - class ArgType4 > -typename Impl::ViewSubview< DualView<D,A1,A2,A3> - , ArgType0 , ArgType1 , ArgType2 , ArgType3 - , ArgType4 , void , void , void - >::type -subview( const DualView<D,A1,A2,A3> & src , - const ArgType0 & arg0 , - const ArgType1 & arg1 , - const ArgType2 & arg2 , - const ArgType3 & arg3 , - const ArgType4 & arg4 ) -{ - typedef typename - Impl::ViewSubview< DualView<D,A1,A2,A3> - , ArgType0 , ArgType1 , ArgType2 , ArgType3 - , ArgType4 , void , void ,void - >::type - DstViewType ; 
- DstViewType sub_view; - sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4); - sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4); - sub_view.modified_device = src.modified_device; - sub_view.modified_host = src.modified_host; - return sub_view; -} - -template< class D , class A1 , class A2 , class A3 , - class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , - class ArgType4 , class ArgType5 > -typename Impl::ViewSubview< DualView<D,A1,A2,A3> - , ArgType0 , ArgType1 , ArgType2 , ArgType3 - , ArgType4 , ArgType5 , void , void - >::type -subview( const DualView<D,A1,A2,A3> & src , - const ArgType0 & arg0 , - const ArgType1 & arg1 , - const ArgType2 & arg2 , - const ArgType3 & arg3 , - const ArgType4 & arg4 , - const ArgType5 & arg5 ) -{ - typedef typename - Impl::ViewSubview< DualView<D,A1,A2,A3> - , ArgType0 , ArgType1 , ArgType2 , ArgType3 - , ArgType4 , ArgType5 , void , void - >::type - DstViewType ; - DstViewType sub_view; - sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5); - sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5); - sub_view.modified_device = src.modified_device; - sub_view.modified_host = src.modified_host; - return sub_view; -} - -template< class D , class A1 , class A2 , class A3 , - class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , - class ArgType4 , class ArgType5 , class ArgType6 > -typename Impl::ViewSubview< DualView<D,A1,A2,A3> - , ArgType0 , ArgType1 , ArgType2 , ArgType3 - , ArgType4 , ArgType5 , ArgType6 , void - >::type -subview( const DualView<D,A1,A2,A3> & src , - const ArgType0 & arg0 , - const ArgType1 & arg1 , - const ArgType2 & arg2 , - const ArgType3 & arg3 , - const ArgType4 & arg4 , - const ArgType5 & arg5 , - const ArgType6 & arg6 ) -{ - typedef typename - Impl::ViewSubview< DualView<D,A1,A2,A3> - , ArgType0 , ArgType1 , ArgType2 , ArgType3 - , ArgType4 , ArgType5 , ArgType6 , void - >::type - DstViewType ; - DstViewType sub_view; - 
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6); - sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6); - sub_view.modified_device = src.modified_device; - sub_view.modified_host = src.modified_host; - return sub_view; -} - -template< class D , class A1 , class A2 , class A3 , - class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , - class ArgType4 , class ArgType5 , class ArgType6 , class ArgType7 > -typename Impl::ViewSubview< DualView<D,A1,A2,A3> - , ArgType0 , ArgType1 , ArgType2 , ArgType3 - , ArgType4 , ArgType5 , ArgType6 , ArgType7 - >::type -subview( const DualView<D,A1,A2,A3> & src , - const ArgType0 & arg0 , - const ArgType1 & arg1 , - const ArgType2 & arg2 , - const ArgType3 & arg3 , - const ArgType4 & arg4 , - const ArgType5 & arg5 , - const ArgType6 & arg6 , - const ArgType7 & arg7 ) -{ - typedef typename - Impl::ViewSubview< DualView<D,A1,A2,A3> - , ArgType0 , ArgType1 , ArgType2 , ArgType3 - , ArgType4 , ArgType5 , ArgType6 , ArgType7 - >::type - DstViewType ; - DstViewType sub_view; - sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7); - sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7); - sub_view.modified_device = src.modified_device; - sub_view.modified_host = src.modified_host; - return sub_view; -} - -} // namespace Kokkos - -#endif /* KOKKOS_USING_EXP_VIEW */ - //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp index f72277700ad87cd0fe9cb1cdee4c2d34ff69ab80..1ac92b9d17c75cd032620e77bd324274a6746cb9 100644 --- a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -223,14 +223,85 @@ struct DynRankDimTraits { ); } - template < typename DynRankViewType , typename 
iType > - void verify_dynrankview_rank ( iType N , const DynRankViewType &drv ) - { - if ( static_cast<iType>(drv.rank()) > N ) - { - Kokkos::abort( "Need at least rank arguments to the operator()" ); - } + +/** \brief Debug bounds-checking routines */ +// Enhanced debug checking - most infrastructure matches that of functions in +// Kokkos_ViewMapping; additional checks for extra arguments beyond rank are 0 +template< unsigned , typename iType0 , class MapType > +KOKKOS_INLINE_FUNCTION +bool dyn_rank_view_verify_operator_bounds( const iType0 & , const MapType & ) +{ return true ; } + +template< unsigned R , typename iType0 , class MapType , typename iType1 , class ... Args > +KOKKOS_INLINE_FUNCTION +bool dyn_rank_view_verify_operator_bounds + ( const iType0 & rank + , const MapType & map + , const iType1 & i + , Args ... args + ) +{ + if ( static_cast<iType0>(R) < rank ) { + return ( size_t(i) < map.extent(R) ) + && dyn_rank_view_verify_operator_bounds<R+1>( rank , map , args ... ); + } + else if ( i != 0 ) { + printf("DynRankView Debug Bounds Checking Error: at rank %u\n Extra arguments beyond the rank must be zero \n",R); + return ( false ) + && dyn_rank_view_verify_operator_bounds<R+1>( rank , map , args ... ); } + else { + return ( true ) + && dyn_rank_view_verify_operator_bounds<R+1>( rank , map , args ... ); + } +} + +template< unsigned , class MapType > +inline +void dyn_rank_view_error_operator_bounds( char * , int , const MapType & ) +{} + +template< unsigned R , class MapType , class iType , class ... Args > +inline +void dyn_rank_view_error_operator_bounds + ( char * buf + , int len + , const MapType & map + , const iType & i + , Args ... args + ) +{ + const int n = + snprintf(buf,len," %ld < %ld %c" + , static_cast<unsigned long>(i) + , static_cast<unsigned long>( map.extent(R) ) + , ( sizeof...(Args) ? 
',' : ')' ) + ); + dyn_rank_view_error_operator_bounds<R+1>(buf+n,len-n,map,args...); +} + +// op_rank = rank of the operator version that was called +template< typename iType0 , typename iType1 , class MapType , class ... Args > +KOKKOS_INLINE_FUNCTION +void dyn_rank_view_verify_operator_bounds + ( const iType0 & op_rank , const iType1 & rank , const char* label , const MapType & map , Args ... args ) +{ + if ( static_cast<iType0>(rank) > op_rank ) { + Kokkos::abort( "DynRankView Bounds Checking Error: Need at least rank arguments to the operator()" ); + } + + if ( ! dyn_rank_view_verify_operator_bounds<0>( rank , map , args ... ) ) { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + enum { LEN = 1024 }; + char buffer[ LEN ]; + int n = snprintf(buffer,LEN,"DynRankView bounds error of view %s (", label); + dyn_rank_view_error_operator_bounds<0>( buffer + n , LEN - n , map , args ... ); + Kokkos::Impl::throw_runtime_exception(std::string(buffer)); +#else + Kokkos::abort("DynRankView bounds error"); +#endif + } +} /** \brief Assign compatible default mappings */ @@ -341,7 +412,6 @@ class DynRankView : public ViewTraits< DataType , Properties ... > private: template < class , class ... > friend class DynRankView ; -// template < class , class ... > friend class Kokkos::Experimental::View ; //unnecessary now... template < class , class ... 
> friend class Impl::ViewMapping ; public: @@ -504,20 +574,26 @@ private: ( is_layout_left || is_layout_right || is_layout_stride ) }; + template< class Space , bool = Kokkos::Impl::MemorySpaceAccess< Space , typename traits::memory_space >::accessible > struct verify_space + { KOKKOS_FORCEINLINE_FUNCTION static void check() {} }; + + template< class Space > struct verify_space<Space,false> + { KOKKOS_FORCEINLINE_FUNCTION static void check() + { Kokkos::abort("Kokkos::DynRankView ERROR: attempt to access inaccessible memory space"); }; + }; + // Bounds checking macros #if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) -#define KOKKOS_VIEW_OPERATOR_VERIFY( N , ARG ) \ - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \ - < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify(); \ - Kokkos::Experimental::Impl::verify_dynrankview_rank ( N , *this ) ; \ - Kokkos::Experimental::Impl::view_verify_operator_bounds ARG ; +// rank of the calling operator - included as first argument in ARG +#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \ + DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \ + Kokkos::Experimental::Impl::dyn_rank_view_verify_operator_bounds ARG ; #else -#define KOKKOS_VIEW_OPERATOR_VERIFY( N , ARG ) \ - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \ - < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify(); +#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \ + DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); #endif @@ -532,7 +608,11 @@ public: KOKKOS_INLINE_FUNCTION reference_type operator()() const { - KOKKOS_VIEW_OPERATOR_VERIFY( 0 , ( implementation_map() ) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (0 , this->rank() , NULL , m_map) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (0 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map) ) + #endif 
return implementation_map().reference(); //return m_map.reference(0,0,0,0,0,0,0); } @@ -563,12 +643,17 @@ public: return rankone_view(i0); } + // Rank 1 parenthesis template< typename iType > KOKKOS_INLINE_FUNCTION typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType>::value), reference_type>::type operator()(const iType & i0 ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( 1 , ( m_map , i0 ) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) ) + #endif return m_map.reference(i0); } @@ -577,6 +662,11 @@ public: typename std::enable_if< !(std::is_same<typename traits::specialize , void>::value && std::is_integral<iType>::value), reference_type>::type operator()(const iType & i0 ) const { + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) ) + #endif return m_map.reference(i0,0,0,0,0,0,0); } @@ -586,7 +676,11 @@ public: typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value), reference_type>::type operator()(const iType0 & i0 , const iType1 & i1 ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( 2 , ( m_map , i0 , i1 ) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) ) + #endif return m_map.reference(i0,i1); } @@ -595,7 +689,11 @@ public: typename std::enable_if< !(std::is_same<typename 
drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type operator()(const iType0 & i0 , const iType1 & i1 ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( 2 , ( m_map , i0 , i1 ) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) ) + #endif return m_map.reference(i0,i1,0,0,0,0,0); } @@ -605,7 +703,11 @@ public: typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value), reference_type>::type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( 3 , ( m_map , i0 , i1 , i2 ) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) ) + #endif return m_map.reference(i0,i1,i2); } @@ -614,7 +716,11 @@ public: typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( 3 , ( m_map , i0 , i1 , i2 ) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) ) + #endif return m_map.reference(i0,i1,i2,0,0,0,0); } @@ -624,7 +730,11 @@ public: typename std::enable_if< (std::is_same<typename traits::specialize , void>::value 
&& std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value), reference_type>::type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( 4 , ( m_map , i0 , i1 , i2 , i3 ) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) ) + #endif return m_map.reference(i0,i1,i2,i3); } @@ -633,7 +743,11 @@ public: typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( 4 , ( m_map , i0 , i1 , i2 , i3 ) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) ) + #endif return m_map.reference(i0,i1,i2,i3,0,0,0); } @@ -643,7 +757,11 @@ public: typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value), reference_type>::type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( 5 , ( m_map , i0 , i1 , i2 , i3 , i4 ) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) ) + #else + 
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) ) + #endif return m_map.reference(i0,i1,i2,i3,i4); } @@ -652,7 +770,11 @@ public: typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( 5 , ( m_map , i0 , i1 , i2 , i3 , i4 ) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) ) + #endif return m_map.reference(i0,i1,i2,i3,i4,0,0); } @@ -662,7 +784,11 @@ public: typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value), reference_type>::type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( 6 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 ) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) ) + #endif return m_map.reference(i0,i1,i2,i3,i4,i5); } @@ -671,7 +797,11 @@ public: typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type 
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( 6 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 ) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) ) + #endif return m_map.reference(i0,i1,i2,i3,i4,i5,0); } @@ -681,7 +811,11 @@ public: typename std::enable_if< (std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value && std::is_integral<iType6>::value), reference_type>::type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( 7 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 , i6 ) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (7 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5 , i6) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (7 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6) ) + #endif return m_map.reference(i0,i1,i2,i3,i4,i5,i6); } @@ -1136,13 +1270,13 @@ private: public: - typedef Kokkos::Experimental::ViewTraits + typedef Kokkos::ViewTraits < data_type , array_layout , typename SrcTraits::device_type , typename SrcTraits::memory_traits > traits_type ; - typedef Kokkos::Experimental::View + typedef Kokkos::View < data_type , array_layout , typename SrcTraits::device_type @@ -1154,13 +1288,13 @@ public: static_assert( Kokkos::Impl::is_memory_traits< MemoryTraits >::value , "" ); - typedef 
Kokkos::Experimental::ViewTraits + typedef Kokkos::ViewTraits < data_type , array_layout , typename SrcTraits::device_type , MemoryTraits > traits_type ; - typedef Kokkos::Experimental::View + typedef Kokkos::View < data_type , array_layout , typename SrcTraits::device_type @@ -1264,7 +1398,7 @@ subdynrankview( const Kokkos::Experimental::DynRankView< D , P... > &src , Args. if ( src.rank() > sizeof...(Args) ) //allow sizeof...(Args) >= src.rank(), ignore the remaining args { Kokkos::abort("subdynrankview: num of args must be >= rank of the source DynRankView"); } - typedef Kokkos::Experimental::Impl::ViewMapping< Kokkos::Experimental::Impl::DynRankSubviewTag , Kokkos::Experimental::ViewTraits< D*******, P... > , Args... > metafcn ; + typedef Kokkos::Experimental::Impl::ViewMapping< Kokkos::Experimental::Impl::DynRankSubviewTag , Kokkos::ViewTraits< D*******, P... > , Args... > metafcn ; return metafcn::subview( src.rank() , src , args... ); } @@ -1502,10 +1636,10 @@ void deep_copy typedef typename src_type::memory_space src_memory_space ; enum { DstExecCanAccessSrc = - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value }; + Kokkos::Impl::SpaceAccessibility< dst_execution_space , src_memory_space >::accessible }; enum { SrcExecCanAccessDst = - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename src_execution_space::memory_space , dst_memory_space >::value }; + Kokkos::Impl::SpaceAccessibility< src_execution_space , dst_memory_space >::accessible }; if ( (void *) dst.data() != (void*) src.data() ) { @@ -1666,7 +1800,7 @@ inline typename DynRankView<T,P...>::HostMirror create_mirror( const DynRankView<T,P...> & src , typename std::enable_if< - ! std::is_same< typename Kokkos::Experimental::ViewTraits<T,P...>::array_layout + ! 
std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout , Kokkos::LayoutStride >::value >::type * = 0 ) @@ -1684,7 +1818,7 @@ inline typename DynRankView<T,P...>::HostMirror create_mirror( const DynRankView<T,P...> & src , typename std::enable_if< - std::is_same< typename Kokkos::Experimental::ViewTraits<T,P...>::array_layout + std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout , Kokkos::LayoutStride >::value >::type * = 0 ) @@ -1779,7 +1913,7 @@ void resize( DynRankView<T,P...> & v , { typedef DynRankView<T,P...> drview_type ; - static_assert( Kokkos::Experimental::ViewTraits<T,P...>::is_managed , "Can only resize managed views" ); + static_assert( Kokkos::ViewTraits<T,P...>::is_managed , "Can only resize managed views" ); drview_type v_resized( v.label(), n0, n1, n2, n3, n4, n5, n6 ); @@ -1803,7 +1937,7 @@ void realloc( DynRankView<T,P...> & v , { typedef DynRankView<T,P...> drview_type ; - static_assert( Kokkos::Experimental::ViewTraits<T,P...>::is_managed , "Can only realloc managed views" ); + static_assert( Kokkos::ViewTraits<T,P...>::is_managed , "Can only realloc managed views" ); const std::string label = v.label(); diff --git a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp index fb364f0bf252e2ccae8aa04544487bc8f3f1a74f..3277c007d0845485a57ed7aabfa35202f1b22d1b 100644 --- a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp @@ -56,7 +56,7 @@ namespace Experimental { * Subviews are not allowed. */ template< typename DataType , typename ... P > -class DynamicView : public Kokkos::Experimental::ViewTraits< DataType , P ... > +class DynamicView : public Kokkos::ViewTraits< DataType , P ... 
> { public: @@ -75,6 +75,15 @@ private: std::is_same< typename traits::specialize , void >::value , "DynamicView must have trivial data type" ); + + template< class Space , bool = Kokkos::Impl::MemorySpaceAccess< Space , typename traits::memory_space >::accessible > struct verify_space + { KOKKOS_FORCEINLINE_FUNCTION static void check() {} }; + + template< class Space > struct verify_space<Space,false> + { KOKKOS_FORCEINLINE_FUNCTION static void check() + { Kokkos::abort("Kokkos::DynamicView ERROR: attempt to access inaccessible memory space"); }; + }; + public: typedef Kokkos::Experimental::MemoryPool< typename traits::device_type > memory_pool ; @@ -117,10 +126,10 @@ public: KOKKOS_INLINE_FUNCTION constexpr size_t size() const { return - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace + Kokkos::Impl::MemorySpaceAccess < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space - >::value + >::accessible ? // Runtime size is at the end of the chunk pointer array (*reinterpret_cast<const uintptr_t*>( m_chunks + m_chunk_max )) << m_chunk_shift @@ -179,10 +188,7 @@ public: static_assert( Kokkos::Impl::are_integral<I0,Args...>::value , "Indices must be integral type" ); - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace - < Kokkos::Impl::ActiveExecutionMemorySpace - , typename traits::memory_space - >::verify(); + DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); // Which chunk is being indexed. 
const uintptr_t ic = uintptr_t( i0 >> m_chunk_shift ); @@ -223,15 +229,13 @@ public: { typedef typename traits::value_type value_type ; - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace - < Kokkos::Impl::ActiveExecutionMemorySpace - , typename traits::memory_space >::verify(); + DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ; if ( m_chunk_max < NC ) { #if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) - printf("DynamicView::resize_parallel(%lu) m_chunk_max(%lu) NC(%lu)\n" + printf("DynamicView::resize_parallel(%lu) m_chunk_max(%u) NC(%lu)\n" , n , m_chunk_max , NC ); #endif Kokkos::abort("DynamicView::resize_parallel exceeded maximum size"); @@ -269,9 +273,7 @@ public: inline void resize_serial( size_t n ) { - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace - < Kokkos::Impl::ActiveExecutionMemorySpace - , typename traits::memory_space >::verify(); + DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ; @@ -398,9 +400,7 @@ public: , m_chunk_mask( ( 1 << m_chunk_shift ) - 1 ) , m_chunk_max( ( arg_size_max + m_chunk_mask ) >> m_chunk_shift ) { - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace - < Kokkos::Impl::ActiveExecutionMemorySpace - , typename traits::memory_space >::verify(); + DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); // A functor to deallocate all of the chunks upon final destruction @@ -452,7 +452,7 @@ void deep_copy( const View<T,DP...> & dst typedef typename ViewTraits<T,SP...>::memory_space src_memory_space ; enum { DstExecCanAccessSrc = - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value }; + Kokkos::Impl::SpaceAccessibility< dst_execution_space , src_memory_space >::accessible }; if ( DstExecCanAccessSrc ) { // Copying data between views 
in accessible memory spaces and either non-contiguous or incompatible shape. @@ -476,7 +476,7 @@ void deep_copy( const DynamicView<T,DP...> & dst typedef typename ViewTraits<T,SP...>::memory_space src_memory_space ; enum { DstExecCanAccessSrc = - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value }; + Kokkos::Impl::SpaceAccessibility< dst_execution_space , src_memory_space >::accessible }; if ( DstExecCanAccessSrc ) { // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape. diff --git a/lib/kokkos/containers/src/Kokkos_ErrorReporter.hpp b/lib/kokkos/containers/src/Kokkos_ErrorReporter.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4c90e4c238654b5458f24db2083eb85e815b683c --- /dev/null +++ b/lib/kokkos/containers/src/Kokkos_ErrorReporter.hpp @@ -0,0 +1,196 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXPERIMENTAL_ERROR_REPORTER_HPP +#define KOKKOS_EXPERIMENTAL_ERROR_REPORTER_HPP + +#include <vector> +#include <Kokkos_Core.hpp> +#include <Kokkos_View.hpp> +#include <Kokkos_DualView.hpp> + +namespace Kokkos { +namespace Experimental { + +template <typename ReportType, typename DeviceType> +class ErrorReporter +{ +public: + + typedef ReportType report_type; + typedef DeviceType device_type; + typedef typename device_type::execution_space execution_space; + + ErrorReporter(int max_results) + : m_numReportsAttempted(""), + m_reports("", max_results), + m_reporters("", max_results) + { + clear(); + } + + int getCapacity() const { return m_reports.h_view.dimension_0(); } + + int getNumReports(); + + int getNumReportAttempts(); + + void getReports(std::vector<int> &reporters_out, std::vector<report_type> &reports_out); + void getReports( typename Kokkos::View<int*, typename DeviceType::execution_space >::HostMirror &reporters_out, + typename Kokkos::View<report_type*, typename DeviceType::execution_space >::HostMirror 
&reports_out); + + void clear(); + + void resize(const size_t new_size); + + bool full() {return (getNumReportAttempts() >= getCapacity()); } + + KOKKOS_INLINE_FUNCTION + bool add_report(int reporter_id, report_type report) const + { + int idx = Kokkos::atomic_fetch_add(&m_numReportsAttempted(), 1); + + if (idx >= 0 && (idx < static_cast<int>(m_reports.d_view.dimension_0()))) { + m_reporters.d_view(idx) = reporter_id; + m_reports.d_view(idx) = report; + return true; + } + else { + return false; + } + } + +private: + + typedef Kokkos::View<report_type *, execution_space> reports_view_t; + typedef Kokkos::DualView<report_type *, execution_space> reports_dualview_t; + + typedef typename reports_dualview_t::host_mirror_space host_mirror_space; + Kokkos::View<int, execution_space> m_numReportsAttempted; + reports_dualview_t m_reports; + Kokkos::DualView<int *, execution_space> m_reporters; + +}; + + +template <typename ReportType, typename DeviceType> +inline int ErrorReporter<ReportType, DeviceType>::getNumReports() +{ + int num_reports = 0; + Kokkos::deep_copy(num_reports,m_numReportsAttempted); + if (num_reports > static_cast<int>(m_reports.h_view.dimension_0())) { + num_reports = m_reports.h_view.dimension_0(); + } + return num_reports; +} + +template <typename ReportType, typename DeviceType> +inline int ErrorReporter<ReportType, DeviceType>::getNumReportAttempts() +{ + int num_reports = 0; + Kokkos::deep_copy(num_reports,m_numReportsAttempted); + return num_reports; +} + +template <typename ReportType, typename DeviceType> +void ErrorReporter<ReportType, DeviceType>::getReports(std::vector<int> &reporters_out, std::vector<report_type> &reports_out) +{ + int num_reports = getNumReports(); + reporters_out.clear(); + reporters_out.reserve(num_reports); + reports_out.clear(); + reports_out.reserve(num_reports); + + if (num_reports > 0) { + m_reports.template sync<host_mirror_space>(); + m_reporters.template sync<host_mirror_space>(); + + for (int i = 0; i < 
num_reports; ++i) { + reporters_out.push_back(m_reporters.h_view(i)); + reports_out.push_back(m_reports.h_view(i)); + } + } +} + +template <typename ReportType, typename DeviceType> +void ErrorReporter<ReportType, DeviceType>::getReports( + typename Kokkos::View<int*, typename DeviceType::execution_space >::HostMirror &reporters_out, + typename Kokkos::View<report_type*, typename DeviceType::execution_space >::HostMirror &reports_out) +{ + int num_reports = getNumReports(); + reporters_out = typename Kokkos::View<int*, typename DeviceType::execution_space >::HostMirror("ErrorReport::reporters_out",num_reports); + reports_out = typename Kokkos::View<report_type*, typename DeviceType::execution_space >::HostMirror("ErrorReport::reports_out",num_reports); + + if (num_reports > 0) { + m_reports.template sync<host_mirror_space>(); + m_reporters.template sync<host_mirror_space>(); + + for (int i = 0; i < num_reports; ++i) { + reporters_out(i) = m_reporters.h_view(i); + reports_out(i) = m_reports.h_view(i); + } + } +} + +template <typename ReportType, typename DeviceType> +void ErrorReporter<ReportType, DeviceType>::clear() +{ + int num_reports=0; + Kokkos::deep_copy(m_numReportsAttempted, num_reports); + m_reports.template modify<execution_space>(); + m_reporters.template modify<execution_space>(); +} + +template <typename ReportType, typename DeviceType> +void ErrorReporter<ReportType, DeviceType>::resize(const size_t new_size) +{ + m_reports.resize(new_size); + m_reporters.resize(new_size); + Kokkos::fence(); +} + + +} // namespace Experimental +} // namespace kokkos + +#endif diff --git a/lib/kokkos/containers/src/Kokkos_SegmentedView.hpp b/lib/kokkos/containers/src/Kokkos_SegmentedView.hpp deleted file mode 100644 index 5dd7a98b893f0418fb31c7ae6026ac30c886f84b..0000000000000000000000000000000000000000 --- a/lib/kokkos/containers/src/Kokkos_SegmentedView.hpp +++ /dev/null @@ -1,531 +0,0 @@ -/* -//@HEADER -// 
************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_SEGMENTED_VIEW_HPP_ -#define KOKKOS_SEGMENTED_VIEW_HPP_ - -#include <Kokkos_Core.hpp> -#include <impl/Kokkos_Error.hpp> -#include <cstdio> - -#if ! KOKKOS_USING_EXP_VIEW - -namespace Kokkos { -namespace Experimental { - -namespace Impl { - -template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type> -struct delete_segmented_view; - -template<class MemorySpace> -inline -void DeviceSetAllocatableMemorySize(size_t) {} - -#if defined( KOKKOS_HAVE_CUDA ) - -template<> -inline -void DeviceSetAllocatableMemorySize<Kokkos::CudaSpace>(size_t size) { -#ifdef __CUDACC__ - size_t size_limit; - cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize); - if(size_limit<size) - cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size); - cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize); -#endif -} - -template<> -inline -void DeviceSetAllocatableMemorySize<Kokkos::CudaUVMSpace>(size_t size) { -#ifdef __CUDACC__ - size_t size_limit; - cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize); - if(size_limit<size) - cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size); - cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize); -#endif -} - -#endif /* #if defined( KOKKOS_HAVE_CUDA ) */ - -} - -template< class DataType , - class Arg1Type = void , - class Arg2Type = void , - class Arg3Type = void> -class SegmentedView : public Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > -{ -public: - //! \name Typedefs for device types and various Kokkos::View specializations. - //@{ - typedef Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ; - - //! The type of a Kokkos::View on the device. 
- typedef Kokkos::View< typename traits::data_type , - typename traits::array_layout , - typename traits::memory_space , - Kokkos::MemoryUnmanaged > t_dev ; - - -private: - Kokkos::View<t_dev*,typename traits::memory_space> segments_; - - Kokkos::View<int,typename traits::memory_space> realloc_lock; - Kokkos::View<int,typename traits::memory_space> nsegments_; - - size_t segment_length_; - size_t segment_length_m1_; - int max_segments_; - - int segment_length_log2; - - // Dimensions, cardinality, capacity, and offset computation for - // multidimensional array view of contiguous memory. - // Inherits from Impl::Shape - typedef Kokkos::Impl::ViewOffset< typename traits::shape_type - , typename traits::array_layout - > offset_map_type ; - - offset_map_type m_offset_map ; - - typedef Kokkos::View< typename traits::array_intrinsic_type , - typename traits::array_layout , - typename traits::memory_space , - typename traits::memory_traits > array_type ; - - typedef Kokkos::View< typename traits::const_data_type , - typename traits::array_layout , - typename traits::memory_space , - typename traits::memory_traits > const_type ; - - typedef Kokkos::View< typename traits::non_const_data_type , - typename traits::array_layout , - typename traits::memory_space , - typename traits::memory_traits > non_const_type ; - - typedef Kokkos::View< typename traits::non_const_data_type , - typename traits::array_layout , - HostSpace , - void > HostMirror ; - - template< bool Accessible > - KOKKOS_INLINE_FUNCTION - typename Kokkos::Impl::enable_if< Accessible , typename traits::size_type >::type - dimension_0_intern() const { return nsegments_() * segment_length_ ; } - - template< bool Accessible > - KOKKOS_INLINE_FUNCTION - typename Kokkos::Impl::enable_if< ! Accessible , typename traits::size_type >::type - dimension_0_intern() const - { - // In Host space - int n = 0 ; -#if ! 
defined( __CUDA_ARCH__ ) - Kokkos::Impl::DeepCopy< HostSpace , typename traits::memory_space >( & n , nsegments_.ptr_on_device() , sizeof(int) ); -#endif - - return n * segment_length_ ; - } - -public: - - enum { Rank = traits::rank }; - - KOKKOS_INLINE_FUNCTION offset_map_type shape() const { return m_offset_map ; } - - /* \brief return (current) size of dimension 0 */ - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const { - enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< - Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value }; - int n = SegmentedView::dimension_0_intern< Accessible >(); - return n ; - } - - /* \brief return size of dimension 1 */ - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_1() const { return m_offset_map.N1 ; } - /* \brief return size of dimension 2 */ - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_2() const { return m_offset_map.N2 ; } - /* \brief return size of dimension 3 */ - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_3() const { return m_offset_map.N3 ; } - /* \brief return size of dimension 4 */ - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_4() const { return m_offset_map.N4 ; } - /* \brief return size of dimension 5 */ - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_5() const { return m_offset_map.N5 ; } - /* \brief return size of dimension 6 */ - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_6() const { return m_offset_map.N6 ; } - /* \brief return size of dimension 7 */ - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_7() const { return m_offset_map.N7 ; } - - /* \brief return size of dimension 2 */ - KOKKOS_INLINE_FUNCTION typename traits::size_type size() const { - return dimension_0() * - m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 * - m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7 ; - } - - template< typename iType > - 
KOKKOS_INLINE_FUNCTION - typename traits::size_type dimension( const iType & i ) const { - if(i==0) - return dimension_0(); - else - return Kokkos::Impl::dimension( m_offset_map , i ); - } - - KOKKOS_INLINE_FUNCTION - typename traits::size_type capacity() { - return segments_.dimension_0() * - m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 * - m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7; - } - - KOKKOS_INLINE_FUNCTION - typename traits::size_type get_num_segments() { - enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< - Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value }; - int n = SegmentedView::dimension_0_intern< Accessible >(); - return n/segment_length_ ; - } - - KOKKOS_INLINE_FUNCTION - typename traits::size_type get_max_segments() { - return max_segments_; - } - - /// \brief Constructor that allocates View objects with an initial length of 0. - /// - /// This constructor works mostly like the analogous constructor of View. - /// The first argument is a string label, which is entirely for your - /// benefit. (Different SegmentedView objects may have the same label if - /// you like.) The second argument 'view_length' is the size of the segments. - /// This number must be a power of two. The third argument n0 is the maximum - /// value for the first dimension of the segmented view. The maximal allocatable - /// number of Segments is thus: (n0+view_length-1)/view_length. - /// The arguments that follow are the other dimensions of the (1-7) of the - /// View objects. For example, for a View with 3 runtime dimensions, - /// the first 4 integer arguments will be nonzero: - /// SegmentedView("Name",32768,10000000,8,4). This allocates a SegmentedView - /// with a maximum of 306 segments of dimension (32768,8,4). The logical size of - /// the segmented view is (n,8,4) with n between 0 and 10000000. - /// You may omit the integer arguments that follow. 
- template< class LabelType > - SegmentedView(const LabelType & label , - const size_t view_length , - const size_t n0 , - const size_t n1 = 0 , - const size_t n2 = 0 , - const size_t n3 = 0 , - const size_t n4 = 0 , - const size_t n5 = 0 , - const size_t n6 = 0 , - const size_t n7 = 0 - ): segment_length_(view_length),segment_length_m1_(view_length-1) - { - segment_length_log2 = -1; - size_t l = segment_length_; - while(l>0) { - l>>=1; - segment_length_log2++; - } - l = 1<<segment_length_log2; - if(l!=segment_length_) - Kokkos::Impl::throw_runtime_exception("Kokkos::SegmentedView requires a 'power of 2' segment length"); - - max_segments_ = (n0+segment_length_m1_)/segment_length_; - - Impl::DeviceSetAllocatableMemorySize<typename traits::memory_space>(segment_length_*max_segments_*sizeof(typename traits::value_type)); - - segments_ = Kokkos::View<t_dev*,typename traits::execution_space>(label , max_segments_); - realloc_lock = Kokkos::View<int,typename traits::execution_space>("Lock"); - nsegments_ = Kokkos::View<int,typename traits::execution_space>("nviews"); - m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7, n0*n1*n2*n3*n4*n5*n6*n7 ); - - } - - KOKKOS_INLINE_FUNCTION - SegmentedView(const SegmentedView& src): - segments_(src.segments_), - realloc_lock (src.realloc_lock), - nsegments_ (src.nsegments_), - segment_length_(src.segment_length_), - segment_length_m1_(src.segment_length_m1_), - max_segments_ (src.max_segments_), - segment_length_log2(src.segment_length_log2), - m_offset_map (src.m_offset_map) - {} - - KOKKOS_INLINE_FUNCTION - SegmentedView& operator= (const SegmentedView& src) { - segments_ = src.segments_; - realloc_lock = src.realloc_lock; - nsegments_ = src.nsegments_; - segment_length_= src.segment_length_; - segment_length_m1_= src.segment_length_m1_; - max_segments_ = src.max_segments_; - segment_length_log2= src.segment_length_log2; - m_offset_map = src.m_offset_map; - return *this; - } - - ~SegmentedView() { - if ( 
!segments_.tracker().ref_counting()) { return; } - size_t ref_count = segments_.tracker().ref_count(); - if(ref_count == 1u) { - Kokkos::fence(); - typename Kokkos::View<int,typename traits::execution_space>::HostMirror h_nviews("h_nviews"); - Kokkos::deep_copy(h_nviews,nsegments_); - Kokkos::parallel_for(h_nviews(),Impl::delete_segmented_view<DataType , Arg1Type , Arg2Type, Arg3Type>(*this)); - } - } - - KOKKOS_INLINE_FUNCTION - t_dev get_segment(const int& i) const { - return segments_[i]; - } - - template< class MemberType> - KOKKOS_INLINE_FUNCTION - void grow (MemberType& team_member, const size_t& growSize) const { - if (growSize>max_segments_*segment_length_) { - printf ("Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_); - return; - } - - if(team_member.team_rank()==0) { - bool too_small = growSize > segment_length_ * nsegments_(); - if (too_small) { - while(Kokkos::atomic_compare_exchange(&realloc_lock(),0,1) ) - ; // get the lock - too_small = growSize > segment_length_ * nsegments_(); // Recheck once we have the lock - if(too_small) { - while(too_small) { - const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3* - m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7; - typename traits::non_const_value_type* const ptr = new typename traits::non_const_value_type[alloc_size]; - - segments_(nsegments_()) = - t_dev(ptr,segment_length_,m_offset_map.N1,m_offset_map.N2,m_offset_map.N3,m_offset_map.N4,m_offset_map.N5,m_offset_map.N6,m_offset_map.N7); - nsegments_()++; - too_small = growSize > segment_length_ * nsegments_(); - } - } - realloc_lock() = 0; //release the lock - } - } - team_member.team_barrier(); - } - - KOKKOS_INLINE_FUNCTION - void grow_non_thread_safe (const size_t& growSize) const { - if (growSize>max_segments_*segment_length_) { - printf ("Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_); - return; - } - bool too_small = growSize > segment_length_ * 
nsegments_(); - if(too_small) { - while(too_small) { - const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3* - m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7; - typename traits::non_const_value_type* const ptr = - new typename traits::non_const_value_type[alloc_size]; - - segments_(nsegments_()) = - t_dev (ptr, segment_length_, m_offset_map.N1, m_offset_map.N2, - m_offset_map.N3, m_offset_map.N4, m_offset_map.N5, - m_offset_map.N6, m_offset_map.N7); - nsegments_()++; - too_small = growSize > segment_length_ * nsegments_(); - } - } - } - - template< typename iType0 > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral<iType0>::value && traits::rank == 1 ) - , typename traits::value_type & - >::type - operator() ( const iType0 & i0 ) const - { - return segments_[i0>>segment_length_log2](i0&(segment_length_m1_)); - } - - template< typename iType0 , typename iType1 > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral<iType0>::value && - std::is_integral<iType1>::value && - traits::rank == 2 ) - , typename traits::value_type & - >::type - operator() ( const iType0 & i0 , const iType1 & i1 ) const - { - return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1); - } - - template< typename iType0 , typename iType1 , typename iType2 > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral<iType0>::value && - std::is_integral<iType1>::value && - std::is_integral<iType2>::value && - traits::rank == 3 ) - , typename traits::value_type & - >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { - return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2); - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral<iType0>::value && - std::is_integral<iType1>::value && - 
std::is_integral<iType2>::value && - std::is_integral<iType3>::value && - traits::rank == 4 ) - , typename traits::value_type & - >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { - return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3); - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral<iType0>::value && - std::is_integral<iType1>::value && - std::is_integral<iType2>::value && - std::is_integral<iType3>::value && - std::is_integral<iType4>::value && - traits::rank == 5 ) - , typename traits::value_type & - >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 ) const - { - return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4); - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral<iType0>::value && - std::is_integral<iType1>::value && - std::is_integral<iType2>::value && - std::is_integral<iType3>::value && - std::is_integral<iType4>::value && - std::is_integral<iType5>::value && - traits::rank == 6 ) - , typename traits::value_type & - >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 ) const - { - return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5); - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 , typename iType6 > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral<iType0>::value && - std::is_integral<iType1>::value && - std::is_integral<iType2>::value && - std::is_integral<iType3>::value 
&& - std::is_integral<iType4>::value && - std::is_integral<iType5>::value && - std::is_integral<iType6>::value && - traits::rank == 7 ) - , typename traits::value_type & - >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const - { - return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6); - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 , typename iType6 , typename iType7 > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral<iType0>::value && - std::is_integral<iType1>::value && - std::is_integral<iType2>::value && - std::is_integral<iType3>::value && - std::is_integral<iType4>::value && - std::is_integral<iType5>::value && - std::is_integral<iType6>::value && - std::is_integral<iType7>::value && - traits::rank == 8 ) - , typename traits::value_type & - >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const - { - return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6,i7); - } -}; - -namespace Impl { -template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type> -struct delete_segmented_view { - typedef SegmentedView<DataType , Arg1Type , Arg2Type, Arg3Type> view_type; - typedef typename view_type::execution_space execution_space; - - view_type view_; - delete_segmented_view(view_type view):view_(view) { - } - - KOKKOS_INLINE_FUNCTION - void operator() (int i) const { - delete [] view_.get_segment(i).ptr_on_device(); - } -}; - -} -} -} - -#endif - -#endif diff --git a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp index 
7a916c6ef7c449a041d6d2014033e34c3342f185..8646d277921aff5c71b70c48d768ee39944b3455 100644 --- a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp +++ b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp @@ -241,9 +241,9 @@ public: typedef UnorderedMap<const_key_type,value_type,execution_space,hasher_type,equal_to_type> modifiable_map_type; typedef UnorderedMap<const_key_type,const_value_type,execution_space,hasher_type,equal_to_type> const_map_type; - static const bool is_set = Impl::is_same<void,value_type>::value; - static const bool has_const_key = Impl::is_same<const_key_type,declared_key_type>::value; - static const bool has_const_value = is_set || Impl::is_same<const_value_type,declared_value_type>::value; + static const bool is_set = std::is_same<void,value_type>::value; + static const bool has_const_key = std::is_same<const_key_type,declared_key_type>::value; + static const bool has_const_value = is_set || std::is_same<const_value_type,declared_value_type>::value; static const bool is_insertable_map = !has_const_key && (is_set || !has_const_value); static const bool is_modifiable_map = has_const_key && !has_const_value; @@ -735,8 +735,8 @@ public: } template <typename SKey, typename SValue, typename SDevice> - typename Impl::enable_if< Impl::is_same< typename Impl::remove_const<SKey>::type, key_type>::value && - Impl::is_same< typename Impl::remove_const<SValue>::type, value_type>::value + typename Impl::enable_if< std::is_same< typename Impl::remove_const<SKey>::type, key_type>::value && + std::is_same< typename Impl::remove_const<SValue>::type, value_type>::value >::type create_copy_view( UnorderedMap<SKey, SValue, SDevice, Hasher,EqualTo> const& src) { diff --git a/lib/kokkos/containers/unit_tests/CMakeLists.txt b/lib/kokkos/containers/unit_tests/CMakeLists.txt index 7fff0f835bb2e704914fe5df16556d6c4199a916..b9d860f32fd854a59e0258adabdc540a1ef0c512 100644 --- a/lib/kokkos/containers/unit_tests/CMakeLists.txt +++ 
b/lib/kokkos/containers/unit_tests/CMakeLists.txt @@ -1,6 +1,6 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) SET(SOURCES diff --git a/lib/kokkos/containers/unit_tests/Makefile b/lib/kokkos/containers/unit_tests/Makefile index 48e3ff61d04b9de210a7f1976217f4d1aca9e8e8..c45e2be05ed73633331b775c1e71195e8d844acc 100644 --- a/lib/kokkos/containers/unit_tests/Makefile +++ b/lib/kokkos/containers/unit_tests/Makefile @@ -7,21 +7,18 @@ vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests default: build_all echo "End Build" - -include $(KOKKOS_PATH)/Makefile.kokkos - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - CXX = $(NVCC_WRAPPER) - CXXFLAGS ?= -O3 - LINK = $(CXX) - LDFLAGS ?= -lpthread +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) + CXX = $(KOKKOS_PATH)/config/nvcc_wrapper else - CXX ?= g++ - CXXFLAGS ?= -O3 - LINK ?= $(CXX) - LDFLAGS ?= -lpthread + CXX = g++ endif +CXXFLAGS = -O3 +LINK ?= $(CXX) +LDFLAGS ?= -lpthread + +include $(KOKKOS_PATH)/Makefile.kokkos + KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/unit_tests TEST_TARGETS = diff --git a/lib/kokkos/containers/unit_tests/TestCuda.cpp b/lib/kokkos/containers/unit_tests/TestCuda.cpp index e30160b24e3a57d927924067d171ee8b49540357..6be38cd7a762c2c376f0fdc36e1dfb8b0b54b251 100644 --- a/lib/kokkos/containers/unit_tests/TestCuda.cpp +++ b/lib/kokkos/containers/unit_tests/TestCuda.cpp @@ -59,11 +59,13 @@ #include <TestVector.hpp> #include <TestDualView.hpp> #include <TestDynamicView.hpp> -#include <TestSegmentedView.hpp> #include <Kokkos_DynRankView.hpp> #include <TestDynViewAPI.hpp> +#include <Kokkos_ErrorReporter.hpp> +#include <TestErrorReporter.hpp> + //---------------------------------------------------------------------------- @@ -133,11 +135,6 @@ void cuda_test_dualview_combinations(unsigned int size) 
test_dualview_combinations<int,Kokkos::Cuda>(size); } -void cuda_test_segmented_view(unsigned int size) -{ - test_segmented_view<double,Kokkos::Cuda>(size); -} - void cuda_test_bitset() { test_bitset<Kokkos::Cuda>(); @@ -184,11 +181,6 @@ void cuda_test_bitset() cuda_test_dualview_combinations(size); \ } -#define CUDA_SEGMENTEDVIEW_TEST( size ) \ - TEST_F( cuda, segmentedview_##size##x) { \ - cuda_test_segmented_view(size); \ - } - CUDA_DUALVIEW_COMBINE_TEST( 10 ) CUDA_VECTOR_COMBINE_TEST( 10 ) CUDA_VECTOR_COMBINE_TEST( 3057 ) @@ -198,7 +190,6 @@ CUDA_INSERT_TEST(close, 100000, 90000, 100, 500) CUDA_INSERT_TEST(far, 100000, 90000, 100, 500) CUDA_DEEP_COPY( 10000, 1 ) CUDA_FAILED_INSERT_TEST( 10000, 1000 ) -CUDA_SEGMENTEDVIEW_TEST( 200 ) #undef CUDA_INSERT_TEST @@ -207,7 +198,6 @@ CUDA_SEGMENTEDVIEW_TEST( 200 ) #undef CUDA_DEEP_COPY #undef CUDA_VECTOR_COMBINE_TEST #undef CUDA_DUALVIEW_COMBINE_TEST -#undef CUDA_SEGMENTEDVIEW_TEST TEST_F( cuda , dynamic_view ) @@ -221,6 +211,18 @@ TEST_F( cuda , dynamic_view ) } +#if defined(KOKKOS_CLASS_LAMBDA) +TEST_F(cuda, ErrorReporterViaLambda) +{ + TestErrorReporter<ErrorReporterDriverUseLambda<Kokkos::Cuda>>(); +} +#endif + +TEST_F(cuda, ErrorReporter) +{ + TestErrorReporter<ErrorReporterDriver<Kokkos::Cuda>>(); +} + } #endif /* #ifdef KOKKOS_HAVE_CUDA */ diff --git a/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp b/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp index e71ccc0091f0ad8c67de46fe91b4b08e43dcc27d..d06277864486e2a80755629d6741f1b7f935fd37 100644 --- a/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp +++ b/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp @@ -715,9 +715,9 @@ public: typedef Kokkos::Experimental::DynRankView< T, device, Kokkos::MemoryUnmanaged > dView0_unmanaged ; typedef typename dView0::host_mirror_space host_drv_space ; - typedef Kokkos::Experimental::View< T , device > View0 ; - typedef Kokkos::Experimental::View< T* , device > View1 ; - typedef Kokkos::Experimental::View< T******* , 
device > View7 ; + typedef Kokkos::View< T , device > View0 ; + typedef Kokkos::View< T* , device > View1 ; + typedef Kokkos::View< T******* , device > View7 ; typedef typename View0::host_mirror_space host_view_space ; @@ -1127,8 +1127,7 @@ public: // T v2 = hx(0,0) ; // Generates compile error as intended // hx(0,0) = v2 ; // Generates compile error as intended -/* -#if ! KOKKOS_USING_EXP_VIEW +#if 0 /* Asynchronous deep copies not implemented for dynamic rank view */ // Testing with asynchronous deep copy with respect to device { size_t count = 0 ; @@ -1193,7 +1192,7 @@ public: { ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); } }}}} } -#endif */ // #if ! KOKKOS_USING_EXP_VIEW +#endif // Testing with synchronous deep copy { diff --git a/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp b/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c431b62a5380b82bb5a00da4ac8d63411cbe2f78 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp @@ -0,0 +1,227 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_EXPERIMENTAL_ERROR_REPORTER_HPP +#define KOKKOS_TEST_EXPERIMENTAL_ERROR_REPORTER_HPP + +#include <gtest/gtest.h> +#include <iostream> +#include <Kokkos_Core.hpp> + +namespace Test { + +// Just save the data in the report. Informative text goes in the operator<<(..).
+template <typename DataType1, typename DataType2, typename DataType3> +struct ThreeValReport +{ + DataType1 m_data1; + DataType2 m_data2; + DataType3 m_data3; + +}; + +template <typename DataType1, typename DataType2, typename DataType3> +std::ostream &operator<<(std::ostream & os, const ThreeValReport<DataType1, DataType2, DataType3> &val) +{ + return os << "{" << val.m_data1 << " " << val.m_data2 << " " << val.m_data3 << "}"; +} + +template<typename ReportType> +void checkReportersAndReportsAgree(const std::vector<int> &reporters, + const std::vector<ReportType> &reports) +{ + for (size_t i = 0; i < reports.size(); ++i) { + EXPECT_EQ(1, reporters[i] % 2); + EXPECT_EQ(reporters[i], reports[i].m_data1); + } +} + + +template <typename DeviceType> +struct ErrorReporterDriverBase { + + typedef ThreeValReport<int, int, double> report_type; + typedef Kokkos::Experimental::ErrorReporter<report_type, DeviceType> error_reporter_type; + error_reporter_type m_errorReporter; + + ErrorReporterDriverBase(int reporter_capacity, int test_size) + : m_errorReporter(reporter_capacity) { } + + KOKKOS_INLINE_FUNCTION bool error_condition(const int work_idx) const { return (work_idx % 2 != 0); } + + void check_expectations(int reporter_capacity, int test_size) + { + int num_reported = m_errorReporter.getNumReports(); + int num_attempts = m_errorReporter.getNumReportAttempts(); + + int expected_num_reports = std::min(reporter_capacity, test_size / 2); + EXPECT_EQ(expected_num_reports, num_reported); + EXPECT_EQ(test_size / 2, num_attempts); + + bool expect_full = (reporter_capacity <= (test_size / 2)); + bool reported_full = m_errorReporter.full(); + EXPECT_EQ(expect_full, reported_full); + } +}; + +template <typename ErrorReporterDriverType> +void TestErrorReporter() +{ + typedef ErrorReporterDriverType tester_type; + std::vector<int> reporters; + std::vector<typename tester_type::report_type> reports; + + tester_type test1(100, 10); + test1.m_errorReporter.getReports(reporters, 
reports); + checkReportersAndReportsAgree(reporters, reports); + + tester_type test2(10, 100); + test2.m_errorReporter.getReports(reporters, reports); + checkReportersAndReportsAgree(reporters, reports); + + typename Kokkos::View<int*, typename ErrorReporterDriverType::execution_space >::HostMirror view_reporters; + typename Kokkos::View<typename tester_type::report_type*, typename ErrorReporterDriverType::execution_space >::HostMirror + view_reports; + test2.m_errorReporter.getReports(view_reporters, view_reports); + + int num_reports = view_reporters.extent(0); + reporters.clear(); + reports.clear(); + reporters.reserve(num_reports); + reports.reserve(num_reports); + + for (int i = 0; i < num_reports; ++i) { + reporters.push_back(view_reporters(i)); + reports.push_back(view_reports(i)); + } + checkReportersAndReportsAgree(reporters, reports); + +} + + +template <typename DeviceType> +struct ErrorReporterDriver : public ErrorReporterDriverBase<DeviceType> +{ + typedef ErrorReporterDriverBase<DeviceType> driver_base; + typedef typename driver_base::error_reporter_type::execution_space execution_space; + + ErrorReporterDriver(int reporter_capacity, int test_size) + : driver_base(reporter_capacity, test_size) + { + execute(reporter_capacity, test_size); + + // Test that clear() and resize() work across memory spaces. 
+ if (reporter_capacity < test_size) { + driver_base::m_errorReporter.clear(); + driver_base::m_errorReporter.resize(test_size); + execute(test_size, test_size); + } + } + + void execute(int reporter_capacity, int test_size) + { + Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0,test_size), *this); + driver_base::check_expectations(reporter_capacity, test_size); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const int work_idx) const + { + if (driver_base::error_condition(work_idx)) { + double val = M_PI * static_cast<double>(work_idx); + typename driver_base::report_type report = {work_idx, -2*work_idx, val}; + driver_base::m_errorReporter.add_report(work_idx, report); + } + } +}; + +#if defined(KOKKOS_CLASS_LAMBDA) +template <typename DeviceType> +struct ErrorReporterDriverUseLambda : public ErrorReporterDriverBase<DeviceType> +{ + + typedef ErrorReporterDriverBase<DeviceType> driver_base; + typedef typename driver_base::error_reporter_type::execution_space execution_space; + + ErrorReporterDriverUseLambda(int reporter_capacity, int test_size) + : driver_base(reporter_capacity, test_size) + { + Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0,test_size), KOKKOS_CLASS_LAMBDA (const int work_idx) { + if (driver_base::error_condition(work_idx)) { + double val = M_PI * static_cast<double>(work_idx); + typename driver_base::report_type report = {work_idx, -2*work_idx, val}; + driver_base::m_errorReporter.add_report(work_idx, report); + } + }); + driver_base::check_expectations(reporter_capacity, test_size); + } + +}; +#endif + + +#ifdef KOKKOS_HAVE_OPENMP +struct ErrorReporterDriverNativeOpenMP : public ErrorReporterDriverBase<Kokkos::OpenMP> +{ + typedef ErrorReporterDriverBase<Kokkos::OpenMP> driver_base; + typedef typename driver_base::error_reporter_type::execution_space execution_space; + + ErrorReporterDriverNativeOpenMP(int reporter_capacity, int test_size) + : driver_base(reporter_capacity, test_size) + { +#pragma omp parallel for + 
for(int work_idx = 0; work_idx < test_size; ++work_idx) + { + if (driver_base::error_condition(work_idx)) { + double val = M_PI * static_cast<double>(work_idx); + typename driver_base::report_type report = {work_idx, -2*work_idx, val}; + driver_base::m_errorReporter.add_report(work_idx, report); + } + }; + driver_base::check_expectations(reporter_capacity, test_size); + } +}; +#endif + +} // namespace Test +#endif // #ifndef KOKKOS_TEST_EXPERIMENTAL_ERROR_REPORTER_HPP diff --git a/lib/kokkos/containers/unit_tests/TestOpenMP.cpp b/lib/kokkos/containers/unit_tests/TestOpenMP.cpp index a4319f39ff7ce626f45a3b7cd3fe9b2a823d1132..598a296c78a563043f0abd04c100db064151241f 100644 --- a/lib/kokkos/containers/unit_tests/TestOpenMP.cpp +++ b/lib/kokkos/containers/unit_tests/TestOpenMP.cpp @@ -56,12 +56,14 @@ #include <TestVector.hpp> #include <TestDualView.hpp> #include <TestDynamicView.hpp> -#include <TestSegmentedView.hpp> #include <TestComplex.hpp> #include <Kokkos_DynRankView.hpp> #include <TestDynViewAPI.hpp> +#include <Kokkos_ErrorReporter.hpp> +#include <TestErrorReporter.hpp> + #include <iomanip> namespace Test { @@ -143,11 +145,6 @@ TEST_F( openmp , staticcrsgraph ) test_dualview_combinations<int,Kokkos::OpenMP>(size); \ } -#define OPENMP_SEGMENTEDVIEW_TEST( size ) \ - TEST_F( openmp, segmentedview_##size##x) { \ - test_segmented_view<double,Kokkos::OpenMP>(size); \ - } - OPENMP_INSERT_TEST(close, 100000, 90000, 100, 500, true) OPENMP_INSERT_TEST(far, 100000, 90000, 100, 500, false) OPENMP_FAILED_INSERT_TEST( 10000, 1000 ) @@ -156,7 +153,6 @@ OPENMP_DEEP_COPY( 10000, 1 ) OPENMP_VECTOR_COMBINE_TEST( 10 ) OPENMP_VECTOR_COMBINE_TEST( 3057 ) OPENMP_DUALVIEW_COMBINE_TEST( 10 ) -OPENMP_SEGMENTEDVIEW_TEST( 10000 ) #undef OPENMP_INSERT_TEST #undef OPENMP_FAILED_INSERT_TEST @@ -164,7 +160,6 @@ OPENMP_SEGMENTEDVIEW_TEST( 10000 ) #undef OPENMP_DEEP_COPY #undef OPENMP_VECTOR_COMBINE_TEST #undef OPENMP_DUALVIEW_COMBINE_TEST -#undef OPENMP_SEGMENTEDVIEW_TEST #endif @@ -178,5 +173,22 @@ TEST_F(
openmp , dynamic_view ) } } +#if defined(KOKKOS_CLASS_LAMBDA) +TEST_F(openmp, ErrorReporterViaLambda) +{ + TestErrorReporter<ErrorReporterDriverUseLambda<Kokkos::OpenMP>>(); +} +#endif + +TEST_F(openmp, ErrorReporter) +{ + TestErrorReporter<ErrorReporterDriver<Kokkos::OpenMP>>(); +} + +TEST_F(openmp, ErrorReporterNativeOpenMP) +{ + TestErrorReporter<ErrorReporterDriverNativeOpenMP>(); +} + } // namespace test diff --git a/lib/kokkos/containers/unit_tests/TestSegmentedView.hpp b/lib/kokkos/containers/unit_tests/TestSegmentedView.hpp deleted file mode 100644 index bfd66d12a7dc658fe729ce7016b95d5d05c60202..0000000000000000000000000000000000000000 --- a/lib/kokkos/containers/unit_tests/TestSegmentedView.hpp +++ /dev/null @@ -1,708 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP -#define KOKKOS_TEST_SEGMENTEDVIEW_HPP - -#include <gtest/gtest.h> -#include <iostream> -#include <cstdlib> -#include <cstdio> -#include <Kokkos_Core.hpp> - -#if ! 
KOKKOS_USING_EXP_VIEW - -#include <Kokkos_SegmentedView.hpp> -#include <impl/Kokkos_Timer.hpp> - -namespace Test { - -namespace Impl { - - template<class ViewType , class ExecutionSpace, int Rank = ViewType::Rank> - struct GrowTest; - - template<class ViewType , class ExecutionSpace> - struct GrowTest<ViewType , ExecutionSpace , 1> { - typedef ExecutionSpace execution_space; - typedef Kokkos::TeamPolicy<execution_space> Policy; - typedef typename Policy::member_type team_type; - typedef double value_type; - - ViewType a; - - GrowTest(ViewType in):a(in) {} - - KOKKOS_INLINE_FUNCTION - void operator() (team_type team_member, double& value) const { - unsigned int team_idx = team_member.league_rank() * team_member.team_size(); - - a.grow(team_member , team_idx+team_member.team_size()); - value += team_idx + team_member.team_rank(); - - if((a.dimension_0()>team_idx+team_member.team_rank()) && - (a.dimension(0)>team_idx+team_member.team_rank())) - a(team_idx+team_member.team_rank()) = team_idx+team_member.team_rank(); - - } - }; - - template<class ViewType , class ExecutionSpace> - struct GrowTest<ViewType , ExecutionSpace , 2> { - typedef ExecutionSpace execution_space; - typedef Kokkos::TeamPolicy<execution_space> Policy; - typedef typename Policy::member_type team_type; - typedef double value_type; - - ViewType a; - - GrowTest(ViewType in):a(in) {} - - KOKKOS_INLINE_FUNCTION - void operator() (team_type team_member, double& value) const { - unsigned int team_idx = team_member.league_rank() * team_member.team_size(); - - a.grow(team_member , team_idx+ team_member.team_size()); - - for( typename ExecutionSpace::size_type k=0;k<7;k++) - value += team_idx + team_member.team_rank() + 13*k; - - if((a.dimension_0()>team_idx+ team_member.team_rank()) && - (a.dimension(0)>team_idx+ team_member.team_rank())) { - for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) { - a(team_idx+ team_member.team_rank(),k) = - team_idx+ team_member.team_rank() + 13*k; - } - } - } 
- }; - - template<class ViewType , class ExecutionSpace> - struct GrowTest<ViewType , ExecutionSpace , 3> { - typedef ExecutionSpace execution_space; - typedef Kokkos::TeamPolicy<execution_space> Policy; - typedef typename Policy::member_type team_type; - typedef double value_type; - - ViewType a; - - GrowTest(ViewType in):a(in) {} - - KOKKOS_INLINE_FUNCTION - void operator() (team_type team_member, double& value) const { - unsigned int team_idx = team_member.league_rank() * team_member.team_size(); - - a.grow(team_member , team_idx+ team_member.team_size()); - - for( typename ExecutionSpace::size_type k=0;k<7;k++) - for( typename ExecutionSpace::size_type l=0;l<3;l++) - value += team_idx + team_member.team_rank() + 13*k + 3*l; - - if((a.dimension_0()>team_idx+ team_member.team_rank()) && - (a.dimension(0)>team_idx+ team_member.team_rank())) { - for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) - for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) - a(team_idx+ team_member.team_rank(),k,l) = - team_idx+ team_member.team_rank() + 13*k + 3*l; - } - } - }; - - template<class ViewType , class ExecutionSpace> - struct GrowTest<ViewType , ExecutionSpace , 4> { - typedef ExecutionSpace execution_space; - typedef Kokkos::TeamPolicy<execution_space> Policy; - typedef typename Policy::member_type team_type; - typedef double value_type; - - ViewType a; - - GrowTest(ViewType in):a(in) {} - - KOKKOS_INLINE_FUNCTION - void operator() (team_type team_member, double& value) const { - unsigned int team_idx = team_member.league_rank() * team_member.team_size(); - - a.grow(team_member , team_idx+ team_member.team_size()); - - for( typename ExecutionSpace::size_type k=0;k<7;k++) - for( typename ExecutionSpace::size_type l=0;l<3;l++) - for( typename ExecutionSpace::size_type m=0;m<2;m++) - value += team_idx + team_member.team_rank() + 13*k + 3*l + 7*m; - - if((a.dimension_0()>team_idx+ team_member.team_rank()) && - (a.dimension(0)>team_idx+ 
team_member.team_rank())) { - for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) - for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) - for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) - a(team_idx+ team_member.team_rank(),k,l,m) = - team_idx+ team_member.team_rank() + 13*k + 3*l + 7*m; - } - } - }; - - template<class ViewType , class ExecutionSpace> - struct GrowTest<ViewType , ExecutionSpace , 5> { - typedef ExecutionSpace execution_space; - typedef Kokkos::TeamPolicy<execution_space> Policy; - typedef typename Policy::member_type team_type; - typedef double value_type; - - ViewType a; - - GrowTest(ViewType in):a(in) {} - - KOKKOS_INLINE_FUNCTION - void operator() (team_type team_member, double& value) const { - unsigned int team_idx = team_member.league_rank() * team_member.team_size(); - - a.grow(team_member , team_idx+ team_member.team_size()); - - for( typename ExecutionSpace::size_type k=0;k<7;k++) - for( typename ExecutionSpace::size_type l=0;l<3;l++) - for( typename ExecutionSpace::size_type m=0;m<2;m++) - for( typename ExecutionSpace::size_type n=0;n<3;n++) - value += - team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n; - - if((a.dimension_0()>team_idx+ team_member.team_rank()) && - (a.dimension(0)>team_idx+ team_member.team_rank())) { - for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) - for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) - for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) - for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++) - a(team_idx+ team_member.team_rank(),k,l,m,n) = - team_idx+ team_member.team_rank() + 13*k + 3*l + 7*m + 5*n; - } - } - }; - - template<class ViewType , class ExecutionSpace> - struct GrowTest<ViewType , ExecutionSpace , 6> { - typedef ExecutionSpace execution_space; - typedef Kokkos::TeamPolicy<execution_space> Policy; - typedef typename Policy::member_type team_type; - typedef double 
value_type; - - ViewType a; - - GrowTest(ViewType in):a(in) {} - - KOKKOS_INLINE_FUNCTION - void operator() (team_type team_member, double& value) const { - unsigned int team_idx = team_member.league_rank() * team_member.team_size(); - - a.grow(team_member , team_idx+ team_member.team_size()); - - for( typename ExecutionSpace::size_type k=0;k<7;k++) - for( typename ExecutionSpace::size_type l=0;l<3;l++) - for( typename ExecutionSpace::size_type m=0;m<2;m++) - for( typename ExecutionSpace::size_type n=0;n<3;n++) - for( typename ExecutionSpace::size_type o=0;o<2;o++) - value += - team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o ; - - if((a.dimension_0()>team_idx+ team_member.team_rank()) && - (a.dimension(0)>team_idx+ team_member.team_rank())) { - for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) - for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) - for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) - for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++) - for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++) - a(team_idx+ team_member.team_rank(),k,l,m,n,o) = - team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o ; - } - } - }; - - template<class ViewType , class ExecutionSpace> - struct GrowTest<ViewType , ExecutionSpace , 7> { - typedef ExecutionSpace execution_space; - typedef Kokkos::TeamPolicy<execution_space> Policy; - typedef typename Policy::member_type team_type; - typedef double value_type; - - ViewType a; - - GrowTest(ViewType in):a(in) {} - - KOKKOS_INLINE_FUNCTION - void operator() (team_type team_member, double& value) const { - unsigned int team_idx = team_member.league_rank() * team_member.team_size(); - - a.grow(team_member , team_idx+ team_member.team_size()); - - for( typename ExecutionSpace::size_type k=0;k<7;k++) - for( typename ExecutionSpace::size_type l=0;l<3;l++) - for( typename ExecutionSpace::size_type m=0;m<2;m++) - for( typename 
ExecutionSpace::size_type n=0;n<3;n++) - for( typename ExecutionSpace::size_type o=0;o<2;o++) - for( typename ExecutionSpace::size_type p=0;p<4;p++) - value += - team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p ; - - if((a.dimension_0()>team_idx+ team_member.team_rank()) && - (a.dimension(0)>team_idx+ team_member.team_rank())) { - for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) - for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) - for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) - for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++) - for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++) - for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++) - a(team_idx+ team_member.team_rank(),k,l,m,n,o,p) = - team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p ; - } - } - }; - - template<class ViewType , class ExecutionSpace> - struct GrowTest<ViewType , ExecutionSpace , 8> { - typedef ExecutionSpace execution_space; - typedef Kokkos::TeamPolicy<execution_space> Policy; - typedef typename Policy::member_type team_type; - typedef double value_type; - - ViewType a; - - GrowTest(ViewType in):a(in) {} - - KOKKOS_INLINE_FUNCTION - void operator() (team_type team_member, double& value) const { - unsigned int team_idx = team_member.league_rank() * team_member.team_size(); - a.grow(team_member , team_idx + team_member.team_size()); - - for( typename ExecutionSpace::size_type k=0;k<7;k++) - for( typename ExecutionSpace::size_type l=0;l<3;l++) - for( typename ExecutionSpace::size_type m=0;m<2;m++) - for( typename ExecutionSpace::size_type n=0;n<3;n++) - for( typename ExecutionSpace::size_type o=0;o<2;o++) - for( typename ExecutionSpace::size_type p=0;p<4;p++) - for( typename ExecutionSpace::size_type q=0;q<3;q++) - value += - team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p + 17*q; - - if((a.dimension_0()>team_idx+ 
team_member.team_rank()) && - (a.dimension(0)>team_idx+ team_member.team_rank())) { - for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) - for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) - for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) - for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++) - for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++) - for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++) - for( typename ExecutionSpace::size_type q=0;q<a.dimension_7();q++) - a(team_idx+ team_member.team_rank(),k,l,m,n,o,p,q) = - team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p + 17*q; - } - } - }; - - template<class ViewType , class ExecutionSpace, int Rank = ViewType::Rank> - struct VerifyTest; - - template<class ViewType , class ExecutionSpace> - struct VerifyTest<ViewType , ExecutionSpace , 1> { - typedef ExecutionSpace execution_space; - typedef Kokkos::TeamPolicy<execution_space> Policy; - typedef typename Policy::member_type team_type; - typedef double value_type; - - ViewType a; - - VerifyTest(ViewType in):a(in) {} - - KOKKOS_INLINE_FUNCTION - void operator() (team_type team_member, double& value) const { - unsigned int team_idx = team_member.league_rank() * team_member.team_size(); - - if((a.dimension_0()>team_idx+ team_member.team_rank()) && - (a.dimension(0)>team_idx+ team_member.team_rank())) { - value += a(team_idx+ team_member.team_rank()); - } - } - }; - - template<class ViewType , class ExecutionSpace> - struct VerifyTest<ViewType , ExecutionSpace , 2> { - typedef ExecutionSpace execution_space; - typedef Kokkos::TeamPolicy<execution_space> Policy; - typedef typename Policy::member_type team_type; - typedef double value_type; - - ViewType a; - - VerifyTest(ViewType in):a(in) {} - - KOKKOS_INLINE_FUNCTION - void operator() (team_type team_member, double& value) const { - unsigned int team_idx = team_member.league_rank() * 
team_member.team_size(); - - if((a.dimension_0()>team_idx+ team_member.team_rank()) && - (a.dimension(0)>team_idx+ team_member.team_rank())) { - for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) - value += a(team_idx+ team_member.team_rank(),k); - } - } - }; - - template<class ViewType , class ExecutionSpace> - struct VerifyTest<ViewType , ExecutionSpace , 3> { - typedef ExecutionSpace execution_space; - typedef Kokkos::TeamPolicy<execution_space> Policy; - typedef typename Policy::member_type team_type; - typedef double value_type; - - ViewType a; - - VerifyTest(ViewType in):a(in) {} - - KOKKOS_INLINE_FUNCTION - void operator() (team_type team_member, double& value) const { - unsigned int team_idx = team_member.league_rank() * team_member.team_size(); - - if((a.dimension_0()>team_idx+ team_member.team_rank()) && - (a.dimension(0)>team_idx+ team_member.team_rank())) { - for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) - for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) - value += a(team_idx+ team_member.team_rank(),k,l); - } - } - }; - - template<class ViewType , class ExecutionSpace> - struct VerifyTest<ViewType , ExecutionSpace , 4> { - typedef ExecutionSpace execution_space; - typedef Kokkos::TeamPolicy<execution_space> Policy; - typedef typename Policy::member_type team_type; - typedef double value_type; - - ViewType a; - - VerifyTest(ViewType in):a(in) {} - - KOKKOS_INLINE_FUNCTION - void operator() (team_type team_member, double& value) const { - unsigned int team_idx = team_member.league_rank() * team_member.team_size(); - - if((a.dimension_0()>team_idx+ team_member.team_rank()) && - (a.dimension(0)>team_idx+ team_member.team_rank())) { - for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) - for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) - for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) - value += a(team_idx+ team_member.team_rank(),k,l,m); - } - } - }; 
- - template<class ViewType , class ExecutionSpace> - struct VerifyTest<ViewType , ExecutionSpace , 5> { - typedef ExecutionSpace execution_space; - typedef Kokkos::TeamPolicy<execution_space> Policy; - typedef typename Policy::member_type team_type; - typedef double value_type; - - ViewType a; - - VerifyTest(ViewType in):a(in) {} - - KOKKOS_INLINE_FUNCTION - void operator() (team_type team_member, double& value) const { - unsigned int team_idx = team_member.league_rank() * team_member.team_size(); - - if((a.dimension_0()>team_idx+ team_member.team_rank()) && - (a.dimension(0)>team_idx+ team_member.team_rank())) { - for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) - for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) - for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) - for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++) - value += a(team_idx+ team_member.team_rank(),k,l,m,n); - } - } - }; - - template<class ViewType , class ExecutionSpace> - struct VerifyTest<ViewType , ExecutionSpace , 6> { - typedef ExecutionSpace execution_space; - typedef Kokkos::TeamPolicy<execution_space> Policy; - typedef typename Policy::member_type team_type; - typedef double value_type; - - ViewType a; - - VerifyTest(ViewType in):a(in) {} - - KOKKOS_INLINE_FUNCTION - void operator() (team_type team_member, double& value) const { - unsigned int team_idx = team_member.league_rank() * team_member.team_size(); - - if((a.dimension_0()>team_idx+ team_member.team_rank()) && - (a.dimension(0)>team_idx+ team_member.team_rank())) { - for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) - for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) - for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) - for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++) - for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++) - value += a(team_idx+ team_member.team_rank(),k,l,m,n,o); - } - 
} - }; - - template<class ViewType , class ExecutionSpace> - struct VerifyTest<ViewType , ExecutionSpace , 7> { - typedef ExecutionSpace execution_space; - typedef Kokkos::TeamPolicy<execution_space> Policy; - typedef typename Policy::member_type team_type; - typedef double value_type; - - ViewType a; - - VerifyTest(ViewType in):a(in) {} - - KOKKOS_INLINE_FUNCTION - void operator() (team_type team_member, double& value) const { - unsigned int team_idx = team_member.league_rank() * team_member.team_size(); - - if((a.dimension_0()>team_idx+ team_member.team_rank()) && - (a.dimension(0)>team_idx+ team_member.team_rank())) { - for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) - for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) - for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) - for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++) - for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++) - for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++) - value += a(team_idx+ team_member.team_rank(),k,l,m,n,o,p); - } - } - }; - - template<class ViewType , class ExecutionSpace> - struct VerifyTest<ViewType , ExecutionSpace , 8> { - typedef ExecutionSpace execution_space; - typedef Kokkos::TeamPolicy<execution_space> Policy; - typedef typename Policy::member_type team_type; - typedef double value_type; - - ViewType a; - - VerifyTest(ViewType in):a(in) {} - - KOKKOS_INLINE_FUNCTION - void operator() (team_type team_member, double& value) const { - unsigned int team_idx = team_member.league_rank() * team_member.team_size(); - - if((a.dimension_0()>team_idx+ team_member.team_rank()) && - (a.dimension(0)>team_idx+ team_member.team_rank())) { - for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) - for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++) - for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++) - for( typename ExecutionSpace::size_type 
n=0;n<a.dimension_4();n++) - for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++) - for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++) - for( typename ExecutionSpace::size_type q=0;q<a.dimension_7();q++) - value += a(team_idx+ team_member.team_rank(),k,l,m,n,o,p,q); - } - } - }; - - template <typename Scalar, class ExecutionSpace> - struct test_segmented_view - { - typedef test_segmented_view<Scalar,ExecutionSpace> self_type; - - typedef Scalar scalar_type; - typedef ExecutionSpace execution_space; - typedef Kokkos::TeamPolicy<execution_space> Policy; - - double result; - double reference; - - template <class ViewType> - void run_me(ViewType a, int max_length){ - const int team_size = Policy::team_size_max( GrowTest<ViewType,execution_space>(a) ); - const int nteams = max_length/team_size; - - reference = 0; - result = 0; - - Kokkos::parallel_reduce(Policy(nteams,team_size),GrowTest<ViewType,execution_space>(a),reference); - Kokkos::fence(); - Kokkos::parallel_reduce(Policy(nteams,team_size),VerifyTest<ViewType,execution_space>(a),result); - Kokkos::fence(); - } - - - test_segmented_view(unsigned int size,int rank) - { - reference = 0; - result = 0; - - const int dim_1 = 7; - const int dim_2 = 3; - const int dim_3 = 2; - const int dim_4 = 3; - const int dim_5 = 2; - const int dim_6 = 4; - //const int dim_7 = 3; - - if(rank==1) { - typedef Kokkos::Experimental::SegmentedView<Scalar*,Kokkos::LayoutLeft,ExecutionSpace> rank1_view; - run_me< rank1_view >(rank1_view("Rank1",128,size), size); - } - if(rank==2) { - typedef Kokkos::Experimental::SegmentedView<Scalar**,Kokkos::LayoutLeft,ExecutionSpace> rank2_view; - run_me< rank2_view >(rank2_view("Rank2",128,size,dim_1), size); - } - if(rank==3) { - typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3][2],Kokkos::LayoutRight,ExecutionSpace> rank3_view; - run_me< rank3_view >(rank3_view("Rank3",128,size), size); - } - if(rank==4) { - typedef 
Kokkos::Experimental::SegmentedView<Scalar****,Kokkos::LayoutRight,ExecutionSpace> rank4_view; - run_me< rank4_view >(rank4_view("Rank4",128,size,dim_1,dim_2,dim_3), size); - } - if(rank==5) { - typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3][2][3],Kokkos::LayoutLeft,ExecutionSpace> rank5_view; - run_me< rank5_view >(rank5_view("Rank5",128,size), size); - } - if(rank==6) { - typedef Kokkos::Experimental::SegmentedView<Scalar*****[2],Kokkos::LayoutRight,ExecutionSpace> rank6_view; - run_me< rank6_view >(rank6_view("Rank6",128,size,dim_1,dim_2,dim_3,dim_4), size); - } - if(rank==7) { - typedef Kokkos::Experimental::SegmentedView<Scalar*******,Kokkos::LayoutLeft,ExecutionSpace> rank7_view; - run_me< rank7_view >(rank7_view("Rank7",128,size,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6), size); - } - if(rank==8) { - typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> rank8_view; - run_me< rank8_view >(rank8_view("Rank8",128,size,dim_1,dim_2,dim_3,dim_4), size); - } - } - - }; - -} // namespace Impl - - - - -template <typename Scalar, class ExecutionSpace> -void test_segmented_view(unsigned int size) -{ - { - typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> view_type; - view_type a("A",128,size,7,3,2,3); - double reference; - - Impl::GrowTest<view_type,ExecutionSpace> f(a); - - const int team_size = Kokkos::TeamPolicy<ExecutionSpace>::team_size_max( f ); - const int nteams = (size+team_size-1)/team_size; - - Kokkos::parallel_reduce(Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size),f,reference); - - size_t real_size = ((size+127)/128)*128; - - ASSERT_EQ(real_size,a.dimension_0()); - ASSERT_EQ(7,a.dimension_1()); - ASSERT_EQ(3,a.dimension_2()); - ASSERT_EQ(2,a.dimension_3()); - ASSERT_EQ(3,a.dimension_4()); - ASSERT_EQ(2,a.dimension_5()); - ASSERT_EQ(4,a.dimension_6()); - ASSERT_EQ(3,a.dimension_7()); - ASSERT_EQ(real_size,a.dimension(0)); - ASSERT_EQ(7,a.dimension(1)); - 
ASSERT_EQ(3,a.dimension(2)); - ASSERT_EQ(2,a.dimension(3)); - ASSERT_EQ(3,a.dimension(4)); - ASSERT_EQ(2,a.dimension(5)); - ASSERT_EQ(4,a.dimension(6)); - ASSERT_EQ(3,a.dimension(7)); - ASSERT_EQ(8,a.Rank); - } - { - Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,1); - ASSERT_EQ(test.reference,test.result); - } - { - Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,2); - ASSERT_EQ(test.reference,test.result); - } - { - Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,3); - ASSERT_EQ(test.reference,test.result); - } - { - Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,4); - ASSERT_EQ(test.reference,test.result); - } - { - Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,5); - ASSERT_EQ(test.reference,test.result); - } - { - Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,6); - ASSERT_EQ(test.reference,test.result); - } - { - Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,7); - ASSERT_EQ(test.reference,test.result); - } - { - Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,8); - ASSERT_EQ(test.reference,test.result); - } - -} - - -} // namespace Test - -#else - -template <typename Scalar, class ExecutionSpace> -void test_segmented_view(unsigned int ) {} - -#endif - -#endif /* #ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP */ - diff --git a/lib/kokkos/containers/unit_tests/TestSerial.cpp b/lib/kokkos/containers/unit_tests/TestSerial.cpp index a7c42d27987d2938fb6b10254d72045732e0f74c..2be27ea613c5ecb18bcd72f5dfd9e542b44fec66 100644 --- a/lib/kokkos/containers/unit_tests/TestSerial.cpp +++ b/lib/kokkos/containers/unit_tests/TestSerial.cpp @@ -58,7 +58,6 @@ #include <TestStaticCrsGraph.hpp> #include <TestVector.hpp> #include <TestDualView.hpp> -#include <TestSegmentedView.hpp> #include <TestDynamicView.hpp> #include <TestComplex.hpp> @@ -67,6 +66,9 @@ #include <Kokkos_DynRankView.hpp> #include <TestDynViewAPI.hpp> +#include <Kokkos_ErrorReporter.hpp> +#include <TestErrorReporter.hpp> + 
namespace Test { class serial : public ::testing::Test { @@ -135,11 +137,6 @@ TEST_F( serial, bitset ) test_dualview_combinations<int,Kokkos::Serial>(size); \ } -#define SERIAL_SEGMENTEDVIEW_TEST( size ) \ - TEST_F( serial, segmentedview_##size##x) { \ - test_segmented_view<double,Kokkos::Serial>(size); \ - } - SERIAL_INSERT_TEST(close, 100000, 90000, 100, 500, true) SERIAL_INSERT_TEST(far, 100000, 90000, 100, 500, false) SERIAL_FAILED_INSERT_TEST( 10000, 1000 ) @@ -148,7 +145,6 @@ SERIAL_DEEP_COPY( 10000, 1 ) SERIAL_VECTOR_COMBINE_TEST( 10 ) SERIAL_VECTOR_COMBINE_TEST( 3057 ) SERIAL_DUALVIEW_COMBINE_TEST( 10 ) -SERIAL_SEGMENTEDVIEW_TEST( 10000 ) #undef SERIAL_INSERT_TEST #undef SERIAL_FAILED_INSERT_TEST @@ -156,7 +152,6 @@ SERIAL_SEGMENTEDVIEW_TEST( 10000 ) #undef SERIAL_DEEP_COPY #undef SERIAL_VECTOR_COMBINE_TEST #undef SERIAL_DUALVIEW_COMBINE_TEST -#undef SERIAL_SEGMENTEDVIEW_TEST TEST_F( serial , dynamic_view ) { @@ -168,6 +163,19 @@ TEST_F( serial , dynamic_view ) } } +#if defined(KOKKOS_CLASS_LAMBDA) +TEST_F(serial, ErrorReporterViaLambda) +{ + TestErrorReporter<ErrorReporterDriverUseLambda<Kokkos::Serial>>(); +} +#endif + +TEST_F(serial, ErrorReporter) +{ + TestErrorReporter<ErrorReporterDriver<Kokkos::Serial>>(); +} + + } // namespace Test #endif // KOKKOS_HAVE_SERIAL diff --git a/lib/kokkos/containers/unit_tests/TestThreads.cpp b/lib/kokkos/containers/unit_tests/TestThreads.cpp index 58277528d31d6ea6adae2996f5e8329b2c63b791..3b34006a01b99bff31896447b1838bfa18192f5e 100644 --- a/lib/kokkos/containers/unit_tests/TestThreads.cpp +++ b/lib/kokkos/containers/unit_tests/TestThreads.cpp @@ -62,11 +62,13 @@ #include <TestVector.hpp> #include <TestDualView.hpp> #include <TestDynamicView.hpp> -#include <TestSegmentedView.hpp> #include <Kokkos_DynRankView.hpp> #include <TestDynViewAPI.hpp> +#include <Kokkos_ErrorReporter.hpp> +#include <TestErrorReporter.hpp> + namespace Test { class threads : public ::testing::Test { @@ -145,12 +147,6 @@ TEST_F( threads , 
staticcrsgraph ) test_dualview_combinations<int,Kokkos::Threads>(size); \ } -#define THREADS_SEGMENTEDVIEW_TEST( size ) \ - TEST_F( threads, segmentedview_##size##x) { \ - test_segmented_view<double,Kokkos::Threads>(size); \ - } - - THREADS_INSERT_TEST(far, 100000, 90000, 100, 500, false) THREADS_FAILED_INSERT_TEST( 10000, 1000 ) THREADS_DEEP_COPY( 10000, 1 ) @@ -158,7 +154,6 @@ THREADS_DEEP_COPY( 10000, 1 ) THREADS_VECTOR_COMBINE_TEST( 10 ) THREADS_VECTOR_COMBINE_TEST( 3057 ) THREADS_DUALVIEW_COMBINE_TEST( 10 ) -THREADS_SEGMENTEDVIEW_TEST( 10000 ) #undef THREADS_INSERT_TEST @@ -167,8 +162,6 @@ THREADS_SEGMENTEDVIEW_TEST( 10000 ) #undef THREADS_DEEP_COPY #undef THREADS_VECTOR_COMBINE_TEST #undef THREADS_DUALVIEW_COMBINE_TEST -#undef THREADS_SEGMENTEDVIEW_TEST - TEST_F( threads , dynamic_view ) @@ -181,6 +174,19 @@ TEST_F( threads , dynamic_view ) } } + +#if defined(KOKKOS_CLASS_LAMBDA) +TEST_F(threads, ErrorReporterViaLambda) +{ + TestErrorReporter<ErrorReporterDriverUseLambda<Kokkos::Threads>>(); +} +#endif + +TEST_F(threads, ErrorReporter) +{ + TestErrorReporter<ErrorReporterDriver<Kokkos::Threads>>(); +} + } // namespace Test diff --git a/lib/kokkos/core/cmake/Dependencies.cmake b/lib/kokkos/core/cmake/Dependencies.cmake index 34ff0be5d3c6d26761b4758fda5d7217d66660e6..ae9a20c50efeadec69ab22e3365cd3ec26a5e451 100644 --- a/lib/kokkos/core/cmake/Dependencies.cmake +++ b/lib/kokkos/core/cmake/Dependencies.cmake @@ -2,3 +2,5 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( LIB_OPTIONAL_TPLS Pthread CUDA HWLOC QTHREAD DLlib TEST_OPTIONAL_TPLS CUSPARSE ) + +TRIBITS_TPL_TENTATIVELY_ENABLE(DLlib) \ No newline at end of file diff --git a/lib/kokkos/core/cmake/KokkosCore_config.h.in b/lib/kokkos/core/cmake/KokkosCore_config.h.in index 27e3ba1c31f56aa35c6487488d96fa71f7b25d99..9359b5a32b71f06230ea8a2e878e0f457f8eee85 100644 --- a/lib/kokkos/core/cmake/KokkosCore_config.h.in +++ b/lib/kokkos/core/cmake/KokkosCore_config.h.in @@ -45,6 +45,16 @@ #define KOKKOS_ENABLE_PROFILING 0 #endif 
+#cmakedefine KOKKOS_HAVE_CUDA_RDC +#ifdef KOKKOS_HAVE_CUDA_RDC +#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1 +#endif + +#cmakedefine KOKKOS_HAVE_CUDA_LAMBDA +#ifdef KOKKOS_HAVE_CUDA_LAMBDA +#define KOKKOS_CUDA_USE_LAMBDA 1 +#endif + // Don't forbid users from defining this macro on the command line, // but still make sure that CMake logic can control its definition. #if ! defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) diff --git a/lib/kokkos/core/perf_test/CMakeLists.txt b/lib/kokkos/core/perf_test/CMakeLists.txt index d93ca14d96fe159def46c29165e743313f91c9c4..cae52f1409e43a8adf9046855cc77b24f2dadce7 100644 --- a/lib/kokkos/core/perf_test/CMakeLists.txt +++ b/lib/kokkos/core/perf_test/CMakeLists.txt @@ -1,6 +1,6 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINRARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) SET(SOURCES PerfTestMain.cpp @@ -19,7 +19,7 @@ TRIBITS_ADD_EXECUTABLE( TESTONLYLIBS kokkos_gtest ) -TRIBITS_ADD_EXECUTABLE_AND_TEST( +TRIBITS_ADD_TEST( PerfTest NAME PerfTestExec COMM serial mpi diff --git a/lib/kokkos/core/perf_test/Makefile b/lib/kokkos/core/perf_test/Makefile index 8fa1fbfc3c00795cf0739a95f1fd23a988b30fa6..85f869971a33c349769bd318af28759f3e3eca12 100644 --- a/lib/kokkos/core/perf_test/Makefile +++ b/lib/kokkos/core/perf_test/Makefile @@ -7,21 +7,18 @@ vpath %.cpp ${KOKKOS_PATH}/core/perf_test default: build_all echo "End Build" - -include $(KOKKOS_PATH)/Makefile.kokkos - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - CXX = $(NVCC_WRAPPER) - CXXFLAGS ?= -O3 - LINK = $(CXX) - LDFLAGS ?= -lpthread +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) + CXX = $(KOKKOS_PATH)/config/nvcc_wrapper else - CXX ?= g++ - CXXFLAGS ?= -O3 - LINK ?= $(CXX) - LDFLAGS ?= -lpthread + CXX = g++ endif +CXXFLAGS = -O3 +LINK ?= $(CXX) +LDFLAGS ?= -lpthread + +include $(KOKKOS_PATH)/Makefile.kokkos + KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/core/perf_test TEST_TARGETS = 
diff --git a/lib/kokkos/core/perf_test/PerfTestHost.cpp b/lib/kokkos/core/perf_test/PerfTestHost.cpp index 6a0f2efadacd01e979d3beefd23b617b81acff48..4a05eecfe0eaa93665fe746c96248e09f14dded5 100644 --- a/lib/kokkos/core/perf_test/PerfTestHost.cpp +++ b/lib/kokkos/core/perf_test/PerfTestHost.cpp @@ -79,10 +79,21 @@ class host : public ::testing::Test { protected: static void SetUpTestCase() { - const unsigned team_count = Kokkos::hwloc::get_available_numa_count(); - const unsigned threads_per_team = 4 ; - - TestHostDevice::initialize( team_count * threads_per_team ); + if(Kokkos::hwloc::available()) { + const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); + const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); + const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); + + unsigned threads_count = 0 ; + + threads_count = std::max( 1u , numa_count ) + * std::max( 2u , cores_per_numa * threads_per_core ); + + TestHostDevice::initialize( threads_count ); + } else { + const unsigned thread_count = 4 ; + TestHostDevice::initialize( thread_count ); + } } static void TearDownTestCase() diff --git a/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_View.hpp b/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_View.hpp deleted file mode 100644 index 4ed7d8e2a8a40ef6434637f3e0ae72266e4c76bb..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_View.hpp +++ /dev/null @@ -1,334 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP -#define KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP - -/* only compile this file if CUDA is enabled for Kokkos */ -#if defined( KOKKOS_HAVE_CUDA ) - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -template<> -struct ViewOperatorBoundsErrorAbort< Kokkos::CudaSpace > { - KOKKOS_INLINE_FUNCTION - static void apply( const size_t rank - , const size_t n0 , const size_t n1 - , const size_t n2 , const size_t n3 - , const size_t n4 , const size_t n5 - , const size_t n6 , const size_t n7 - , const size_t i0 , const size_t i1 - , const size_t i2 , const size_t i3 - , const size_t i4 , const size_t i5 - , const size_t i6 , const size_t i7 ) - { - const int r = - ( n0 <= i0 ? 0 : - ( n1 <= i1 ? 1 : - ( n2 <= i2 ? 2 : - ( n3 <= i3 ? 3 : - ( n4 <= i4 ? 4 : - ( n5 <= i5 ? 5 : - ( n6 <= i6 ? 6 : 7 ))))))); - const size_t n = - ( n0 <= i0 ? n0 : - ( n1 <= i1 ? n1 : - ( n2 <= i2 ? n2 : - ( n3 <= i3 ? n3 : - ( n4 <= i4 ? n4 : - ( n5 <= i5 ? n5 : - ( n6 <= i6 ? n6 : n7 ))))))); - const size_t i = - ( n0 <= i0 ? i0 : - ( n1 <= i1 ? i1 : - ( n2 <= i2 ? i2 : - ( n3 <= i3 ? i3 : - ( n4 <= i4 ? i4 : - ( n5 <= i5 ? i5 : - ( n6 <= i6 ? 
i6 : i7 ))))))); - printf("Cuda view array bounds error index %d : FAILED %lu < %lu\n" , r , i , n ); - Kokkos::Impl::cuda_abort("Cuda view array bounds error"); - } -}; - -} // namespace Impl -} // namespace Experimental -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -// Cuda Texture fetches can be performed for 4, 8 and 16 byte objects (int,int2,int4) -// Via reinterpret_case this can be used to support all scalar types of those sizes. -// Any other scalar type falls back to either normal reads out of global memory, -// or using the __ldg intrinsic on Kepler GPUs or newer (Compute Capability >= 3.0) - -template< typename ValueType , typename AliasType > -struct CudaTextureFetch { - - ::cudaTextureObject_t m_obj ; - const ValueType * m_ptr ; - int m_offset ; - - // Deference operator pulls through texture object and returns by value - template< typename iType > - KOKKOS_INLINE_FUNCTION - ValueType operator[]( const iType & i ) const - { -#if defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ ) - AliasType v = tex1Dfetch<AliasType>( m_obj , i + m_offset ); - return *(reinterpret_cast<ValueType*> (&v)); -#else - return m_ptr[ i ]; -#endif - } - - // Pointer to referenced memory - KOKKOS_INLINE_FUNCTION - operator const ValueType * () const { return m_ptr ; } - - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch() : m_obj() , m_ptr() , m_offset() {} - - KOKKOS_INLINE_FUNCTION - ~CudaTextureFetch() {} - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch( const CudaTextureFetch & rhs ) - : m_obj( rhs.m_obj ) - , m_ptr( rhs.m_ptr ) - , m_offset( rhs.m_offset ) - {} - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch( CudaTextureFetch && rhs ) - : m_obj( rhs.m_obj ) - , m_ptr( rhs.m_ptr ) - , m_offset( rhs.m_offset ) - {} - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch & operator = ( const 
CudaTextureFetch & rhs ) - { - m_obj = rhs.m_obj ; - m_ptr = rhs.m_ptr ; - m_offset = rhs.m_offset ; - return *this ; - } - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch & operator = ( CudaTextureFetch && rhs ) - { - m_obj = rhs.m_obj ; - m_ptr = rhs.m_ptr ; - m_offset = rhs.m_offset ; - return *this ; - } - - // Texture object spans the entire allocation. - // This handle may view a subset of the allocation, so an offset is required. - template< class CudaMemorySpace > - inline explicit - CudaTextureFetch( const ValueType * const arg_ptr - , Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > & record - ) - : m_obj( record.template attach_texture_object< AliasType >() ) - , m_ptr( arg_ptr ) - , m_offset( record.attach_texture_object_offset( reinterpret_cast<const AliasType*>( arg_ptr ) ) ) - {} -}; - -#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC ) - -template< typename ValueType , typename AliasType > -struct CudaLDGFetch { - - const ValueType * m_ptr ; - - template< typename iType > - KOKKOS_INLINE_FUNCTION - ValueType operator[]( const iType & i ) const - { - AliasType v = __ldg(reinterpret_cast<AliasType*>(&m_ptr[i])); - return *(reinterpret_cast<ValueType*> (&v)); - } - - KOKKOS_INLINE_FUNCTION - operator const ValueType * () const { return m_ptr ; } - - KOKKOS_INLINE_FUNCTION - CudaLDGFetch() : m_ptr() {} - - KOKKOS_INLINE_FUNCTION - ~CudaLDGFetch() {} - - KOKKOS_INLINE_FUNCTION - CudaLDGFetch( const CudaLDGFetch & rhs ) - : m_ptr( rhs.m_ptr ) - {} - - KOKKOS_INLINE_FUNCTION - CudaLDGFetch( CudaLDGFetch && rhs ) - : m_ptr( rhs.m_ptr ) - {} - - KOKKOS_INLINE_FUNCTION - CudaLDGFetch & operator = ( const CudaLDGFetch & rhs ) - { - m_ptr = rhs.m_ptr ; - return *this ; - } - - KOKKOS_INLINE_FUNCTION - CudaLDGFetch & operator = ( CudaLDGFetch && rhs ) - { - m_ptr = rhs.m_ptr ; - return *this ; - } - - template< class CudaMemorySpace > - inline explicit - CudaTextureFetch( const ValueType * const arg_ptr - , 
Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > const & - ) - : m_ptr( arg_data_ptr ) - {} -}; - -#endif - -} // namespace Impl -} // namespace Experimental -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -/** \brief Replace Default ViewDataHandle with Cuda texture fetch specialization - * if 'const' value type, CudaSpace and random access. - */ -template< class Traits > -class ViewDataHandle< Traits , - typename std::enable_if<( - // Is Cuda memory space - ( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value || - std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ) - && - // Is a trivial const value of 4, 8, or 16 bytes - std::is_trivial<typename Traits::const_value_type>::value - && - std::is_same<typename Traits::const_value_type,typename Traits::value_type>::value - && - ( sizeof(typename Traits::const_value_type) == 4 || - sizeof(typename Traits::const_value_type) == 8 || - sizeof(typename Traits::const_value_type) == 16 ) - && - // Random access trait - ( Traits::memory_traits::RandomAccess != 0 ) - )>::type > -{ -public: - - using track_type = Kokkos::Experimental::Impl::SharedAllocationTracker ; - - using value_type = typename Traits::const_value_type ; - using return_type = typename Traits::const_value_type ; // NOT a reference - - using alias_type = typename std::conditional< ( sizeof(value_type) == 4 ) , int , - typename std::conditional< ( sizeof(value_type) == 8 ) , ::int2 , - typename std::conditional< ( sizeof(value_type) == 16 ) , ::int4 , void - >::type - >::type - >::type ; - -#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC ) - using handle_type = Kokkos::Experimental::Impl::CudaLDGFetch< value_type , alias_type > ; -#else - using handle_type = 
Kokkos::Experimental::Impl::CudaTextureFetch< value_type , alias_type > ; -#endif - - KOKKOS_INLINE_FUNCTION - static handle_type const & assign( handle_type const & arg_handle , track_type const & /* arg_tracker */ ) - { - return arg_handle ; - } - - KOKKOS_INLINE_FUNCTION - static handle_type assign( value_type * arg_data_ptr, track_type const & arg_tracker ) - { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - // Assignment of texture = non-texture requires creation of a texture object - // which can only occur on the host. In addition, 'get_record' is only valid - // if called in a host execution space - return handle_type( arg_data_ptr , arg_tracker.template get_record< typename Traits::memory_space >() ); -#else - Kokkos::Impl::cuda_abort("Cannot create Cuda texture object from within a Cuda kernel"); - return handle_type(); -#endif - } -}; - -} -} -} - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #if defined( KOKKOS_HAVE_CUDA ) */ -#endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */ - diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp index a4f372d65d1ee6456d9ff6d21cd4775d6fb6c448..8abf2292d9c6dae685d56a51338f712f7ef1d2c8 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp @@ -46,6 +46,7 @@ #include <sstream> #include <stdexcept> #include <algorithm> +#include <atomic> #include <Kokkos_Macros.hpp> /* only compile this file if CUDA is enabled for Kokkos */ @@ -58,6 +59,11 @@ #include <Cuda/Kokkos_Cuda_Internal.hpp> #include <impl/Kokkos_Error.hpp> +#if (KOKKOS_ENABLE_PROFILING) +#include <impl/Kokkos_Profiling_Interface.hpp> +#endif + + /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ @@ -65,6 +71,9 @@ namespace 
Kokkos { namespace Impl { namespace { + + static std::atomic<int> num_uvm_allocations(0) ; + cudaStream_t get_deep_copy_stream() { static cudaStream_t s = 0; if( s == 0) { @@ -119,6 +128,7 @@ void CudaSpace::access_error( const void * const ) Kokkos::Impl::throw_runtime_exception( msg ); } + /*--------------------------------------------------------------------------*/ bool CudaUVMSpace::available() @@ -133,6 +143,11 @@ bool CudaUVMSpace::available() /*--------------------------------------------------------------------------*/ +int CudaUVMSpace::number_of_allocations() +{ + return Kokkos::Impl::num_uvm_allocations.load(); +} + } // namespace Kokkos /*--------------------------------------------------------------------------*/ @@ -167,7 +182,18 @@ void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const { void * ptr = NULL; - CUDA_SAFE_CALL( cudaMallocManaged( &ptr, arg_alloc_size , cudaMemAttachGlobal ) ); + enum { max_uvm_allocations = 65536 }; + + if ( arg_alloc_size > 0 ) + { + Kokkos::Impl::num_uvm_allocations++; + + if ( Kokkos::Impl::num_uvm_allocations.load() > max_uvm_allocations ) { + Kokkos::Impl::throw_runtime_exception( "CudaUVM error: The maximum limit of UVM allocations exceeded (currently 65536)." ) ; + } + + CUDA_SAFE_CALL( cudaMallocManaged( &ptr, arg_alloc_size , cudaMemAttachGlobal ) ); + } return ptr ; } @@ -191,7 +217,10 @@ void CudaSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_all void CudaUVMSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const { try { - CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) ); + if ( arg_alloc_ptr != nullptr ) { + Kokkos::Impl::num_uvm_allocations--; + CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) ); + } } catch(...) {} } @@ -202,13 +231,24 @@ void CudaHostPinnedSpace::deallocate( void * const arg_alloc_ptr , const size_t } catch(...) 
{} } +constexpr const char* CudaSpace::name() { + return m_name; +} + +constexpr const char* CudaUVMSpace::name() { + return m_name; +} + +constexpr const char* CudaHostPinnedSpace::name() { + return m_name; +} + } // namespace Kokkos //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace Impl { SharedAllocationRecord< void , void > @@ -335,6 +375,18 @@ deallocate( SharedAllocationRecord< void , void > * arg_rec ) SharedAllocationRecord< Kokkos::CudaSpace , void >:: ~SharedAllocationRecord() { + #if (KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + + SharedAllocationHeader header ; + Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>::DeepCopy( & header , RecordBase::m_alloc_ptr , sizeof(SharedAllocationHeader) ); + + Kokkos::Profiling::deallocateData( + Kokkos::Profiling::SpaceHandle(Kokkos::CudaSpace::name()),header.m_label, + data(),size()); + } + #endif + m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr , SharedAllocationRecord< void , void >::m_alloc_size ); @@ -343,6 +395,15 @@ SharedAllocationRecord< Kokkos::CudaSpace , void >:: SharedAllocationRecord< Kokkos::CudaUVMSpace , void >:: ~SharedAllocationRecord() { + #if (KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::fence(); //Make sure I can access the label ... 
+ Kokkos::Profiling::deallocateData( + Kokkos::Profiling::SpaceHandle(Kokkos::CudaUVMSpace::name()),RecordBase::m_alloc_ptr->m_label, + data(),size()); + } + #endif + m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr , SharedAllocationRecord< void , void >::m_alloc_size ); @@ -351,6 +412,14 @@ SharedAllocationRecord< Kokkos::CudaUVMSpace , void >:: SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >:: ~SharedAllocationRecord() { + #if (KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::deallocateData( + Kokkos::Profiling::SpaceHandle(Kokkos::CudaHostPinnedSpace::name()),RecordBase::m_alloc_ptr->m_label, + data(),size()); + } + #endif + m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr , SharedAllocationRecord< void , void >::m_alloc_size ); @@ -373,6 +442,12 @@ SharedAllocationRecord( const Kokkos::CudaSpace & arg_space , m_tex_obj( 0 ) , m_space( arg_space ) { + #if (KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size); + } + #endif + SharedAllocationHeader header ; // Fill in the Header information @@ -404,7 +479,12 @@ SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space , m_tex_obj( 0 ) , m_space( arg_space ) { - // Fill in the Header information, directly accessible via UVM + #if (KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size); + } + #endif + // Fill in the Header information, directly accessible via UVM RecordBase::m_alloc_ptr->m_record = this ; @@ -430,6 +510,11 @@ SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space ) , m_space( arg_space ) { + #if (KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + 
Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size); + } + #endif // Fill in the Header information, directly accessible via UVM RecordBase::m_alloc_ptr->m_record = this ; @@ -502,6 +587,7 @@ void SharedAllocationRecord< Kokkos::CudaUVMSpace , void >:: deallocate_tracked( void * const arg_alloc_ptr ) { if ( arg_alloc_ptr != 0 ) { + SharedAllocationRecord * const r = get_record( arg_alloc_ptr ); RecordBase::decrement( r ); @@ -587,7 +673,7 @@ SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record( void * alloc_ptr RecordCuda * const record = alloc_ptr ? static_cast< RecordCuda * >( head.m_record ) : (RecordCuda *) 0 ; if ( ! alloc_ptr || record->m_alloc_ptr != head_cuda ) { - Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) ); + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) ); } #else @@ -598,7 +684,7 @@ SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record( void * alloc_ptr RecordCuda * const record = static_cast< RecordCuda * >( RecordBase::find( & s_root_record , alloc_ptr ) ); if ( record == 0 ) { - Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) ); + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) ); } #endif @@ -615,7 +701,7 @@ SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record( void * alloc_ Header * const h = alloc_ptr ? reinterpret_cast< Header * >( alloc_ptr ) - 1 : (Header *) 0 ; if ( ! 
alloc_ptr || h->m_record->m_alloc_ptr != h ) { - Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record ERROR" ) ); + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record ERROR" ) ); } return static_cast< RecordCuda * >( h->m_record ); @@ -630,7 +716,7 @@ SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record( void * Header * const h = alloc_ptr ? reinterpret_cast< Header * >( alloc_ptr ) - 1 : (Header *) 0 ; if ( ! alloc_ptr || h->m_record->m_alloc_ptr != h ) { - Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record ERROR" ) ); + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record ERROR" ) ); } return static_cast< RecordCuda * >( h->m_record ); @@ -728,7 +814,6 @@ print_records( std::ostream & s , const Kokkos::CudaHostPinnedSpace & space , bo } } // namespace Impl -} // namespace Experimental } // namespace Kokkos /*--------------------------------------------------------------------------*/ diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp index 2d8d07d0772f2dd2d27a73a4b804f3000953c824..59e79bba2570342b4175252914bd34701c5782b3 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp @@ -384,10 +384,10 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) const bool ok_id = 0 <= cuda_device_id && cuda_device_id < dev_info.m_cudaDevCount ; - // Need device capability 2.0 or better + // Need device capability 3.0 or better const bool ok_dev = ok_id && - ( 2 <= dev_info.m_cudaProp[ cuda_device_id ].major && + ( 3 <= dev_info.m_cudaProp[ cuda_device_id ].major && 0 <= 
dev_info.m_cudaProp[ cuda_device_id ].minor ); if ( ok_init && ok_dev ) { @@ -444,7 +444,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) //---------------------------------- // Maximum number of blocks: - m_maxBlock = m_cudaArch < 300 ? 65535 : cudaProp.maxGridSize[0] ; + m_maxBlock = cudaProp.maxGridSize[0] ; //---------------------------------- @@ -495,7 +495,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) msg << dev_info.m_cudaProp[ cuda_device_id ].major ; msg << "." ; msg << dev_info.m_cudaProp[ cuda_device_id ].minor ; - msg << " has insufficient capability, required 2.0 or better" ; + msg << " has insufficient capability, required 3.0 or better" ; } Kokkos::Impl::throw_runtime_exception( msg.str() ); } diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp index 7afa06fdf5582cd3543294b4156ac90a906a6ce7..12a639fd44aac274c3b7f29c89e850806f8e5ae4 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -95,27 +95,42 @@ private: public: -#if defined( __CUDA_ARCH__ ) - - __device__ inline + KOKKOS_INLINE_FUNCTION const execution_space::scratch_memory_space & team_shmem() const { return m_team_shared.set_team_thread_mode(0,1,0) ; } - __device__ inline + KOKKOS_INLINE_FUNCTION const execution_space::scratch_memory_space & team_scratch(const int& level) const { return m_team_shared.set_team_thread_mode(level,1,0) ; } - __device__ inline + KOKKOS_INLINE_FUNCTION const execution_space::scratch_memory_space & thread_scratch(const int& level) const { return m_team_shared.set_team_thread_mode(level,team_size(),team_rank()) ; } - __device__ inline int league_rank() const { return m_league_rank ; } - __device__ inline int league_size() const { return m_league_size ; } - __device__ inline int team_rank() const { return threadIdx.y ; } - __device__ inline int team_size() const { return blockDim.y ; } + KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; } + KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; } + KOKKOS_INLINE_FUNCTION int team_rank() const { + #ifdef __CUDA_ARCH__ + return threadIdx.y ; + #else + return 1; + #endif + } + KOKKOS_INLINE_FUNCTION int team_size() const { + #ifdef __CUDA_ARCH__ + return blockDim.y ; + #else + return 1; + #endif + } - __device__ inline void team_barrier() const { __syncthreads(); } + KOKKOS_INLINE_FUNCTION void team_barrier() const { + #ifdef __CUDA_ARCH__ + __syncthreads(); + #endif + } template<class ValueType> - __device__ inline void team_broadcast(ValueType& value, const int& thread_id) const { + KOKKOS_INLINE_FUNCTION void team_broadcast(ValueType& value, const int& thread_id) const { + #ifdef __CUDA_ARCH__ __shared__ ValueType sh_val; if(threadIdx.x == 0 && threadIdx.y == thread_id) { sh_val = value; @@ -123,26 +138,17 @@ public: team_barrier(); value = 
sh_val; team_barrier(); + #endif } -#ifdef KOKKOS_HAVE_CXX11 template< class ValueType, class JoinOp > - __device__ inline + KOKKOS_INLINE_FUNCTION typename JoinOp::value_type team_reduce( const ValueType & value - , const JoinOp & op_in ) const - { + , const JoinOp & op_in ) const { + #ifdef __CUDA_ARCH__ typedef JoinLambdaAdapter<ValueType,JoinOp> JoinOpFunctor ; const JoinOpFunctor op(op_in); ValueType * const base_data = (ValueType *) m_team_reduce ; -#else - template< class JoinOp > - __device__ inline - typename JoinOp::value_type team_reduce( const typename JoinOp::value_type & value - , const JoinOp & op ) const - { - typedef JoinOp JoinOpFunctor ; - typename JoinOp::value_type * const base_data = (typename JoinOp::value_type *) m_team_reduce ; -#endif __syncthreads(); // Don't write in to shared data until all threads have entered this function @@ -153,6 +159,9 @@ public: Impl::cuda_intra_block_reduce_scan<false,JoinOpFunctor,void>( op , base_data ); return base_data[ blockDim.y - 1 ]; + #else + return typename JoinOp::value_type(); + #endif } /** \brief Intra-team exclusive prefix sum with team_rank() ordering @@ -165,8 +174,8 @@ public: * non-deterministic. */ template< typename Type > - __device__ inline Type team_scan( const Type & value , Type * const global_accum ) const - { + KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const { + #ifdef __CUDA_ARCH__ Type * const base_data = (Type *) m_team_reduce ; __syncthreads(); // Don't write in to shared data until all threads have entered this function @@ -186,6 +195,9 @@ public: } return base_data[ threadIdx.y ]; + #else + return Type(); + #endif } /** \brief Intra-team exclusive prefix sum with team_rank() ordering. 
@@ -194,13 +206,14 @@ public: * reduction_total = dev.team_scan( value ) + value ; */ template< typename Type > - __device__ inline Type team_scan( const Type & value ) const - { return this->template team_scan<Type>( value , 0 ); } + KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const { + return this->template team_scan<Type>( value , 0 ); + } //---------------------------------------- // Private for the driver - __device__ inline + KOKKOS_INLINE_FUNCTION CudaTeamMember( void * shared , const int shared_begin , const int shared_size @@ -210,51 +223,10 @@ public: , const int arg_league_size ) : m_team_reduce( shared ) , m_team_shared( ((char *)shared) + shared_begin , shared_size, scratch_level_1_ptr, scratch_level_1_size) - , m_league_rank( arg_league_rank ) - , m_league_size( arg_league_size ) + , m_league_rank( arg_league_rank ) + , m_league_size( arg_league_size ) {} -#else - - const execution_space::scratch_memory_space & team_shmem() const - { return m_team_shared.set_team_thread_mode(0, 1,0) ; } - const execution_space::scratch_memory_space & team_scratch(const int& level) const - { return m_team_shared.set_team_thread_mode(level,1,0) ; } - const execution_space::scratch_memory_space & thread_scratch(const int& level) const - { return m_team_shared.set_team_thread_mode(level,team_size(),team_rank()) ; } - - int league_rank() const {return 0;} - int league_size() const {return 1;} - int team_rank() const {return 0;} - int team_size() const {return 1;} - - void team_barrier() const {} - template<class ValueType> - void team_broadcast(ValueType& value, const int& thread_id) const {} - - template< class JoinOp > - typename JoinOp::value_type team_reduce( const typename JoinOp::value_type & value - , const JoinOp & op ) const {return typename JoinOp::value_type();} - - template< typename Type > - Type team_scan( const Type & value , Type * const global_accum ) const {return Type();} - - template< typename Type > - Type team_scan( const Type & value 
) const {return Type();} - - //---------------------------------------- - // Private for the driver - - CudaTeamMember( void * shared - , const int shared_begin - , const int shared_end - , void* scratch_level_1_ptr - , const int scratch_level_1_size - , const int arg_league_rank - , const int arg_league_size ); - -#endif /* #if ! defined( __CUDA_ARCH__ ) */ - }; } // namespace Impl @@ -356,7 +328,7 @@ public: , m_vector_length( 0 ) , m_team_scratch_size {0,0} , m_thread_scratch_size {0,0} - , m_chunk_size ( 32 ) + , m_chunk_size ( 32 ) {} /** \brief Specify league size, request team size */ @@ -508,7 +480,7 @@ private: typedef typename Policy::work_tag WorkTag ; const FunctorType m_functor ; - const Policy m_policy ; + const Policy m_policy ; ParallelFor() = delete ; ParallelFor & operator = ( const ParallelFor & ) = delete ; @@ -638,8 +610,8 @@ public: } - ParallelFor( const FunctorType & arg_functor - , const Policy & arg_policy + ParallelFor( const FunctorType & arg_functor + , const Policy & arg_policy ) : m_functor( arg_functor ) , m_league_size( arg_policy.league_size() ) @@ -680,7 +652,7 @@ template< class FunctorType , class ReducerType, class ... Traits > class ParallelReduce< FunctorType , Kokkos::RangePolicy< Traits ... 
> , ReducerType - , Kokkos::Cuda + , Kokkos::Cuda > { private: @@ -835,23 +807,22 @@ public: const int nwork = m_policy.end() - m_policy.begin(); if ( nwork ) { const int block_size = local_block_size( m_functor ); - + m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_size /* block_size == max block_count */ ); m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) ); m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) ); - + // REQUIRED ( 1 , N , 1 ) const dim3 block( 1 , block_size , 1 ); // Required grid.x <= block.y const dim3 grid( std::min( int(block.y) , int( ( nwork + block.y - 1 ) / block.y ) ) , 1 , 1 ); - + const int shmem = UseShflReduction?0:cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,WorkTag>( m_functor , block.y ); - CudaParallelLaunch< ParallelReduce >( *this, grid, block, shmem ); // copy to device and execute - + Cuda::fence(); - + if ( m_result_ptr ) { if ( m_unified_space ) { const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); @@ -871,8 +842,8 @@ public: } template< class HostViewType > - ParallelReduce( const FunctorType & arg_functor - , const Policy & arg_policy + ParallelReduce( const FunctorType & arg_functor + , const Policy & arg_policy , const HostViewType & arg_result , typename std::enable_if< Kokkos::is_view< HostViewType >::value @@ -925,7 +896,6 @@ private: typedef typename ValueTraits::reference_type reference_type ; typedef typename ValueTraits::value_type value_type ; - public: typedef FunctorType functor_type ; @@ -937,7 +907,6 @@ private: typedef double DummyShflReductionType; typedef int DummySHMEMReductionType; - // Algorithmic constraints: blockDim.y is a power of two AND blockDim.y == blockDim.z == 1 // shared memory utilization: // @@ -1058,36 +1027,44 @@ public: inline void execute() { - const int 
block_count = UseShflReduction? std::min( m_league_size , size_type(1024) ) - :std::min( m_league_size , m_team_size ); + const int nwork = m_league_size * m_team_size ; + if ( nwork ) { + const int block_count = UseShflReduction? std::min( m_league_size , size_type(1024) ) + :std::min( m_league_size , m_team_size ); - m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_count ); - m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) ); - m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) ); + m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_count ); + m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) ); + m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) ); - const dim3 block( m_vector_size , m_team_size , 1 ); - const dim3 grid( block_count , 1 , 1 ); - const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size ; + const dim3 block( m_vector_size , m_team_size , 1 ); + const dim3 grid( block_count , 1 , 1 ); + const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size ; - CudaParallelLaunch< ParallelReduce >( *this, grid, block, shmem_size_total ); // copy to device and execute + CudaParallelLaunch< ParallelReduce >( *this, grid, block, shmem_size_total ); // copy to device and execute - Cuda::fence(); + Cuda::fence(); - if ( m_result_ptr ) { - if ( m_unified_space ) { - const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); - for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; } + if ( m_result_ptr ) { + if ( m_unified_space ) { + const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , 
m_reducer) ); + for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; } + } + else { + const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ); + DeepCopy<HostSpace,CudaSpace>( m_result_ptr, m_scratch_space, size ); + } } - else { - const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ); - DeepCopy<HostSpace,CudaSpace>( m_result_ptr, m_scratch_space, size ); + } + else { + if (m_result_ptr) { + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , m_result_ptr ); } } } template< class HostViewType > - ParallelReduce( const FunctorType & arg_functor - , const Policy & arg_policy + ParallelReduce( const FunctorType & arg_functor + , const Policy & arg_policy , const HostViewType & arg_result , typename std::enable_if< Kokkos::is_view< HostViewType >::value @@ -1106,9 +1083,18 @@ public: , m_team_size( 0 <= arg_policy.team_size() ? arg_policy.team_size() : Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / - arg_policy.vector_length() ) + arg_policy.vector_length() ) , m_vector_size( arg_policy.vector_length() ) - , m_scratch_size{arg_policy.scratch_size(0,m_team_size),arg_policy.scratch_size(1,m_team_size)} + , m_scratch_size{ + arg_policy.scratch_size(0,( 0 <= arg_policy.team_size() ? arg_policy.team_size() : + Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(), + arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / + arg_policy.vector_length() ) + ), arg_policy.scratch_size(1,( 0 <= arg_policy.team_size() ? 
arg_policy.team_size() : + Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(), + arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / + arg_policy.vector_length() ) + )} { // Return Init value if the number of worksets is zero if( arg_policy.league_size() == 0) { @@ -1342,7 +1328,7 @@ private: } // Scan block values into locations shared_data[1..blockDim.y] - cuda_intra_block_reduce_scan<true,FunctorType,WorkTag>( m_functor , ValueTraits::pointer_type(shared_data+word_count.value) ); + cuda_intra_block_reduce_scan<true,FunctorType,WorkTag>( m_functor , typename ValueTraits::pointer_type(shared_data+word_count.value) ); { size_type * const block_total = shared_data + word_count.value * blockDim.y ; @@ -1391,32 +1377,32 @@ public: const int nwork = m_policy.end() - m_policy.begin(); if ( nwork ) { enum { GridMaxComputeCapability_2x = 0x0ffff }; - + const int block_size = local_block_size( m_functor ); - + const int grid_max = ( block_size * block_size ) < GridMaxComputeCapability_2x ? 
( block_size * block_size ) : GridMaxComputeCapability_2x ; - + // At most 'max_grid' blocks: const int max_grid = std::min( int(grid_max) , int(( nwork + block_size - 1 ) / block_size )); - + // How much work per block: const int work_per_block = ( nwork + max_grid - 1 ) / max_grid ; - + // How many block are really needed for this much work: const int grid_x = ( nwork + work_per_block - 1 ) / work_per_block ; - + m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( m_functor ) * grid_x ); m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) * 1 ); - + const dim3 grid( grid_x , 1 , 1 ); const dim3 block( 1 , block_size , 1 ); // REQUIRED DIMENSIONS ( 1 , N , 1 ) const int shmem = ValueTraits::value_size( m_functor ) * ( block_size + 2 ); - + m_final = false ; CudaParallelLaunch< ParallelScan >( *this, grid, block, shmem ); // copy to device and execute - + m_final = true ; CudaParallelLaunch< ParallelScan >( *this, grid, block, shmem ); // copy to device and execute } @@ -1490,18 +1476,30 @@ namespace Impl { #ifdef __CUDA_ARCH__ __device__ inline - ThreadVectorRangeBoundariesStruct (const CudaTeamMember& thread, const iType& count): + ThreadVectorRangeBoundariesStruct (const CudaTeamMember, const iType& count): start( threadIdx.x ), end( count ), increment( blockDim.x ) {} + __device__ inline + ThreadVectorRangeBoundariesStruct (const iType& count): + start( threadIdx.x ), + end( count ), + increment( blockDim.x ) + {} #else KOKKOS_INLINE_FUNCTION - ThreadVectorRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& count): + ThreadVectorRangeBoundariesStruct (const CudaTeamMember, const iType& count): start( 0 ), end( count ), increment( 1 ) {} + KOKKOS_INLINE_FUNCTION + ThreadVectorRangeBoundariesStruct (const iType& count): + start( 0 ), + end( count ), + increment( 1 ) + {} #endif }; @@ -1509,22 +1507,24 @@ namespace Impl { template<typename iType> KOKKOS_INLINE_FUNCTION 
-Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember> - TeamThreadRange(const Impl::CudaTeamMember& thread, const iType& count) { - return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>(thread,count); +Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember > +TeamThreadRange( const Impl::CudaTeamMember & thread, const iType & count ) { + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember >( thread, count ); } -template<typename iType> +template< typename iType1, typename iType2 > KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember> - TeamThreadRange(const Impl::CudaTeamMember& thread, const iType& begin, const iType& end) { - return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>(thread,begin,end); +Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, + Impl::CudaTeamMember > +TeamThreadRange( const Impl::CudaTeamMember & thread, const iType1 & begin, const iType2 & end ) { + typedef typename std::common_type< iType1, iType2 >::type iType; + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember >( thread, iType(begin), iType(end) ); } template<typename iType> KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember > - ThreadVectorRange(const Impl::CudaTeamMember& thread, const iType& count) { +ThreadVectorRange(const Impl::CudaTeamMember& thread, const iType& count) { return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >(thread,count); } @@ -1571,9 +1571,10 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Cud lambda(i,result); } - Impl::cuda_intra_warp_reduction(result,[&] (ValueType& dst, const ValueType& src) { dst+=src; }); - Impl::cuda_inter_warp_reduction(result,[&] (ValueType& dst, const ValueType& src) { dst+=src; }); - + Impl::cuda_intra_warp_reduction(result,[&] (ValueType& dst, const ValueType& 
src) + { dst+=src; }); + Impl::cuda_inter_warp_reduction(result,[&] (ValueType& dst, const ValueType& src) + { dst+=src; }); #endif } @@ -1923,4 +1924,3 @@ namespace Impl { #endif /* defined( __CUDACC__ ) */ #endif /* #ifndef KOKKOS_CUDA_PARALLEL_HPP */ - diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp index 1778f631c0ef07b2bad25ea2c855e65c258e6f57..f30a0a891f6a2d4d90666c56e50464229bd69d6e 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp @@ -139,6 +139,7 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT typename FunctorValueTraits< FunctorType , ArgTag >::pointer_type const result, Cuda::size_type * const m_scratch_flags, const int max_active_thread = blockDim.y) { +#ifdef __CUDA_ARCH__ typedef typename FunctorValueTraits< FunctorType , ArgTag >::pointer_type pointer_type; typedef typename FunctorValueTraits< FunctorType , ArgTag >::value_type value_type; @@ -213,6 +214,9 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT //The last block has in its thread=0 the global reduction value through "value" return last_block; +#else + return true; +#endif } //---------------------------------------------------------------------------- @@ -290,10 +294,10 @@ void cuda_intra_block_reduce_scan( const FunctorType & functor , if ( ! ( rtid_inter + n < blockDim.y ) ) n = 0 ; - BLOCK_SCAN_STEP(tdata_inter,n,8) - BLOCK_SCAN_STEP(tdata_inter,n,7) - BLOCK_SCAN_STEP(tdata_inter,n,6) - BLOCK_SCAN_STEP(tdata_inter,n,5) + __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,8) + __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,7) + __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,6) + __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,5) } } } @@ -308,12 +312,19 @@ void cuda_intra_block_reduce_scan( const FunctorType & functor , ( rtid_intra & 16 ) ? 
16 : 0 )))); if ( ! ( rtid_intra + n < blockDim.y ) ) n = 0 ; - + #ifdef KOKKOS_CUDA_CLANG_WORKAROUND + BLOCK_SCAN_STEP(tdata_intra,n,4) __syncthreads();//__threadfence_block(); + BLOCK_SCAN_STEP(tdata_intra,n,3) __syncthreads();//__threadfence_block(); + BLOCK_SCAN_STEP(tdata_intra,n,2) __syncthreads();//__threadfence_block(); + BLOCK_SCAN_STEP(tdata_intra,n,1) __syncthreads();//__threadfence_block(); + BLOCK_SCAN_STEP(tdata_intra,n,0) __syncthreads(); + #else BLOCK_SCAN_STEP(tdata_intra,n,4) __threadfence_block(); BLOCK_SCAN_STEP(tdata_intra,n,3) __threadfence_block(); BLOCK_SCAN_STEP(tdata_intra,n,2) __threadfence_block(); BLOCK_SCAN_STEP(tdata_intra,n,1) __threadfence_block(); - BLOCK_SCAN_STEP(tdata_intra,n,0) + BLOCK_SCAN_STEP(tdata_intra,n,0) __threadfence_block(); + #endif } #undef BLOCK_SCAN_STEP diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp index 701d267e1ba39413061afd337ac19c7d6acaacfc..d56de5db60ae71b34481752870c41fdc4d784cb1 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp @@ -43,7 +43,7 @@ #include <Kokkos_Core.hpp> -#if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKPOLICY ) +#if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG ) #include <impl/Kokkos_TaskQueue_impl.hpp> @@ -174,6 +174,6 @@ printf("cuda_task_queue_execute after\n"); //---------------------------------------------------------------------------- -#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG ) */ diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp index 9d9347cc8d57c0c04a228fb0291c0f4e90b6243f..479294f3078a4e0d055610cb38b599415bbac921 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp @@ -1,13 +1,13 @@ /* //@HEADER 
// ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,7 +44,7 @@ #ifndef KOKKOS_IMPL_CUDA_TASK_HPP #define KOKKOS_IMPL_CUDA_TASK_HPP -#if defined( KOKKOS_ENABLE_TASKPOLICY ) +#if defined( KOKKOS_ENABLE_TASKDAG ) //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -99,7 +99,7 @@ public: extern template class TaskQueue< Kokkos::Cuda > ; //---------------------------------------------------------------------------- -/**\brief Impl::TaskExec<Cuda> is the TaskPolicy<Cuda>::member_type +/**\brief Impl::TaskExec<Cuda> is the TaskScheduler<Cuda>::member_type * passed to tasks running in a Cuda space. 
* * Cuda thread blocks for tasking are dimensioned: @@ -234,19 +234,23 @@ namespace Kokkos { template<typename iType> KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > > -TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread - , const iType & count ) +Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > > +TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread, const iType & count ) { - return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > >(thread,count); + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > >( thread, count ); } -template<typename iType> +template<typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > > -TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread, const iType & start , const iType & end ) +Impl::TeamThreadRangeBoundariesStruct + < typename std::common_type<iType1,iType2>::type + , Impl::TaskExec< Kokkos::Cuda > > +TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread + , const iType1 & begin, const iType2 & end ) { - return Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Cuda > >(thread,start,end); + typedef typename std::common_type< iType1, iType2 >::type iType; + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > >( + thread, iType(begin), iType(end) ); } template<typename iType> @@ -315,7 +319,7 @@ ValueType shfl_warp_broadcast } // all-reduce across corresponding vector lanes between team members within warp -// assume vec_length*team_size == warp_size +// assume vec_length*team_size == warp_size // blockDim.x == vec_length == stride // blockDim.y == team_size // threadIdx.x == position in vec @@ -344,7 +348,7 @@ void parallel_reduce // all-reduce across corresponding vector lanes between team members within warp // if no join() 
provided, use sum -// assume vec_length*team_size == warp_size +// assume vec_length*team_size == warp_size // blockDim.x == vec_length == stride // blockDim.y == team_size // threadIdx.x == position in vec @@ -372,7 +376,7 @@ void parallel_reduce } // all-reduce within team members within warp -// assume vec_length*team_size == warp_size +// assume vec_length*team_size == warp_size // blockDim.x == vec_length == stride // blockDim.y == team_size // threadIdx.x == position in vec @@ -397,7 +401,7 @@ void parallel_reduce // all-reduce within team members within warp // if no join() provided, use sum -// assume vec_length*team_size == warp_size +// assume vec_length*team_size == warp_size // blockDim.x == vec_length == stride // blockDim.y == team_size // threadIdx.x == position in vec @@ -426,7 +430,7 @@ void parallel_reduce } // scan across corresponding vector lanes between team members within warp -// assume vec_length*team_size == warp_size +// assume vec_length*team_size == warp_size // blockDim.x == vec_length == stride // blockDim.y == team_size // threadIdx.x == position in vec @@ -469,7 +473,7 @@ void parallel_scan } // scan within team member (vector) within warp -// assume vec_length*team_size == warp_size +// assume vec_length*team_size == warp_size // blockDim.x == vec_length == stride // blockDim.y == team_size // threadIdx.x == position in vec @@ -514,6 +518,6 @@ void parallel_scan //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ #endif /* #ifndef KOKKOS_IMPL_CUDA_TASK_HPP */ diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp deleted file mode 100644 index bb3cd2640d79ad980219861a6e4f0c233c0686bb..0000000000000000000000000000000000000000 --- 
a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp +++ /dev/null @@ -1,932 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -// Experimental unified task-data parallel manycore LDRD - -#include <stdio.h> -#include <iostream> -#include <sstream> -#include <Kokkos_Core.hpp> -#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp> - -#if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKPOLICY ) - -// #define DETAILED_PRINT - -//---------------------------------------------------------------------------- - -#define QLOCK reinterpret_cast<void*>( ~((uintptr_t)0) ) -#define QDENIED reinterpret_cast<void*>( ~((uintptr_t)0) - 1 ) - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -void CudaTaskPolicyQueue::Destroy::destroy_shared_allocation() -{ - // Verify the queue is empty - - if ( m_policy->m_count_ready || - m_policy->m_team[0] || - m_policy->m_team[1] || - m_policy->m_team[2] || - m_policy->m_serial[0] || - m_policy->m_serial[1] || - m_policy->m_serial[2] ) { - Kokkos::abort("CudaTaskPolicyQueue ERROR : Attempt to destroy non-empty queue" ); - } - - m_policy->~CudaTaskPolicyQueue(); - - Kokkos::Cuda::fence(); -} - -CudaTaskPolicyQueue:: -~CudaTaskPolicyQueue() -{ -} - -CudaTaskPolicyQueue:: -CudaTaskPolicyQueue - ( const unsigned arg_task_max_count - , const unsigned arg_task_max_size - , const unsigned arg_task_default_dependence_capacity - , const unsigned arg_team_size - ) - : m_space( Kokkos::CudaUVMSpace() - , arg_task_max_size * arg_task_max_count * 1.2 - , 16 /* log2(superblock size) */ - ) - , m_team { 0 , 0 , 0 } - , m_serial { 0 , 0 , 0 } - , m_team_size( 32 /* 1 warps */ ) - , m_default_dependence_capacity( arg_task_default_dependence_capacity ) - , m_count_ready(0) -{ - constexpr int max_team_size = 32 * 16 /* 16 warps */ ; - - const int target_team_size = - std::min( int(arg_team_size) , max_team_size ); - - while ( m_team_size < target_team_size ) { m_team_size *= 2 ; } -} - 
-//----------------------------------------------------------------------- -// Called by each block & thread - -__device__ -void Kokkos::Experimental::Impl::CudaTaskPolicyQueue::driver() -{ - task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED); - -#define IS_TEAM_LEAD ( threadIdx.x == 0 && threadIdx.y == 0 ) - -#ifdef DETAILED_PRINT -if ( IS_TEAM_LEAD ) { - printf( "CudaTaskPolicyQueue::driver() begin on %d with count %d\n" - , blockIdx.x , m_count_ready ); -} -#endif - - // Each thread block must iterate this loop synchronously - // to insure team-execution of team-task - - __shared__ task_root_type * team_task ; - - __syncthreads(); - - do { - - if ( IS_TEAM_LEAD ) { - if ( 0 == m_count_ready ) { - team_task = q_denied ; // All queues are empty and no running tasks - } - else { - team_task = 0 ; - for ( int i = 0 ; i < int(NPRIORITY) && 0 == team_task ; ++i ) { - if ( ( i < 2 /* regular queue */ ) - || ( ! m_space.is_empty() /* waiting for memory */ ) ) { - team_task = pop_ready_task( & m_team[i] ); - } - } - } - } - - __syncthreads(); - -#ifdef DETAILED_PRINT -if ( IS_TEAM_LEAD && 0 != team_task ) { - printf( "CudaTaskPolicyQueue::driver() (%d) team_task(0x%lx)\n" - , blockIdx.x - , (unsigned long) team_task ); -} -#endif - - // team_task == q_denied if all queues are empty - // team_task == 0 if no team tasks available - - if ( q_denied != team_task ) { - if ( 0 != team_task ) { - - Kokkos::Impl::CudaTeamMember - member( kokkos_impl_cuda_shared_memory<void>() - , 16 /* shared_begin */ - , team_task->m_shmem_size /* shared size */ - , 0 /* scratch level 1 pointer */ - , 0 /* scratch level 1 size */ - , 0 /* league rank */ - , 1 /* league size */ - ); - - (*team_task->m_team)( team_task , member ); - - // A __synthreads was called and if completed the - // functor was destroyed. 
- - if ( IS_TEAM_LEAD ) { - complete_executed_task( team_task ); - } - } - else { - // One thread of one warp performs this serial task - if ( threadIdx.x == 0 && - 0 == ( threadIdx.y % 32 ) ) { - task_root_type * task = 0 ; - for ( int i = 0 ; i < int(NPRIORITY) && 0 == task ; ++i ) { - if ( ( i < 2 /* regular queue */ ) - || ( ! m_space.is_empty() /* waiting for memory */ ) ) { - task = pop_ready_task( & m_serial[i] ); - } - } - -#ifdef DETAILED_PRINT -if ( 0 != task ) { - printf( "CudaTaskPolicyQueue::driver() (%2d)(%d) single task(0x%lx)\n" - , blockIdx.x - , threadIdx.y - , (unsigned long) task ); -} -#endif - - if ( task ) { - (*task->m_serial)( task ); - complete_executed_task( task ); - } - } - - __syncthreads(); - } - } - } while ( q_denied != team_task ); - -#ifdef DETAILED_PRINT -if ( IS_TEAM_LEAD ) { - printf( "CudaTaskPolicyQueue::driver() end on %d with count %d\n" - , blockIdx.x , m_count_ready ); -} -#endif - -#undef IS_TEAM_LEAD -} - -//----------------------------------------------------------------------- - -__device__ -CudaTaskPolicyQueue::task_root_type * -CudaTaskPolicyQueue::pop_ready_task( - CudaTaskPolicyQueue::task_root_type * volatile * const queue ) -{ - task_root_type * const q_lock = reinterpret_cast<task_root_type*>(QLOCK); - task_root_type * task = 0 ; - task_root_type * const task_claim = *queue ; - - if ( ( q_lock != task_claim ) && ( 0 != task_claim ) ) { - - // Queue is not locked and not null, try to claim head of queue. - // Is a race among threads to claim the queue. - - if ( task_claim == atomic_compare_exchange(queue,task_claim,q_lock) ) { - - // Aquired the task which must be in the waiting state. 
- - const int claim_state = - atomic_compare_exchange( & task_claim->m_state - , int(TASK_STATE_WAITING) - , int(TASK_STATE_EXECUTING) ); - - task_root_type * lock_verify = 0 ; - - if ( claim_state == int(TASK_STATE_WAITING) ) { - - // Transitioned this task from waiting to executing - // Update the queue to the next entry and release the lock - - task_root_type * const next = - *((task_root_type * volatile *) & task_claim->m_next ); - - *((task_root_type * volatile *) & task_claim->m_next ) = 0 ; - - lock_verify = atomic_compare_exchange( queue , q_lock , next ); - } - - if ( ( claim_state != int(TASK_STATE_WAITING) ) | - ( q_lock != lock_verify ) ) { - - printf( "CudaTaskPolicyQueue::pop_ready_task(0x%lx) task(0x%lx) state(%d) ERROR %s\n" - , (unsigned long) queue - , (unsigned long) task - , claim_state - , ( claim_state != int(TASK_STATE_WAITING) - ? "NOT WAITING" - : "UNLOCK" ) ); - Kokkos::abort("CudaTaskPolicyQueue::pop_ready_task"); - } - - task = task_claim ; - } - } - return task ; -} - -//----------------------------------------------------------------------- - -__device__ -void CudaTaskPolicyQueue::complete_executed_task( - CudaTaskPolicyQueue::task_root_type * task ) -{ - task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED); - - -#ifdef DETAILED_PRINT -printf( "CudaTaskPolicyQueue::complete_executed_task(0x%lx) state(%d) (%d)(%d,%d)\n" - , (unsigned long) task - , task->m_state - , blockIdx.x - , threadIdx.x - , threadIdx.y - ); -#endif - - // State is either executing or if respawned then waiting, - // try to transition from executing to complete. - // Reads the current value. 
- - const int state_old = - atomic_compare_exchange( & task->m_state - , int(Kokkos::Experimental::TASK_STATE_EXECUTING) - , int(Kokkos::Experimental::TASK_STATE_COMPLETE) ); - - if ( int(Kokkos::Experimental::TASK_STATE_WAITING) == state_old ) { - /* Task requested a respawn so reschedule it */ - schedule_task( task , false /* not initial spawn */ ); - } - else if ( int(Kokkos::Experimental::TASK_STATE_EXECUTING) == state_old ) { - /* Task is complete */ - - // Clear dependences of this task before locking wait queue - - task->clear_dependence(); - - // Stop other tasks from adding themselves to this task's wait queue. - // The wait queue is updated concurrently so guard with an atomic. - - task_root_type * wait_queue = *((task_root_type * volatile *) & task->m_wait ); - task_root_type * wait_queue_old = 0 ; - - do { - wait_queue_old = wait_queue ; - wait_queue = atomic_compare_exchange( & task->m_wait , wait_queue_old , q_denied ); - } while ( wait_queue_old != wait_queue ); - - // The task has been removed from ready queue and - // execution is complete so decrement the reference count. - // The reference count was incremented by the initial spawning. - // The task may be deleted if this was the last reference. - - task_root_type::assign( & task , 0 ); - - // Pop waiting tasks and schedule them - while ( wait_queue ) { - task_root_type * const x = wait_queue ; wait_queue = x->m_next ; x->m_next = 0 ; - schedule_task( x , false /* not initial spawn */ ); - } - } - else { - printf( "CudaTaskPolicyQueue::complete_executed_task(0x%lx) ERROR state_old(%d) dep_size(%d)\n" - , (unsigned long)( task ) - , int(state_old) - , task->m_dep_size - ); - Kokkos::abort("CudaTaskPolicyQueue::complete_executed_task" ); - } - - // If the task was respawned it may have already been - // put in a ready queue and the count incremented. - // By decrementing the count last it will never go to zero - // with a ready or executing task. 
- - atomic_fetch_add( & m_count_ready , -1 ); -} - -__device__ -void TaskMember< Kokkos::Cuda , void , void >::latch_add( const int k ) -{ - typedef TaskMember< Kokkos::Cuda , void , void > task_root_type ; - - task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED); - - const bool ok_input = 0 < k ; - - const int count = ok_input ? atomic_fetch_add( & m_dep_size , -k ) - k - : k ; - - const bool ok_count = 0 <= count ; - - const int state = 0 != count ? TASK_STATE_WAITING : - atomic_compare_exchange( & m_state - , TASK_STATE_WAITING - , TASK_STATE_COMPLETE ); - - const bool ok_state = state == TASK_STATE_WAITING ; - - if ( ! ok_count || ! ok_state ) { - printf( "CudaTaskPolicyQueue::latch_add[0x%lx](%d) ERROR %s %d\n" - , (unsigned long) this - , k - , ( ! ok_input ? "Non-positive input" : - ( ! ok_count ? "Negative count" : "Bad State" ) ) - , ( ! ok_input ? k : - ( ! ok_count ? count : state ) ) - ); - Kokkos::abort( "CudaTaskPolicyQueue::latch_add ERROR" ); - } - else if ( 0 == count ) { - // Stop other tasks from adding themselves to this latch's wait queue. - // The wait queue is updated concurrently so guard with an atomic. - - CudaTaskPolicyQueue & policy = *m_policy ; - task_root_type * wait_queue = *((task_root_type * volatile *) &m_wait); - task_root_type * wait_queue_old = 0 ; - - do { - wait_queue_old = wait_queue ; - wait_queue = atomic_compare_exchange( & m_wait , wait_queue_old , q_denied ); - } while ( wait_queue_old != wait_queue ); - - // Pop waiting tasks and schedule them - while ( wait_queue ) { - task_root_type * const x = wait_queue ; wait_queue = x->m_next ; x->m_next = 0 ; - policy.schedule_task( x , false /* not initial spawn */ ); - } - } -} - -//---------------------------------------------------------------------------- - -void CudaTaskPolicyQueue::reschedule_task( - CudaTaskPolicyQueue::task_root_type * const task ) -{ - // Reschedule transitions from executing back to waiting. 
- const int old_state = - atomic_compare_exchange( & task->m_state - , int(TASK_STATE_EXECUTING) - , int(TASK_STATE_WAITING) ); - - if ( old_state != int(TASK_STATE_EXECUTING) ) { - - printf( "CudaTaskPolicyQueue::reschedule_task(0x%lx) ERROR state(%d)\n" - , (unsigned long) task - , old_state - ); - Kokkos::abort("CudaTaskPolicyQueue::reschedule" ); - } -} - -KOKKOS_FUNCTION -void CudaTaskPolicyQueue::schedule_task( - CudaTaskPolicyQueue::task_root_type * const task , - const bool initial_spawn ) -{ - task_root_type * const q_lock = reinterpret_cast<task_root_type*>(QLOCK); - task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED); - - //---------------------------------------- - // State is either constructing or already waiting. - // If constructing then transition to waiting. - - { - const int old_state = atomic_compare_exchange( & task->m_state - , int(TASK_STATE_CONSTRUCTING) - , int(TASK_STATE_WAITING) ); - - // Head of linked list of tasks waiting on this task - task_root_type * const waitTask = - *((task_root_type * volatile const *) & task->m_wait ); - - // Member of linked list of tasks waiting on some other task - task_root_type * const next = - *((task_root_type * volatile const *) & task->m_next ); - - // An incomplete and non-executing task has: - // task->m_state == TASK_STATE_CONSTRUCTING or TASK_STATE_WAITING - // task->m_wait != q_denied - // task->m_next == 0 - // - if ( ( q_denied == waitTask ) || - ( 0 != next ) || - ( old_state != int(TASK_STATE_CONSTRUCTING) && - old_state != int(TASK_STATE_WAITING) ) ) { - printf( "CudaTaskPolicyQueue::schedule_task(0x%lx) STATE ERROR: state(%d) wait(0x%lx) next(0x%lx)\n" - , (unsigned long) task - , old_state - , (unsigned long) waitTask - , (unsigned long) next ); - Kokkos::abort("CudaTaskPolicyQueue::schedule" ); - } - } - - //---------------------------------------- - - if ( initial_spawn ) { - // The initial spawn of a task increments the reference count - // for the task's 
existence in either a waiting or ready queue - // until the task has completed. - // Completing the task's execution is the matching - // decrement of the reference count. - task_root_type::assign( 0 , task ); - } - - //---------------------------------------- - // Insert this task into a dependence task that is not complete. - // Push on to that task's wait queue. - - bool attempt_insert_in_queue = true ; - - task_root_type * volatile * queue = - task->m_dep_size ? & task->m_dep[0]->m_wait : (task_root_type **) 0 ; - - for ( int i = 0 ; attempt_insert_in_queue && ( 0 != queue ) ; ) { - - task_root_type * const head_value_old = *queue ; - - if ( q_denied == head_value_old ) { - // Wait queue is closed because task is complete, - // try again with the next dependence wait queue. - ++i ; - queue = i < task->m_dep_size ? & task->m_dep[i]->m_wait - : (task_root_type **) 0 ; - } - else { - - // Wait queue is open and not denied. - // Have exclusive access to this task. - // Assign m_next assuming a successfull insertion into the queue. - // Fence the memory assignment before attempting the CAS. - - *((task_root_type * volatile *) & task->m_next ) = head_value_old ; - - memory_fence(); - - // Attempt to insert this task into the queue. - // If fails then continue the attempt. - - attempt_insert_in_queue = - head_value_old != atomic_compare_exchange(queue,head_value_old,task); - } - } - - //---------------------------------------- - // All dependences are complete, insert into the ready list - - if ( attempt_insert_in_queue ) { - - // Increment the count of ready tasks. - // Count will be decremented when task is complete. - - atomic_fetch_add( & m_count_ready , 1 ); - - queue = task->m_queue ; - - while ( attempt_insert_in_queue ) { - - // A locked queue is being popped. 
- - task_root_type * const head_value_old = *queue ; - - if ( q_lock != head_value_old ) { - // Read the head of ready queue, - // if same as previous value then CAS locks the ready queue - - // Have exclusive access to this task, - // assign to head of queue, assuming successful insert - // Fence assignment before attempting insert. - *((task_root_type * volatile *) & task->m_next ) = head_value_old ; - - memory_fence(); - - attempt_insert_in_queue = - head_value_old != atomic_compare_exchange(queue,head_value_old,task); - } - } - } -} - -void CudaTaskPolicyQueue::deallocate_task - ( CudaTaskPolicyQueue::task_root_type * const task ) -{ - m_space.deallocate( task , task->m_size_alloc ); -} - -KOKKOS_FUNCTION -CudaTaskPolicyQueue::task_root_type * -CudaTaskPolicyQueue::allocate_task - ( const unsigned arg_sizeof_task - , const unsigned arg_dep_capacity - , const unsigned arg_team_shmem - ) -{ - const unsigned base_size = arg_sizeof_task + - ( arg_sizeof_task % sizeof(task_root_type*) - ? sizeof(task_root_type*) - arg_sizeof_task % sizeof(task_root_type*) - : 0 ); - - const unsigned dep_capacity - = ~0u == arg_dep_capacity - ? m_default_dependence_capacity - : arg_dep_capacity ; - - const unsigned size_alloc = - base_size + sizeof(task_root_type*) * dep_capacity ; - - task_root_type * const task = - reinterpret_cast<task_root_type*>( m_space.allocate( size_alloc ) ); - - if ( task != 0 ) { - - // Initialize task's root and value data structure - // Calling function must copy construct the functor. 
- - new( (void*) task ) task_root_type(); - - task->m_policy = this ; - task->m_size_alloc = size_alloc ; - task->m_dep_capacity = dep_capacity ; - task->m_shmem_size = arg_team_shmem ; - - if ( dep_capacity ) { - task->m_dep = - reinterpret_cast<task_root_type**>( - reinterpret_cast<unsigned char*>(task) + base_size ); - - for ( unsigned i = 0 ; i < dep_capacity ; ++i ) - task->task_root_type::m_dep[i] = 0 ; - } - } - return task ; -} - -//---------------------------------------------------------------------------- - -void CudaTaskPolicyQueue::add_dependence - ( CudaTaskPolicyQueue::task_root_type * const after - , CudaTaskPolicyQueue::task_root_type * const before - ) -{ - if ( ( after != 0 ) && ( before != 0 ) ) { - - int const state = *((volatile const int *) & after->m_state ); - - // Only add dependence during construction or during execution. - // Both tasks must have the same policy. - // Dependence on non-full memory cannot be mixed with any other dependence. - - const bool ok_state = - Kokkos::Experimental::TASK_STATE_CONSTRUCTING == state || - Kokkos::Experimental::TASK_STATE_EXECUTING == state ; - - const bool ok_capacity = - after->m_dep_size < after->m_dep_capacity ; - - const bool ok_policy = - after->m_policy == this && before->m_policy == this ; - - if ( ok_state && ok_capacity && ok_policy ) { - - ++after->m_dep_size ; - - task_root_type::assign( after->m_dep + (after->m_dep_size-1) , before ); - - memory_fence(); - } - else { - -printf( "CudaTaskPolicyQueue::add_dependence( 0x%lx , 0x%lx ) ERROR %s\n" - , (unsigned long) after - , (unsigned long) before - , ( ! ok_state ? "Task not constructing or executing" : - ( ! ok_capacity ? 
"Task Exceeded dependence capacity" - : "Tasks from different policies" )) ); - - Kokkos::abort("CudaTaskPolicyQueue::add_dependence ERROR"); - } - } -} - -} /* namespace Impl */ -} /* namespace Experimental */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { - -TaskPolicy< Kokkos::Cuda >::TaskPolicy - ( const unsigned arg_task_max_count - , const unsigned arg_task_max_size - , const unsigned arg_task_default_dependence_capacity - , const unsigned arg_task_team_size - ) - : m_track() - , m_policy(0) -{ - // Allocate the queue data sructure in UVM space - - typedef Kokkos::Experimental::Impl::SharedAllocationRecord - < Kokkos::CudaUVMSpace , Impl::CudaTaskPolicyQueue::Destroy > record_type ; - - record_type * record = - record_type::allocate( Kokkos::CudaUVMSpace() - , "CudaUVM task queue" - , sizeof(Impl::CudaTaskPolicyQueue) - ); - - m_policy = reinterpret_cast< Impl::CudaTaskPolicyQueue * >( record->data() ); - - // Tasks are allocated with application's task size + sizeof(task_root_type) - - const size_t full_task_size_estimate = - arg_task_max_size + - sizeof(task_root_type) + - sizeof(task_root_type*) * arg_task_default_dependence_capacity ; - - new( m_policy ) - Impl::CudaTaskPolicyQueue( arg_task_max_count - , full_task_size_estimate - , arg_task_default_dependence_capacity - , arg_task_team_size ); - - record->m_destroy.m_policy = m_policy ; - - m_track.assign_allocated_record_to_uninitialized( record ); -} - -__global__ -static void kokkos_cuda_task_policy_queue_driver - ( Kokkos::Experimental::Impl::CudaTaskPolicyQueue * queue ) -{ - queue->driver(); -} - -void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Cuda > & policy ) -{ - const dim3 grid( Kokkos::Impl::cuda_internal_multiprocessor_count() , 1 , 1 ); - const dim3 block( 1 , policy.m_policy->m_team_size , 1 ); - - 
const int shared = 0 ; // Kokkos::Impl::CudaTraits::SharedMemoryUsage / 2 ; - const cudaStream_t stream = 0 ; - - -#ifdef DETAILED_PRINT -printf("kokkos_cuda_task_policy_queue_driver grid(%d,%d,%d) block(%d,%d,%d) shared(%d) policy(0x%lx)\n" - , grid.x , grid.y , grid.z - , block.x , block.y , block.z - , shared - , (unsigned long)( policy.m_policy ) ); -fflush(stdout); -#endif - - CUDA_SAFE_CALL( cudaDeviceSynchronize() ); - -/* - CUDA_SAFE_CALL( - cudaFuncSetCacheConfig( kokkos_cuda_task_policy_queue_driver - , cudaFuncCachePreferL1 ) ); - - CUDA_SAFE_CALL( cudaGetLastError() ); -*/ - - kokkos_cuda_task_policy_queue_driver<<< grid , block , shared , stream >>> - ( policy.m_policy ); - - CUDA_SAFE_CALL( cudaGetLastError() ); - - CUDA_SAFE_CALL( cudaDeviceSynchronize() ); - -#ifdef DETAILED_PRINT -printf("kokkos_cuda_task_policy_queue_driver end\n"); -fflush(stdout); -#endif - -} - -} /* namespace Experimental */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -typedef TaskMember< Kokkos::Cuda , void , void > Task ; - -__host__ __device__ -Task::~TaskMember() -{ -} - -__host__ __device__ -void Task::assign( Task ** const lhs_ptr , Task * rhs ) -{ - Task * const q_denied = reinterpret_cast<Task*>(QDENIED); - - // Increment rhs reference count. - if ( rhs ) { atomic_fetch_add( & rhs->m_ref_count , 1 ); } - - if ( 0 == lhs_ptr ) return ; - - // Must have exclusive access to *lhs_ptr. - // Assign the pointer and retrieve the previous value. - // Cannot use atomic exchange since *lhs_ptr may be - // in Cuda register space. 
- -#if 0 - - Task * const old_lhs = *((Task*volatile*)lhs_ptr); - - *((Task*volatile*)lhs_ptr) = rhs ; - - Kokkos::memory_fence(); - -#else - - Task * const old_lhs = *lhs_ptr ; - - *lhs_ptr = rhs ; - -#endif - - if ( old_lhs && rhs && old_lhs->m_policy != rhs->m_policy ) { - Kokkos::abort( "Kokkos::Impl::TaskMember<Kokkos::Cuda>::assign ERROR different queues"); - } - - if ( old_lhs ) { - - Kokkos::memory_fence(); - - // Decrement former lhs reference count. - // If reference count is zero task must be complete, then delete task. - // Task is ready for deletion when wait == q_denied - - int const count = atomic_fetch_add( & (old_lhs->m_ref_count) , -1 ) - 1 ; - int const state = old_lhs->m_state ; - Task * const wait = *((Task * const volatile *) & old_lhs->m_wait ); - - const bool ok_count = 0 <= count ; - - // If count == 0 then will be deleting - // and must either be constructing or complete. - const bool ok_state = 0 < count ? true : - ( ( state == int(TASK_STATE_CONSTRUCTING) && wait == 0 ) || - ( state == int(TASK_STATE_COMPLETE) && wait == q_denied ) ) - && - old_lhs->m_next == 0 && - old_lhs->m_dep_size == 0 ; - - if ( ! ok_count || ! ok_state ) { - - printf( "%s Kokkos::Impl::TaskManager<Kokkos::Cuda>::assign ERROR deleting task(0x%lx) m_ref_count(%d) m_state(%d) m_wait(0x%ld)\n" -#if defined( KOKKOS_ACTIVE_EXECUTION_SPACE_CUDA ) - , "CUDA " -#else - , "HOST " -#endif - , (unsigned long) old_lhs - , count - , state - , (unsigned long) wait ); - Kokkos::abort( "Kokkos::Impl::TaskMember<Kokkos::Cuda>::assign ERROR deleting"); - } - - if ( count == 0 ) { - // When 'count == 0' this thread has exclusive access to 'old_lhs' - -#ifdef DETAILED_PRINT -printf( "Task::assign(...) 
old_lhs(0x%lx) deallocate\n" - , (unsigned long) old_lhs - ); -#endif - - old_lhs->m_policy->deallocate_task( old_lhs ); - } - } -} - -//---------------------------------------------------------------------------- - -__device__ -int Task::get_dependence() const -{ - return m_dep_size ; -} - -__device__ -Task * Task::get_dependence( int i ) const -{ - Task * const t = ((Task*volatile*)m_dep)[i] ; - - if ( Kokkos::Experimental::TASK_STATE_EXECUTING != m_state || i < 0 || m_dep_size <= i || 0 == t ) { - -printf( "TaskMember< Cuda >::get_dependence ERROR : task[%lx]{ state(%d) dep_size(%d) dep[%d] = %lx }\n" - , (unsigned long) this - , m_state - , m_dep_size - , i - , (unsigned long) t - ); - - Kokkos::abort("TaskMember< Cuda >::get_dependence ERROR"); - } - - return t ; -} - -//---------------------------------------------------------------------------- - -__device__ __host__ -void Task::clear_dependence() -{ - for ( int i = m_dep_size - 1 ; 0 <= i ; --i ) { - assign( m_dep + i , 0 ); - } - - *((volatile int *) & m_dep_size ) = 0 ; - - memory_fence(); -} - -//---------------------------------------------------------------------------- - - -//---------------------------------------------------------------------------- - -} /* namespace Impl */ -} /* namespace Experimental */ -} /* namespace Kokkos */ - - -#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ - diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.hpp deleted file mode 100644 index e71512f0391b3e264341222b82918d9901080061..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.hpp +++ /dev/null @@ -1,833 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -// Experimental unified task-data parallel manycore LDRD - -#ifndef KOKKOS_CUDA_TASKPOLICY_HPP -#define KOKKOS_CUDA_TASKPOLICY_HPP - -#include <Kokkos_Core_fwd.hpp> -#include <Kokkos_Cuda.hpp> -#include <Kokkos_TaskPolicy.hpp> - -#if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKPOLICY ) - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -struct CudaTaskPolicyQueue ; - -/** \brief Base class for all Kokkos::Cuda tasks */ -template<> -class TaskMember< Kokkos::Cuda , void , void > { -public: - - template< class > friend class Kokkos::Experimental::TaskPolicy ; - friend struct CudaTaskPolicyQueue ; - - typedef void (* function_single_type) ( TaskMember * ); - typedef void (* function_team_type) ( TaskMember * , Kokkos::Impl::CudaTeamMember & ); - -private: - - CudaTaskPolicyQueue * m_policy ; - TaskMember * volatile * m_queue ; - function_team_type m_team ; ///< Apply function on CUDA - function_single_type m_serial ; ///< Apply function on CUDA - TaskMember ** m_dep ; ///< Dependences - TaskMember * m_wait ; ///< Linked list of tasks waiting on this task - TaskMember * m_next ; ///< Linked list of tasks waiting on a different task - int m_dep_capacity ; ///< Capacity of dependences - int m_dep_size ; ///< Actual count of dependences - int m_size_alloc ; - int m_shmem_size ; - int m_ref_count ; ///< Reference count - int m_state ; ///< State of the task - - - TaskMember( TaskMember && ) = delete ; - TaskMember( const TaskMember & ) = delete ; - TaskMember & operator = ( TaskMember && ) = delete ; - TaskMember & operator = ( const TaskMember & ) = delete ; - -protected: - - KOKKOS_INLINE_FUNCTION - TaskMember() - : m_policy(0) - , m_queue(0) - , m_team(0) - , m_serial(0) - , m_dep(0) - , m_wait(0) - , m_next(0) - , 
m_size_alloc(0) - , m_dep_capacity(0) - , m_dep_size(0) - , m_shmem_size(0) - , m_ref_count(0) - , m_state( TASK_STATE_CONSTRUCTING ) - {} - -public: - - KOKKOS_FUNCTION - ~TaskMember(); - - KOKKOS_INLINE_FUNCTION - int reference_count() const - { return *((volatile int *) & m_ref_count ); } - - // Cannot use the function pointer to verify the type - // since the function pointer is not unique between - // Host and Cuda. Don't run verificaton for Cuda. - // Assume testing on Host-only back-end will catch such errors. - - template< typename ResultType > - KOKKOS_INLINE_FUNCTION static - TaskMember * verify_type( TaskMember * t ) { return t ; } - - //---------------------------------------- - /* Inheritence Requirements on task types: - * - * class DerivedTaskType - * : public TaskMember< Cuda , DerivedType::value_type , FunctorType > - * { ... }; - * - * class TaskMember< Cuda , DerivedType::value_type , FunctorType > - * : public TaskMember< Cuda , DerivedType::value_type , void > - * , public Functor - * { ... }; - * - * If value_type != void - * class TaskMember< Cuda , value_type , void > - * : public TaskMember< Cuda , void , void > - * - * Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ] - * - */ - //---------------------------------------- - // If after the 'apply' the task's state is waiting - // then it will be rescheduled and called again. - // Otherwise the functor must be destroyed. 
- - template< class DerivedTaskType , class Tag > - __device__ static - void apply_single( - typename std::enable_if - <( std::is_same< Tag , void >::value && - std::is_same< typename DerivedTaskType::result_type , void >::value - ), TaskMember * >::type t ) - { - typedef typename DerivedTaskType::functor_type functor_type ; - - functor_type * const f = - static_cast< functor_type * >( static_cast< DerivedTaskType * >(t) ); - - f->apply(); - - if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) { - f->~functor_type(); - } - } - - template< class DerivedTaskType , class Tag > - __device__ static - void apply_single( - typename std::enable_if - <( std::is_same< Tag , void >::value && - ! std::is_same< typename DerivedTaskType::result_type , void >::value - ), TaskMember * >::type t ) - { - typedef typename DerivedTaskType::functor_type functor_type ; - - DerivedTaskType * const self = static_cast< DerivedTaskType * >(t); - functor_type * const f = static_cast< functor_type * >( self ); - - f->apply( self->m_result ); - - if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) { - f->~functor_type(); - } - } - - template< class DerivedTaskType , class Tag > - __device__ - void set_apply_single() - { - m_serial = & TaskMember::template apply_single<DerivedTaskType,Tag> ; - } - - //---------------------------------------- - - template< class DerivedTaskType , class Tag > - __device__ static - void apply_team( - typename std::enable_if - <( std::is_same<Tag,void>::value && - std::is_same<typename DerivedTaskType::result_type,void>::value - ), TaskMember * >::type t - , Kokkos::Impl::CudaTeamMember & member - ) - { - typedef typename DerivedTaskType::functor_type functor_type ; - - functor_type * const f = - static_cast< functor_type * >( static_cast< DerivedTaskType * >(t) ); - - f->apply( member ); - - __syncthreads(); // Wait for team to finish calling function - - if ( threadIdx.x == 0 && - threadIdx.y == 0 && - t->m_state == 
int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) { - f->~functor_type(); - } - } - - template< class DerivedTaskType , class Tag > - __device__ static - void apply_team( - typename std::enable_if - <( std::is_same<Tag,void>::value && - ! std::is_same<typename DerivedTaskType::result_type,void>::value - ), TaskMember * >::type t - , Kokkos::Impl::CudaTeamMember & member - ) - { - typedef typename DerivedTaskType::functor_type functor_type ; - - DerivedTaskType * const self = static_cast< DerivedTaskType * >(t); - functor_type * const f = static_cast< functor_type * >( self ); - - f->apply( member , self->m_result ); - - __syncthreads(); // Wait for team to finish calling function - - if ( threadIdx.x == 0 && - threadIdx.y == 0 && - t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) { - f->~functor_type(); - } - } - - template< class DerivedTaskType , class Tag > - __device__ - void set_apply_team() - { - m_team = & TaskMember::template apply_team<DerivedTaskType,Tag> ; - } - - //---------------------------------------- - - KOKKOS_FUNCTION static - void assign( TaskMember ** const lhs , TaskMember * const rhs ); - - __device__ - TaskMember * get_dependence( int i ) const ; - - __device__ - int get_dependence() const ; - - KOKKOS_FUNCTION void clear_dependence(); - - __device__ - void latch_add( const int k ); - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION static - void construct_result( TaskMember * const ) {} - - typedef FutureValueTypeIsVoidError get_result_type ; - - KOKKOS_INLINE_FUNCTION - get_result_type get() const { return get_result_type() ; } - - KOKKOS_INLINE_FUNCTION - Kokkos::Experimental::TaskState get_state() const { return Kokkos::Experimental::TaskState( m_state ); } - -}; - -/** \brief A Future< Kokkos::Cuda , ResultType > will cast - * from TaskMember< Kokkos::Cuda , void , void > - * to TaskMember< Kokkos::Cuda , ResultType , void > - * to query the result. 
- */ -template< class ResultType > -class TaskMember< Kokkos::Cuda , ResultType , void > - : public TaskMember< Kokkos::Cuda , void , void > -{ -public: - - typedef ResultType result_type ; - - result_type m_result ; - - typedef const result_type & get_result_type ; - - KOKKOS_INLINE_FUNCTION - get_result_type get() const { return m_result ; } - - KOKKOS_INLINE_FUNCTION static - void construct_result( TaskMember * const ptr ) - { - new((void*)(& ptr->m_result)) result_type(); - } - - TaskMember() = delete ; - TaskMember( TaskMember && ) = delete ; - TaskMember( const TaskMember & ) = delete ; - TaskMember & operator = ( TaskMember && ) = delete ; - TaskMember & operator = ( const TaskMember & ) = delete ; -}; - -/** \brief Callback functions will cast - * from TaskMember< Kokkos::Cuda , void , void > - * to TaskMember< Kokkos::Cuda , ResultType , FunctorType > - * to execute work functions. - */ -template< class ResultType , class FunctorType > -class TaskMember< Kokkos::Cuda , ResultType , FunctorType > - : public TaskMember< Kokkos::Cuda , ResultType , void > - , public FunctorType -{ -public: - typedef ResultType result_type ; - typedef FunctorType functor_type ; - - KOKKOS_INLINE_FUNCTION static - void copy_construct( TaskMember * const ptr - , const functor_type & arg_functor ) - { - typedef TaskMember< Kokkos::Cuda , ResultType , void > base_type ; - - new((void*)static_cast<FunctorType*>(ptr)) functor_type( arg_functor ); - - base_type::construct_result( static_cast<base_type*>( ptr ) ); - } - - TaskMember() = delete ; - TaskMember( TaskMember && ) = delete ; - TaskMember( const TaskMember & ) = delete ; - TaskMember & operator = ( TaskMember && ) = delete ; - TaskMember & operator = ( const TaskMember & ) = delete ; -}; - -//---------------------------------------------------------------------------- - -namespace { - -template< class DerivedTaskType , class Tag > -__global__ -void cuda_set_apply_single( DerivedTaskType * task ) -{ - typedef 
Kokkos::Experimental::Impl::TaskMember< Kokkos::Cuda , void , void > - task_root_type ; - - task->task_root_type::template set_apply_single< DerivedTaskType , Tag >(); -} - -template< class DerivedTaskType , class Tag > -__global__ -void cuda_set_apply_team( DerivedTaskType * task ) -{ - typedef Kokkos::Experimental::Impl::TaskMember< Kokkos::Cuda , void , void > - task_root_type ; - - task->task_root_type::template set_apply_team< DerivedTaskType , Tag >(); -} - -} /* namespace */ -} /* namespace Impl */ -} /* namespace Experimental */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -struct CudaTaskPolicyQueue { - - enum { NPRIORITY = 3 }; - - // Must use UVM so that tasks can be created in both - // Host and Cuda space. - - typedef Kokkos::Experimental::MemoryPool< Kokkos::CudaUVMSpace > - memory_space ; - - typedef Kokkos::Experimental::Impl::TaskMember< Kokkos::Cuda , void , void > - task_root_type ; - - memory_space m_space ; - task_root_type * m_team[ NPRIORITY ] ; - task_root_type * m_serial[ NPRIORITY ]; - int m_team_size ; - int m_default_dependence_capacity ; - int volatile m_count_ready ; ///< Ready plus executing tasks - - // Execute tasks until all non-waiting tasks are complete - __device__ - void driver(); - - __device__ static - task_root_type * pop_ready_task( task_root_type * volatile * const queue ); - - // When a task finishes executing. 
- __device__ - void complete_executed_task( task_root_type * ); - - KOKKOS_FUNCTION void schedule_task( task_root_type * const - , const bool initial_spawn = true ); - KOKKOS_FUNCTION void reschedule_task( task_root_type * const ); - KOKKOS_FUNCTION - void add_dependence( task_root_type * const after - , task_root_type * const before ); - - - CudaTaskPolicyQueue() = delete ; - CudaTaskPolicyQueue( CudaTaskPolicyQueue && ) = delete ; - CudaTaskPolicyQueue( const CudaTaskPolicyQueue & ) = delete ; - CudaTaskPolicyQueue & operator = ( CudaTaskPolicyQueue && ) = delete ; - CudaTaskPolicyQueue & operator = ( const CudaTaskPolicyQueue & ) = delete ; - - - ~CudaTaskPolicyQueue(); - - // Construct only on the Host - CudaTaskPolicyQueue - ( const unsigned arg_task_max_count - , const unsigned arg_task_max_size - , const unsigned arg_task_default_dependence_capacity - , const unsigned arg_task_team_size - ); - - struct Destroy { - CudaTaskPolicyQueue * m_policy ; - void destroy_shared_allocation(); - }; - - //---------------------------------------- - /** \brief Allocate and construct a task. 
- * - * Allocate space for DerivedTaskType followed - * by TaskMember*[ dependence_capacity ] - */ - KOKKOS_FUNCTION - task_root_type * - allocate_task( const unsigned arg_sizeof_task - , const unsigned arg_dep_capacity - , const unsigned arg_team_shmem = 0 ); - - KOKKOS_FUNCTION void deallocate_task( task_root_type * const ); -}; - -} /* namespace Impl */ -} /* namespace Experimental */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { - -void wait( TaskPolicy< Kokkos::Cuda > & ); - -template<> -class TaskPolicy< Kokkos::Cuda > -{ -public: - - typedef Kokkos::Cuda execution_space ; - typedef TaskPolicy execution_policy ; - typedef Kokkos::Impl::CudaTeamMember member_type ; - -private: - - typedef Impl::TaskMember< Kokkos::Cuda , void , void > task_root_type ; - typedef Kokkos::Experimental::MemoryPool< Kokkos::CudaUVMSpace > memory_space ; - typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ; - - track_type m_track ; - Impl::CudaTaskPolicyQueue * m_policy ; - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION static - const task_root_type * get_task_root( const FunctorType * f ) - { - typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ; - return static_cast< const task_root_type * >( static_cast< const task_type * >(f) ); - } - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION static - task_root_type * get_task_root( FunctorType * f ) - { - typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ; - return static_cast< task_root_type * >( static_cast< task_type * >(f) ); - } - -public: - - TaskPolicy - ( const unsigned arg_task_max_count - , const unsigned arg_task_max_size - , const unsigned arg_task_default_dependence_capacity = 4 - , const unsigned 
arg_task_team_size = 0 /* choose default */ - ); - - KOKKOS_FUNCTION TaskPolicy() = default ; - KOKKOS_FUNCTION TaskPolicy( TaskPolicy && rhs ) = default ; - KOKKOS_FUNCTION TaskPolicy( const TaskPolicy & rhs ) = default ; - KOKKOS_FUNCTION TaskPolicy & operator = ( TaskPolicy && rhs ) = default ; - KOKKOS_FUNCTION TaskPolicy & operator = ( const TaskPolicy & rhs ) = default ; - - KOKKOS_FUNCTION - int allocated_task_count() const { return 0 ; } - - //---------------------------------------- - // Create serial-thread task - // Main process and tasks must use different functions - // to work around CUDA limitation where __host__ __device__ - // functions are not allowed to invoke templated __global__ functions. - - template< class FunctorType > - Future< typename FunctorType::value_type , execution_space > - proc_create( const FunctorType & arg_functor - , const unsigned arg_dep_capacity = ~0u ) const - { - typedef typename FunctorType::value_type value_type ; - - typedef Impl::TaskMember< execution_space , value_type , FunctorType > - task_type ; - - task_type * const task = - static_cast<task_type*>( - m_policy->allocate_task( sizeof(task_type) , arg_dep_capacity ) ); - - if ( task ) { - // The root part of the class has been constructed. - // Must now construct the functor and result specific part. - - task_type::copy_construct( task , arg_functor ); - - // Setting the apply pointer on the device requires code - // executing on the GPU. This function is called on the - // host process so a kernel must be run. - - // Launching a kernel will cause the allocated task in - // UVM memory to be copied to the GPU. - // Synchronize to guarantee non-concurrent access - // between host and device. 
- - CUDA_SAFE_CALL( cudaDeviceSynchronize() ); - - Impl::cuda_set_apply_single<task_type,void><<<1,1>>>( task ); - - CUDA_SAFE_CALL( cudaGetLastError() ); - CUDA_SAFE_CALL( cudaDeviceSynchronize() ); - } - - return Future< value_type , execution_space >( task ); - } - - template< class FunctorType > - __device__ - Future< typename FunctorType::value_type , execution_space > - task_create( const FunctorType & arg_functor - , const unsigned arg_dep_capacity = ~0u ) const - { - typedef typename FunctorType::value_type value_type ; - - typedef Impl::TaskMember< execution_space , value_type , FunctorType > - task_type ; - - task_type * const task = - static_cast<task_type*>( - m_policy->allocate_task( sizeof(task_type) , arg_dep_capacity ) ); - - if ( task ) { - // The root part of the class has been constructed. - // Must now construct the functor and result specific part. - - task_type::copy_construct( task , arg_functor ); - - // Setting the apply pointer on the device requires code - // executing on the GPU. If this function is called on the - // Host then a kernel must be run. - - task->task_root_type::template set_apply_single< task_type , void >(); - } - - return Future< value_type , execution_space >( task ); - } - - //---------------------------------------- - // Create thread-team task - // Main process and tasks must use different functions - // to work around CUDA limitation where __host__ __device__ - // functions are not allowed to invoke templated __global__ functions. 
- - template< class FunctorType > - Future< typename FunctorType::value_type , execution_space > - proc_create_team( const FunctorType & arg_functor - , const unsigned arg_dep_capacity = ~0u ) const - { - typedef typename FunctorType::value_type value_type ; - - typedef Impl::TaskMember< execution_space , value_type , FunctorType > - task_type ; - - const unsigned team_shmem_size = - Kokkos::Impl::FunctorTeamShmemSize< FunctorType >::value - ( arg_functor , m_policy->m_team_size ); - - task_type * const task = - static_cast<task_type*>( - m_policy->allocate_task( sizeof(task_type) , arg_dep_capacity , team_shmem_size ) ); - - if ( task ) { - // The root part of the class has been constructed. - // Must now construct the functor and result specific part. - - task_type::copy_construct( task , arg_functor ); - - // Setting the apply pointer on the device requires code - // executing on the GPU. This function is called on the - // host process so a kernel must be run. - - // Launching a kernel will cause the allocated task in - // UVM memory to be copied to the GPU. - // Synchronize to guarantee non-concurrent access - // between host and device. 
- - CUDA_SAFE_CALL( cudaDeviceSynchronize() ); - - Impl::cuda_set_apply_team<task_type,void><<<1,1>>>( task ); - - CUDA_SAFE_CALL( cudaGetLastError() ); - CUDA_SAFE_CALL( cudaDeviceSynchronize() ); - } - - return Future< value_type , execution_space >( task ); - } - - template< class FunctorType > - __device__ - Future< typename FunctorType::value_type , execution_space > - task_create_team( const FunctorType & arg_functor - , const unsigned arg_dep_capacity = ~0u ) const - { - typedef typename FunctorType::value_type value_type ; - - typedef Impl::TaskMember< execution_space , value_type , FunctorType > - task_type ; - - const unsigned team_shmem_size = - Kokkos::Impl::FunctorTeamShmemSize< FunctorType >::value - ( arg_functor , m_policy->m_team_size ); - - task_type * const task = - static_cast<task_type*>( - m_policy->allocate_task( sizeof(task_type) , arg_dep_capacity , team_shmem_size ) ); - - if ( task ) { - // The root part of the class has been constructed. - // Must now construct the functor and result specific part. - - task_type::copy_construct( task , arg_functor ); - - // Setting the apply pointer on the device requires code - // executing on the GPU. If this function is called on the - // Host then a kernel must be run. 
- - task->task_root_type::template set_apply_team< task_type , void >(); - } - - return Future< value_type , execution_space >( task ); - } - - //---------------------------------------- - - Future< Latch , execution_space > - KOKKOS_INLINE_FUNCTION - create_latch( const int N ) const - { - task_root_type * const task = - m_policy->allocate_task( sizeof(task_root_type) , 0 , 0 ); - task->m_dep_size = N ; // Using m_dep_size for latch counter - task->m_state = TASK_STATE_WAITING ; - return Future< Latch , execution_space >( task ); - } - - //---------------------------------------- - - template< class A1 , class A2 , class A3 , class A4 > - KOKKOS_INLINE_FUNCTION - void add_dependence( const Future<A1,A2> & after - , const Future<A3,A4> & before - , typename std::enable_if - < std::is_same< typename Future<A1,A2>::execution_space , execution_space >::value - && - std::is_same< typename Future<A3,A4>::execution_space , execution_space >::value - >::type * = 0 - ) const - { m_policy->add_dependence( after.m_task , before.m_task ); } - - template< class FunctorType , class A3 , class A4 > - KOKKOS_INLINE_FUNCTION - void add_dependence( FunctorType * task_functor - , const Future<A3,A4> & before - , typename std::enable_if - < std::is_same< typename Future<A3,A4>::execution_space , execution_space >::value - >::type * = 0 - ) const - { m_policy->add_dependence( get_task_root(task_functor) , before.m_task ); } - - - template< class ValueType > - KOKKOS_INLINE_FUNCTION - const Future< ValueType , execution_space > & - spawn( const Future< ValueType , execution_space > & f - , const bool priority = false ) const - { - if ( f.m_task ) { - f.m_task->m_queue = - ( f.m_task->m_team != 0 - ? & ( m_policy->m_team[ priority ? 0 : 1 ] ) - : & ( m_policy->m_serial[ priority ? 
0 : 1 ] ) ); - m_policy->schedule_task( f.m_task ); - } - return f ; - } - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - void respawn( FunctorType * task_functor - , const bool priority = false ) const - { - task_root_type * const t = get_task_root(task_functor); - t->m_queue = - ( t->m_team != 0 ? & ( m_policy->m_team[ priority ? 0 : 1 ] ) - : & ( m_policy->m_serial[ priority ? 0 : 1 ] ) ); - m_policy->reschedule_task( t ); - } - - // When a create method fails by returning a null Future - // the task that called the create method may respawn - // with a dependence on memory becoming available. - // This is a race as more than one task may be respawned - // with this need. - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - void respawn_needing_memory( FunctorType * task_functor ) const - { - task_root_type * const t = get_task_root(task_functor); - t->m_queue = - ( t->m_team != 0 ? & ( m_policy->m_team[ 2 ] ) - : & ( m_policy->m_serial[ 2 ] ) ); - m_policy->reschedule_task( t ); - } - - //---------------------------------------- - // Functions for an executing task functor to query dependences, - // set new dependences, and respawn itself. 
- - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - Future< void , execution_space > - get_dependence( const FunctorType * task_functor , int i ) const - { - return Future<void,execution_space>( - get_task_root(task_functor)->get_dependence(i) - ); - } - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - int get_dependence( const FunctorType * task_functor ) const - { return get_task_root(task_functor)->get_dependence(); } - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - void clear_dependence( FunctorType * task_functor ) const - { get_task_root(task_functor)->clear_dependence(); } - - //---------------------------------------- - - __device__ - static member_type member_single() - { - return - member_type( 0 /* shared memory pointer */ - , 0 /* shared memory begin offset */ - , 0 /* shared memory end offset */ - , 0 /* scratch level_1 pointer */ - , 0 /* scratch level_1 size */ - , 0 /* league rank */ - , 1 /* league size */ ); - } - - friend void wait( TaskPolicy< Kokkos::Cuda > & ); -}; - -} /* namespace Experimental */ -} /* namespace Kokkos */ - - -//---------------------------------------------------------------------------- - -#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */ -#endif /* #ifndef KOKKOS_CUDA_TASKPOLICY_HPP */ - - diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp index 92f6fc1f5f89a75fe717d351af5395da8bf894a4..b505b766a03cef464b4f880fdc788b6f46ead1c3 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp @@ -41,53 +41,266 @@ //@HEADER */ -#ifndef KOKKOS_CUDA_VIEW_HPP -#define KOKKOS_CUDA_VIEW_HPP - -#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP +#define KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP /* only compile this file if CUDA is enabled for Kokkos */ -#ifdef KOKKOS_HAVE_CUDA +#if defined( KOKKOS_HAVE_CUDA ) + 
+//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- -#include <cstring> +namespace Kokkos { +namespace Experimental { +namespace Impl { -#include <Kokkos_HostSpace.hpp> -#include <Kokkos_CudaSpace.hpp> -#include <impl/Kokkos_Shape.hpp> -#include <Kokkos_View.hpp> +// Cuda Texture fetches can be performed for 4, 8 and 16 byte objects (int,int2,int4) +// Via reinterpret_cast this can be used to support all scalar types of those sizes. +// Any other scalar type falls back to either normal reads out of global memory, +// or using the __ldg intrinsic on Kepler GPUs or newer (Compute Capability >= 3.0) + +template< typename ValueType , typename AliasType > +struct CudaTextureFetch { + + ::cudaTextureObject_t m_obj ; + const ValueType * m_ptr ; + int m_offset ; + + // Dereference operator pulls through texture object and returns by value + template< typename iType > + KOKKOS_INLINE_FUNCTION + ValueType operator[]( const iType & i ) const + { +#if defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ ) + AliasType v = tex1Dfetch<AliasType>( m_obj , i + m_offset ); + return *(reinterpret_cast<ValueType*> (&v)); +#else + return m_ptr[ i ]; +#endif + } + + // Pointer to referenced memory + KOKKOS_INLINE_FUNCTION + operator const ValueType * () const { return m_ptr ; } + + + KOKKOS_INLINE_FUNCTION + CudaTextureFetch() : m_obj() , m_ptr() , m_offset() {} + + KOKKOS_INLINE_FUNCTION + ~CudaTextureFetch() {} + + KOKKOS_INLINE_FUNCTION + CudaTextureFetch( const CudaTextureFetch & rhs ) + : m_obj( rhs.m_obj ) + , m_ptr( rhs.m_ptr ) + , m_offset( rhs.m_offset ) + {} + + KOKKOS_INLINE_FUNCTION + CudaTextureFetch( CudaTextureFetch && rhs ) + : m_obj( rhs.m_obj ) + , m_ptr( rhs.m_ptr ) + , m_offset( rhs.m_offset ) + {} + + KOKKOS_INLINE_FUNCTION + CudaTextureFetch & operator = ( const CudaTextureFetch & rhs ) + { + m_obj = rhs.m_obj ; + m_ptr = rhs.m_ptr ; + m_offset = rhs.m_offset ; + 
return *this ; + } + + KOKKOS_INLINE_FUNCTION + CudaTextureFetch & operator = ( CudaTextureFetch && rhs ) + { + m_obj = rhs.m_obj ; + m_ptr = rhs.m_ptr ; + m_offset = rhs.m_offset ; + return *this ; + } + + // Texture object spans the entire allocation. + // This handle may view a subset of the allocation, so an offset is required. + template< class CudaMemorySpace > + inline explicit + CudaTextureFetch( const ValueType * const arg_ptr + , Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > & record + ) + : m_obj( record.template attach_texture_object< AliasType >() ) + , m_ptr( arg_ptr ) + , m_offset( record.attach_texture_object_offset( reinterpret_cast<const AliasType*>( arg_ptr ) ) ) + {} + + // Texture object spans the entire allocation. + // This handle may view a subset of the allocation, so an offset is required. + KOKKOS_INLINE_FUNCTION + CudaTextureFetch( const CudaTextureFetch & rhs , size_t offset ) + : m_obj( rhs.m_obj ) + , m_ptr( rhs.m_ptr + offset) + , m_offset( offset + rhs.m_offset ) + {} +}; + +#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC ) + +template< typename ValueType , typename AliasType > +struct CudaLDGFetch { + + const ValueType * m_ptr ; + + template< typename iType > + KOKKOS_INLINE_FUNCTION + ValueType operator[]( const iType & i ) const + { + #ifdef __CUDA_ARCH__ + AliasType v = __ldg(reinterpret_cast<const AliasType*>(&m_ptr[i])); + return *(reinterpret_cast<ValueType*> (&v)); + #else + return m_ptr[i]; + #endif + } + + KOKKOS_INLINE_FUNCTION + operator const ValueType * () const { return m_ptr ; } + + KOKKOS_INLINE_FUNCTION + CudaLDGFetch() : m_ptr() {} + + KOKKOS_INLINE_FUNCTION + ~CudaLDGFetch() {} + + KOKKOS_INLINE_FUNCTION + CudaLDGFetch( const CudaLDGFetch & rhs ) + : m_ptr( rhs.m_ptr ) + {} + + KOKKOS_INLINE_FUNCTION + CudaLDGFetch( CudaLDGFetch && rhs ) + : m_ptr( rhs.m_ptr ) + {} + + KOKKOS_INLINE_FUNCTION + CudaLDGFetch & operator = ( const CudaLDGFetch & rhs ) + { + m_ptr = rhs.m_ptr ; + return *this 
; + } + + KOKKOS_INLINE_FUNCTION + CudaLDGFetch & operator = ( CudaLDGFetch && rhs ) + { + m_ptr = rhs.m_ptr ; + return *this ; + } + + template< class CudaMemorySpace > + inline explicit + CudaLDGFetch( const ValueType * const arg_ptr + , Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > const & + ) + : m_ptr( arg_ptr ) + {} + + KOKKOS_INLINE_FUNCTION + CudaLDGFetch( CudaLDGFetch const rhs ,size_t offset) + : m_ptr( rhs.m_ptr + offset ) + {} + +}; + +#endif + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { +namespace Experimental { namespace Impl { -template<> -struct AssertShapeBoundsAbort< CudaSpace > +/** \brief Replace Default ViewDataHandle with Cuda texture fetch specialization + * if 'const' value type, CudaSpace and random access. + */ +template< class Traits > +class ViewDataHandle< Traits , + typename std::enable_if<( + // Is Cuda memory space + ( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value || + std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ) + && + // Is a trivial const value of 4, 8, or 16 bytes + std::is_trivial<typename Traits::const_value_type>::value + && + std::is_same<typename Traits::const_value_type,typename Traits::value_type>::value + && + ( sizeof(typename Traits::const_value_type) == 4 || + sizeof(typename Traits::const_value_type) == 8 || + sizeof(typename Traits::const_value_type) == 16 ) + && + // Random access trait + ( Traits::memory_traits::RandomAccess != 0 ) + )>::type > { +public: + + using track_type = Kokkos::Experimental::Impl::SharedAllocationTracker ; + + using value_type = typename Traits::const_value_type ; + using return_type = typename Traits::const_value_type ; // NOT a reference + + using alias_type = typename std::conditional< ( 
sizeof(value_type) == 4 ) , int , + typename std::conditional< ( sizeof(value_type) == 8 ) , ::int2 , + typename std::conditional< ( sizeof(value_type) == 16 ) , ::int4 , void + >::type + >::type + >::type ; + +#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC ) + using handle_type = Kokkos::Experimental::Impl::CudaLDGFetch< value_type , alias_type > ; +#else + using handle_type = Kokkos::Experimental::Impl::CudaTextureFetch< value_type , alias_type > ; +#endif + + KOKKOS_INLINE_FUNCTION + static handle_type const & assign( handle_type const & arg_handle , track_type const & /* arg_tracker */ ) + { + return arg_handle ; + } + KOKKOS_INLINE_FUNCTION - static void apply( const size_t /* rank */ , - const size_t /* n0 */ , const size_t /* n1 */ , - const size_t /* n2 */ , const size_t /* n3 */ , - const size_t /* n4 */ , const size_t /* n5 */ , - const size_t /* n6 */ , const size_t /* n7 */ , - - const size_t /* arg_rank */ , - const size_t /* i0 */ , const size_t /* i1 */ , - const size_t /* i2 */ , const size_t /* i3 */ , - const size_t /* i4 */ , const size_t /* i5 */ , - const size_t /* i6 */ , const size_t /* i7 */ ) + static handle_type const assign( handle_type const & arg_handle , size_t offset ) { - Kokkos::abort("Kokkos::View array bounds violation"); + return handle_type(arg_handle,offset) ; + } + + KOKKOS_INLINE_FUNCTION + static handle_type assign( value_type * arg_data_ptr, track_type const & arg_tracker ) + { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + // Assignment of texture = non-texture requires creation of a texture object + // which can only occur on the host. 
In addition, 'get_record' is only valid + // if called in a host execution space + return handle_type( arg_data_ptr , arg_tracker.template get_record< typename Traits::memory_space >() ); +#else + Kokkos::Impl::cuda_abort("Cannot create Cuda texture object from within a Cuda kernel"); + return handle_type(); +#endif } }; +} } } //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -#endif // KOKKOS_HAVE_CUDA +#endif /* #if defined( KOKKOS_HAVE_CUDA ) */ #endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */ diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp index deb955ccd4755d43a24469171f2689d8c2a87dae..60903b757f921823189e47f2137bfeb714a09db2 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp @@ -47,18 +47,10 @@ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- #include "Kokkos_Macros.hpp" -#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA ) +#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA ) #include <cuda.h> -#if ! defined( CUDA_VERSION ) || ( CUDA_VERSION < 4010 ) -#error "Cuda version 4.1 or greater required" -#endif - -#if ( __CUDA_ARCH__ < 200 ) -#error "Cuda device capability 2.0 or greater required" -#endif - extern "C" { /* Cuda runtime function, declared in <crt/device_runtime.h> * Requires capability 2.x or better. 
@@ -90,30 +82,6 @@ void cuda_abort( const char * const message ) } // namespace Impl } // namespace Kokkos - -#else - -namespace Kokkos { -namespace Impl { -KOKKOS_INLINE_FUNCTION -void cuda_abort( const char * const ) {} -} -} - -#endif /* #if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) -namespace Kokkos { -__device__ inline -void abort( const char * const message ) { Kokkos::Impl::cuda_abort(message); } -} -#endif /* defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - +#endif /* #if defined(__CUDACC__) && defined( KOKKOS_HAVE_CUDA ) */ #endif /* #ifndef KOKKOS_CUDA_ABORT_HPP */ diff --git a/lib/kokkos/core/src/Kokkos_Atomic.hpp b/lib/kokkos/core/src/Kokkos_Atomic.hpp index 6d37d69a63c8c837457fb2edba6a6d607103b6ad..3102402b8383beb8ec92bee80af6e4ab7d231dd5 100644 --- a/lib/kokkos/core/src/Kokkos_Atomic.hpp +++ b/lib/kokkos/core/src/Kokkos_Atomic.hpp @@ -75,15 +75,16 @@ #if defined(_WIN32) #define KOKKOS_ATOMICS_USE_WINDOWS #else -#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA ) +#if defined( KOKKOS_HAVE_CUDA ) // Compiling NVIDIA device code, must use Cuda atomics: #define KOKKOS_ATOMICS_USE_CUDA +#endif -#elif ! defined( KOKKOS_ATOMICS_USE_GCC ) && \ - ! defined( KOKKOS_ATOMICS_USE_INTEL ) && \ - ! defined( KOKKOS_ATOMICS_USE_OMP31 ) +#if ! defined( KOKKOS_ATOMICS_USE_GCC ) && \ + ! defined( KOKKOS_ATOMICS_USE_INTEL ) && \ + ! defined( KOKKOS_ATOMICS_USE_OMP31 ) // Compiling for non-Cuda atomic implementation has not been pre-selected. // Choose the best implementation for the detected compiler. 
@@ -91,7 +92,7 @@ #if defined( KOKKOS_COMPILER_GNU ) || \ defined( KOKKOS_COMPILER_CLANG ) || \ - ( defined ( KOKKOS_COMPILER_NVCC ) && defined ( __GNUC__ ) ) + ( defined ( KOKKOS_COMPILER_NVCC ) ) #define KOKKOS_ATOMICS_USE_GCC @@ -126,6 +127,9 @@ namespace Impl { /// This function tries to aquire the lock for the hash value derived /// from the provided ptr. If the lock is successfully aquired the /// function returns true. Otherwise it returns false. +#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE +extern +#endif __device__ inline bool lock_address_cuda_space(void* ptr); @@ -135,6 +139,9 @@ bool lock_address_cuda_space(void* ptr); /// from the provided ptr. This function should only be called /// after previously successfully aquiring a lock with /// lock_address. +#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE +extern +#endif __device__ inline void unlock_address_cuda_space(void* ptr); } @@ -287,7 +294,7 @@ const char * atomic_query_version() //---------------------------------------------------------------------------- // This atomic-style macro should be an inlined function, not a macro -#if defined( KOKKOS_COMPILER_GNU ) && !defined(__PGIC__) +#if defined( KOKKOS_COMPILER_GNU ) && !defined(__PGIC__) && !defined(__CUDA_ARCH__) #define KOKKOS_NONTEMPORAL_PREFETCH_LOAD(addr) __builtin_prefetch(addr,0,0) #define KOKKOS_NONTEMPORAL_PREFETCH_STORE(addr) __builtin_prefetch(addr,1,0) diff --git a/lib/kokkos/core/src/Kokkos_Concepts.hpp b/lib/kokkos/core/src/Kokkos_Concepts.hpp index 82a342eec0bfba9e5420b86d41c586b22969712c..af83e5cac66069c94bc58b8cc22abc968f14ee59 100644 --- a/lib/kokkos/core/src/Kokkos_Concepts.hpp +++ b/lib/kokkos/core/src/Kokkos_Concepts.hpp @@ -46,7 +46,14 @@ #include <type_traits> +// Needed for 'is_space<S>::host_mirror_space +#include <Kokkos_Core_fwd.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + namespace Kokkos { + 
//Schedules for Execution Policies struct Static {}; struct Dynamic {}; @@ -59,7 +66,7 @@ struct Schedule || std::is_same<T,Dynamic>::value , "Kokkos: Invalid Schedule<> type." ); - using schedule_type = Schedule<T>; + using schedule_type = Schedule ; using type = T; }; @@ -68,11 +75,268 @@ template<typename T> struct IndexType { static_assert(std::is_integral<T>::value,"Kokkos: Invalid IndexType<>."); - using index_type = IndexType<T>; + using index_type = IndexType ; using type = T; }; } // namespace Kokkos +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +#define KOKKOS_IMPL_IS_CONCEPT( CONCEPT ) \ + template< typename T > struct is_ ## CONCEPT { \ + private: \ + template< typename , typename = std::true_type > struct have : std::false_type {}; \ + template< typename U > struct have<U,typename std::is_same<U,typename U:: CONCEPT >::type> : std::true_type {}; \ + public: \ + enum { value = is_ ## CONCEPT::template have<T>::value }; \ + }; + +// Public concept: + +KOKKOS_IMPL_IS_CONCEPT( memory_space ) +KOKKOS_IMPL_IS_CONCEPT( memory_traits ) +KOKKOS_IMPL_IS_CONCEPT( execution_space ) +KOKKOS_IMPL_IS_CONCEPT( execution_policy ) +KOKKOS_IMPL_IS_CONCEPT( array_layout ) + +namespace Impl { + +// For backward compatibility: + +using Kokkos::is_memory_space ; +using Kokkos::is_memory_traits ; +using Kokkos::is_execution_space ; +using Kokkos::is_execution_policy ; +using Kokkos::is_array_layout ; + +// Implementation concept: + +KOKKOS_IMPL_IS_CONCEPT( iteration_pattern ) +KOKKOS_IMPL_IS_CONCEPT( schedule_type ) +KOKKOS_IMPL_IS_CONCEPT( index_type ) + +} + +#undef KOKKOS_IMPL_IS_CONCEPT + +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +template< class ExecutionSpace , class MemorySpace > +struct Device { + static_assert( 
Kokkos::is_execution_space<ExecutionSpace>::value + , "Execution space is not valid" ); + static_assert( Kokkos::is_memory_space<MemorySpace>::value + , "Memory space is not valid" ); + typedef ExecutionSpace execution_space; + typedef MemorySpace memory_space; + typedef Device<execution_space,memory_space> device_type; +}; + + +template< typename T > +struct is_space { +private: + + template< typename , typename = void > + struct exe : std::false_type { typedef void space ; }; + + template< typename , typename = void > + struct mem : std::false_type { typedef void space ; }; + + template< typename , typename = void > + struct dev : std::false_type { typedef void space ; }; + + template< typename U > + struct exe<U,typename std::conditional<true,void,typename U::execution_space>::type> + : std::is_same<U,typename U::execution_space>::type + { typedef typename U::execution_space space ; }; + + template< typename U > + struct mem<U,typename std::conditional<true,void,typename U::memory_space>::type> + : std::is_same<U,typename U::memory_space>::type + { typedef typename U::memory_space space ; }; + + template< typename U > + struct dev<U,typename std::conditional<true,void,typename U::device_type>::type> + : std::is_same<U,typename U::device_type>::type + { typedef typename U::device_type space ; }; + + typedef typename is_space::template exe<T> is_exe ; + typedef typename is_space::template mem<T> is_mem ; + typedef typename is_space::template dev<T> is_dev ; + +public: + + enum { value = is_exe::value || is_mem::value || is_dev::value }; + + typedef typename is_exe::space execution_space ; + typedef typename is_mem::space memory_space ; + + // For backward compatibility, deprecated in favor of + // Kokkos::Impl::HostMirror<S>::host_mirror_space + + typedef typename std::conditional + < std::is_same< memory_space , Kokkos::HostSpace >::value +#if defined( KOKKOS_HAVE_CUDA ) + || std::is_same< memory_space , Kokkos::CudaUVMSpace >::value + || std::is_same< 
memory_space , Kokkos::CudaHostPinnedSpace >::value +#endif /* #if defined( KOKKOS_HAVE_CUDA ) */ + , memory_space + , Kokkos::HostSpace + >::type host_memory_space ; + +#if defined( KOKKOS_HAVE_CUDA ) + typedef typename std::conditional + < std::is_same< execution_space , Kokkos::Cuda >::value + , Kokkos::DefaultHostExecutionSpace , execution_space + >::type host_execution_space ; +#else + typedef execution_space host_execution_space ; +#endif + + typedef typename std::conditional + < std::is_same< execution_space , host_execution_space >::value && + std::is_same< memory_space , host_memory_space >::value + , T , Kokkos::Device< host_execution_space , host_memory_space > + >::type host_mirror_space ; +}; + +// For backward compatibility + +namespace Impl { + +using Kokkos::is_space ; + +} + +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +/**\brief Access relationship between DstMemorySpace and SrcMemorySpace + * + * The default case can assume accessibility for the same space. + * Specializations must be defined for different memory spaces. + */ +template< typename DstMemorySpace , typename SrcMemorySpace > +struct MemorySpaceAccess { + + static_assert( Kokkos::is_memory_space< DstMemorySpace >::value && + Kokkos::is_memory_space< SrcMemorySpace >::value + , "template arguments must be memory spaces" ); + + /**\brief Can a View (or pointer) to memory in SrcMemorySpace + * be assigned to a View (or pointer) to memory marked DstMemorySpace. + * + * 1. DstMemorySpace::execution_space == SrcMemorySpace::execution_space + * 2. All execution spaces that can access DstMemorySpace can also access + * SrcMemorySpace. + */ + enum { assignable = std::is_same<DstMemorySpace,SrcMemorySpace>::value }; + + /**\brief For all DstExecSpace::memory_space == DstMemorySpace + * DstExecSpace can access SrcMemorySpace. 
+ */ + enum { accessible = assignable }; + + /**\brief Does a DeepCopy capability exist + * to DstMemorySpace from SrcMemorySpace + */ + enum { deepcopy = assignable }; +}; + + +/**\brief Can AccessSpace access MemorySpace ? + * + * Requires: + * Kokkos::is_space< AccessSpace >::value + * Kokkos::is_memory_space< MemorySpace >::value + * + * Can AccessSpace::execution_space access MemorySpace ? + * enum : bool { accessible }; + * + * Is View<AccessSpace::memory_space> assignable from View<MemorySpace> ? + * enum : bool { assignable }; + * + * If ! accessible then which intercessory memory space + * should be used to deep copy memory for + * AccessSpace::execution_space + * to get access. + * When AccessSpace::memory_space == Kokkos::HostSpace + * then space is the View host mirror space. + */ +template< typename AccessSpace , typename MemorySpace > +struct SpaceAccessibility { +private: + + static_assert( Kokkos::is_space< AccessSpace >::value + , "template argument #1 must be a Kokkos space" ); + + static_assert( Kokkos::is_memory_space< MemorySpace >::value + , "template argument #2 must be a Kokkos memory space" ); + + // The input AccessSpace may be a Device<ExecSpace,MemSpace> + // verify that it is a valid combination of spaces. + static_assert( Kokkos::Impl::MemorySpaceAccess + < typename AccessSpace::execution_space::memory_space + , typename AccessSpace::memory_space + >::accessible + , "template argument #1 is an invalid space" ); + + typedef Kokkos::Impl::MemorySpaceAccess + < typename AccessSpace::execution_space::memory_space , MemorySpace > + exe_access ; + + typedef Kokkos::Impl::MemorySpaceAccess + < typename AccessSpace::memory_space , MemorySpace > + mem_access ; + +public: + + /**\brief Can AccessSpace::execution_space access MemorySpace ? + * + * Default based upon memory space accessibility. + * Specialization required for other relationships. 
+ */ + enum { accessible = exe_access::accessible }; + + /**\brief Can assign to AccessSpace from MemorySpace ? + * + * Default based upon memory space accessibility. + * Specialization required for other relationships. + */ + enum { assignable = + is_memory_space< AccessSpace >::value && mem_access::assignable }; + + /**\brief Can deep copy to AccessSpace::memory_Space from MemorySpace ? */ + enum { deepcopy = mem_access::deepcopy }; + + // What intercessory space for AccessSpace::execution_space + // to be able to access MemorySpace? + // If same memory space or not accessible use the AccessSpace + // else construct a device with execution space and memory space. + typedef typename std::conditional + < std::is_same<typename AccessSpace::memory_space,MemorySpace>::value || + ! exe_access::accessible + , AccessSpace + , Kokkos::Device< typename AccessSpace::execution_space , MemorySpace > + >::type space ; +}; + +}} // namespace Kokkos::Impl + +//---------------------------------------------------------------------------- + #endif // KOKKOS_CORE_CONCEPTS_HPP diff --git a/lib/kokkos/core/src/Kokkos_Core.hpp b/lib/kokkos/core/src/Kokkos_Core.hpp index 7cde4610ee8957c2eea7a9a2e05c8f2cbb9463f4..266f750d3753321bb142a2c80fe8d65f8a034f90 100644 --- a/lib/kokkos/core/src/Kokkos_Core.hpp +++ b/lib/kokkos/core/src/Kokkos_Core.hpp @@ -72,6 +72,7 @@ #include <Kokkos_Vectorization.hpp> #include <Kokkos_Atomic.hpp> #include <Kokkos_hwloc.hpp> +#include <Kokkos_Timer.hpp> #ifdef KOKKOS_HAVE_CXX11 #include <Kokkos_Complex.hpp> @@ -112,7 +113,6 @@ void fence(); //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { /* Allocate memory from a memory space. 
* The allocation is tracked in Kokkos memory tracking system, so @@ -155,18 +155,8 @@ void * kokkos_realloc( void * arg_alloc , const size_t arg_alloc_size ) reallocate_tracked( arg_alloc , arg_alloc_size ); } -} // namespace Experimental } // namespace Kokkos - -namespace Kokkos { - -using Kokkos::Experimental::kokkos_malloc ; -using Kokkos::Experimental::kokkos_realloc ; -using Kokkos::Experimental::kokkos_free ; - -} - //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp index e9648b59b8f62c5cb4ea46c00ec1498c361cbdb4..0f5ef9200a9b14ac1cec7361449c5f123cc24f48 100644 --- a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp +++ b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -49,6 +49,7 @@ // and compiler environment then sets a collection of #define macros. #include <Kokkos_Macros.hpp> +#include <impl/Kokkos_Utilities.hpp> //---------------------------------------------------------------------------- // Have assumed a 64bit build (8byte pointers) throughout the code base. 
@@ -207,7 +208,7 @@ namespace Impl { template< class Functor , class Policy - , class EnableFunctor = void + , class EnableFunctor = void , class EnablePolicy = void > struct FunctorPolicyExecutionSpace; @@ -220,7 +221,7 @@ struct FunctorPolicyExecutionSpace; /// This is an implementation detail of parallel_for. Users should /// skip this and go directly to the nonmember function parallel_for. template< class FunctorType , class ExecPolicy , class ExecutionSpace = - typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space + typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space > class ParallelFor ; /// \class ParallelReduce @@ -229,7 +230,7 @@ template< class FunctorType , class ExecPolicy , class ExecutionSpace = /// This is an implementation detail of parallel_reduce. Users should /// skip this and go directly to the nonmember function parallel_reduce. template< class FunctorType , class ExecPolicy , class ReducerType = InvalidType, class ExecutionSpace = - typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space + typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space > class ParallelReduce ; /// \class ParallelScan @@ -238,8 +239,8 @@ template< class FunctorType , class ExecPolicy , class ReducerType = InvalidType /// This is an implementation detail of parallel_scan. Users should /// skip this and go directly to the documentation of the nonmember /// template function Kokkos::parallel_scan. 
-template< class FunctorType , class ExecPolicy , class ExecutionSapce = - typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space +template< class FunctorType , class ExecPolicy , class ExecutionSapce = + typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space > class ParallelScan ; }} diff --git a/lib/kokkos/core/src/Kokkos_Cuda.hpp b/lib/kokkos/core/src/Kokkos_Cuda.hpp index 3130ee3198f35ec59dbeef7755cfffc11fda9346..84ae5ee044c4bd62c459656fccb3cb95ca7328eb 100644 --- a/lib/kokkos/core/src/Kokkos_Cuda.hpp +++ b/lib/kokkos/core/src/Kokkos_Cuda.hpp @@ -56,7 +56,7 @@ #include <Kokkos_CudaSpace.hpp> #include <Kokkos_Parallel.hpp> -#include <Kokkos_TaskPolicy.hpp> +#include <Kokkos_TaskScheduler.hpp> #include <Kokkos_Layout.hpp> #include <Kokkos_ScratchSpace.hpp> #include <Kokkos_MemoryTraits.hpp> @@ -229,6 +229,39 @@ private: namespace Kokkos { namespace Impl { +template<> +struct MemorySpaceAccess + < Kokkos::CudaSpace + , Kokkos::Cuda::scratch_memory_space + > +{ + enum { assignable = false }; + enum { accessible = true }; + enum { deepcopy = false }; +}; + +#if defined( KOKKOS_USE_CUDA_UVM ) + +// If forcing use of UVM everywhere +// then must assume that CudaUVMSpace +// can be a stand-in for CudaSpace. +// This will fail when a strange host-side execution space +// defines CudaUVMSpace as its preferred memory space. 
+ +template<> +struct MemorySpaceAccess + < Kokkos::CudaUVMSpace + , Kokkos::Cuda::scratch_memory_space + > +{ + enum { assignable = false }; + enum { accessible = true }; + enum { deepcopy = false }; +}; + +#endif + + template<> struct VerifyExecutionCanAccessMemorySpace < Kokkos::CudaSpace @@ -259,9 +292,6 @@ struct VerifyExecutionCanAccessMemorySpace #include <Cuda/Kokkos_CudaExec.hpp> #include <Cuda/Kokkos_Cuda_View.hpp> - -#include <Cuda/KokkosExp_Cuda_View.hpp> - #include <Cuda/Kokkos_Cuda_Parallel.hpp> #include <Cuda/Kokkos_Cuda_Task.hpp> diff --git a/lib/kokkos/core/src/Kokkos_CudaSpace.hpp b/lib/kokkos/core/src/Kokkos_CudaSpace.hpp index cd728895d0f02419d702ccb37ec9b048b08a6df8..fd9b0ad123004c1a125b6cebb93af8052e68e719 100644 --- a/lib/kokkos/core/src/Kokkos_CudaSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_CudaSpace.hpp @@ -88,6 +88,9 @@ public: void deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const ; + /**\brief Return Name of the MemorySpace */ + static constexpr const char* name(); + /*--------------------------------*/ /** \brief Error reporting for HostSpace attempt to access CudaSpace */ static void access_error(); @@ -97,7 +100,8 @@ private: int m_device ; ///< Which Cuda device - // friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > ; + static constexpr const char* m_name = "Cuda"; + friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > ; }; namespace Impl { @@ -156,6 +160,14 @@ public: /** \brief If UVM capability is available */ static bool available(); + + /*--------------------------------*/ + /** \brief CudaUVMSpace specific routine */ + static int number_of_allocations(); + + /*--------------------------------*/ + + /*--------------------------------*/ CudaUVMSpace(); @@ -172,11 +184,16 @@ public: void deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const ; + /**\brief Return Name of the MemorySpace */ + static constexpr const 
char* name(); + /*--------------------------------*/ private: - int m_device ; ///< Which Cuda device + + static constexpr const char* m_name = "CudaUVM"; + }; } // namespace Kokkos @@ -215,6 +232,13 @@ public: void deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const ; + /**\brief Return Name of the MemorySpace */ + static constexpr const char* name(); + +private: + + static constexpr const char* m_name = "CudaHostPinned"; + /*--------------------------------*/ }; @@ -226,6 +250,126 @@ public: namespace Kokkos { namespace Impl { +static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaSpace >::assignable , "" ); +static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaUVMSpace >::assignable , "" ); +static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" ); + +//---------------------------------------- + +template<> +struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaSpace > { + enum { assignable = false }; + enum { accessible = false }; + enum { deepcopy = true }; +}; + +template<> +struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaUVMSpace > { + // HostSpace::execution_space != CudaUVMSpace::execution_space + enum { assignable = false }; + enum { accessible = true }; + enum { deepcopy = true }; +}; + +template<> +struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace > { + // HostSpace::execution_space == CudaHostPinnedSpace::execution_space + enum { assignable = true }; + enum { accessible = true }; + enum { deepcopy = true }; +}; + +//---------------------------------------- + +template<> +struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::HostSpace > { + enum { assignable = false }; + enum { accessible = false }; + enum { deepcopy = true }; +}; + +template<> +struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaUVMSpace > { + // CudaSpace::execution_space == 
CudaUVMSpace::execution_space + enum { assignable = true }; + enum { accessible = true }; + enum { deepcopy = true }; +}; + +template<> +struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace > { + // CudaSpace::execution_space != CudaHostPinnedSpace::execution_space + enum { assignable = false }; + enum { accessible = true }; // CudaSpace::execution_space + enum { deepcopy = true }; +}; + +//---------------------------------------- +// CudaUVMSpace::execution_space == Cuda +// CudaUVMSpace accessible to both Cuda and Host + +template<> +struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::HostSpace > { + enum { assignable = false }; + enum { accessible = false }; // Cuda cannot access HostSpace + enum { deepcopy = true }; +}; + +template<> +struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaSpace > { + // CudaUVMSpace::execution_space == CudaSpace::execution_space + // Can access CudaUVMSpace from Host but cannot access CudaSpace from Host + enum { assignable = false }; + + // CudaUVMSpace::execution_space can access CudaSpace + enum { accessible = true }; + enum { deepcopy = true }; +}; + +template<> +struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaHostPinnedSpace > { + // CudaUVMSpace::execution_space != CudaHostPinnedSpace::execution_space + enum { assignable = false }; + enum { accessible = true }; // CudaUVMSpace::execution_space + enum { deepcopy = true }; +}; + + +//---------------------------------------- +// CudaHostPinnedSpace::execution_space == HostSpace::execution_space +// CudaHostPinnedSpace accessible to both Cuda and Host + +template<> +struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace > { + enum { assignable = false }; // Cannot access from Cuda + enum { accessible = true }; // CudaHostPinnedSpace::execution_space + enum { deepcopy = true }; +}; + +template<> +struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaSpace > { + enum { assignable = false }; // Cannot 
access from Host + enum { accessible = false }; + enum { deepcopy = true }; +}; + +template<> +struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaUVMSpace > { + enum { assignable = false }; // different execution_space + enum { accessible = true }; // same accessibility + enum { deepcopy = true }; +}; + +//---------------------------------------- + +}} // namespace Kokkos::Impl + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { + void DeepCopyAsyncCuda( void * dst , const void * src , size_t n); template<> struct DeepCopy< CudaSpace , CudaSpace , Cuda> @@ -553,7 +697,6 @@ struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaHost //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace Impl { template<> @@ -791,7 +934,6 @@ public: }; } // namespace Impl -} // namespace Experimental } // namespace Kokkos //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp index 5834fc04dbe43c78bd53b032db1e97ade5e34655..db4d67ae7d9656a998c1d3ff867dc6c1601562b7 100644 --- a/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp +++ b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp @@ -52,6 +52,7 @@ #include <impl/Kokkos_AnalyzePolicy.hpp> #include <Kokkos_Concepts.hpp> #include <iostream> + //---------------------------------------------------------------------------- namespace Kokkos { @@ -82,7 +83,6 @@ class RangePolicy : public Impl::PolicyTraits<Properties ... > { private: - typedef Impl::PolicyTraits<Properties ... 
> traits; typename traits::execution_space m_space ; @@ -90,8 +90,8 @@ private: typename traits::index_type m_end ; typename traits::index_type m_granularity ; typename traits::index_type m_granularity_mask ; -public: +public: //! Tag this class as an execution policy typedef RangePolicy execution_policy; typedef typename traits::index_type member_type ; @@ -100,7 +100,6 @@ public: KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin ; } KOKKOS_INLINE_FUNCTION member_type end() const { return m_end ; } - //TODO: find a better workaround for Clangs weird instantiation order // This thing is here because of an instantiation error, where the RangePolicy is inserted into FunctorValue Traits, which // tries decltype on the operator. It tries to do this even though the first argument of parallel for clearly doesn't match. @@ -135,47 +134,45 @@ public: , work_begin , work_end ) {} - public: - - /** \brief return chunk_size */ - inline member_type chunk_size() const { - return m_granularity; - } +public: + /** \brief return chunk_size */ + inline member_type chunk_size() const { + return m_granularity; + } + + /** \brief set chunk_size to a discrete value*/ + inline RangePolicy set_chunk_size(int chunk_size_) const { + RangePolicy p = *this; + p.m_granularity = chunk_size_; + p.m_granularity_mask = p.m_granularity - 1; + return p; + } - /** \brief set chunk_size to a discrete value*/ - inline RangePolicy set_chunk_size(int chunk_size_) const { - RangePolicy p = *this; - p.m_granularity = chunk_size_; - p.m_granularity_mask = p.m_granularity - 1; - return p; - } +private: + /** \brief finalize chunk_size if it was set to AUTO*/ + inline void set_auto_chunk_size() { + + typename traits::index_type concurrency = traits::execution_space::concurrency(); + if( concurrency==0 ) concurrency=1; + + if(m_granularity > 0) { + if(!Impl::is_integral_power_of_two( m_granularity )) + Kokkos::abort("RangePolicy blocking granularity must be power of two" ); + } + + member_type 
new_chunk_size = 1; + while(new_chunk_size*100*concurrency < m_end-m_begin) + new_chunk_size *= 2; + if(new_chunk_size < 128) { + new_chunk_size = 1; + while( (new_chunk_size*40*concurrency < m_end-m_begin ) && (new_chunk_size<128) ) + new_chunk_size*=2; + } + m_granularity = new_chunk_size; + m_granularity_mask = m_granularity - 1; + } - private: - /** \brief finalize chunk_size if it was set to AUTO*/ - inline void set_auto_chunk_size() { - - typename traits::index_type concurrency = traits::execution_space::concurrency(); - if( concurrency==0 ) concurrency=1; - - if(m_granularity > 0) { - if(!Impl::is_integral_power_of_two( m_granularity )) - Kokkos::abort("RangePolicy blocking granularity must be power of two" ); - } - - - member_type new_chunk_size = 1; - while(new_chunk_size*100*concurrency < m_end-m_begin) - new_chunk_size *= 2; - if(new_chunk_size < 128) { - new_chunk_size = 1; - while( (new_chunk_size*40*concurrency < m_end-m_begin ) && (new_chunk_size<128) ) - new_chunk_size*=2; - } - m_granularity = new_chunk_size; - m_granularity_mask = m_granularity - 1; - } - - public: +public: /** \brief Subrange for a partition's rank and size. * * Typically used to partition a range over a group of threads. @@ -212,16 +209,15 @@ public: if ( range.end() < m_end ) m_end = range.end() ; } } - private: - member_type m_begin ; - member_type m_end ; - WorkRange(); - WorkRange & operator = ( const WorkRange & ); + private: + member_type m_begin ; + member_type m_end ; + WorkRange(); + WorkRange & operator = ( const WorkRange & ); }; }; - } // namespace Kokkos //---------------------------------------------------------------------------- @@ -231,7 +227,6 @@ namespace Kokkos { namespace Impl { - template< class ExecSpace, class ... Properties> class TeamPolicyInternal: public Impl::PolicyTraits<Properties ... 
> { private: @@ -245,6 +240,10 @@ public: * This size takes into account execution space concurrency limitations and * scratch memory space limitations for reductions, team reduce/scan, and * team shared memory. + * + * This function only works for single-operator functors. + * With multi-operator functors it cannot be determined + * which operator will be called. */ template< class FunctorType > static int team_size_max( const FunctorType & ); @@ -254,6 +253,10 @@ public: * This size takes into account execution space concurrency limitations and * scratch memory space limitations for reductions, team reduce/scan, and * team shared memory. + * + * This function only works for single-operator functors. + * With multi-operator functors it cannot be determined + * which operator will be called. */ template< class FunctorType > static int team_size_recommended( const FunctorType & ); @@ -344,9 +347,7 @@ public: KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const ; }; }; -} -namespace Impl { struct PerTeamValue { int value; PerTeamValue(int arg); @@ -356,12 +357,12 @@ namespace Impl { int value; PerThreadValue(int arg); }; + } Impl::PerTeamValue PerTeam(const int& arg); Impl::PerThreadValue PerThread(const int& arg); - /** \brief Execution policy for parallel work over a league of teams of threads. 
* * The work functor is called for each thread of each team such that @@ -443,10 +444,6 @@ public: }; -} // namespace Kokkos - -namespace Kokkos { - namespace Impl { template<typename iType, class TeamMemberType> @@ -484,8 +481,8 @@ public: KOKKOS_INLINE_FUNCTION TeamThreadRangeBoundariesStruct( const TeamMemberType& arg_thread - , const iType& arg_end - ) + , const iType& arg_end + ) : start( ibegin( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) ) , end( iend( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) ) , thread( arg_thread ) @@ -502,32 +499,33 @@ public: {} }; - template<typename iType, class TeamMemberType> - struct ThreadVectorRangeBoundariesStruct { - typedef iType index_type; - enum {start = 0}; - const iType end; - enum {increment = 1}; +template<typename iType, class TeamMemberType> +struct ThreadVectorRangeBoundariesStruct { + typedef iType index_type; + enum {start = 0}; + const iType end; + enum {increment = 1}; - KOKKOS_INLINE_FUNCTION - ThreadVectorRangeBoundariesStruct (const TeamMemberType& thread, const iType& count): - end( count ) - {} - }; + KOKKOS_INLINE_FUNCTION + ThreadVectorRangeBoundariesStruct ( const TeamMemberType, const iType& count ) : end( count ) {} + KOKKOS_INLINE_FUNCTION + ThreadVectorRangeBoundariesStruct ( const iType& count ) : end( count ) {} +}; - template<class TeamMemberType> - struct ThreadSingleStruct { - const TeamMemberType& team_member; - KOKKOS_INLINE_FUNCTION - ThreadSingleStruct(const TeamMemberType& team_member_):team_member(team_member_){} - }; +template<class TeamMemberType> +struct ThreadSingleStruct { + const TeamMemberType& team_member; + KOKKOS_INLINE_FUNCTION + ThreadSingleStruct( const TeamMemberType& team_member_ ) : team_member( team_member_ ) {} +}; + +template<class TeamMemberType> +struct VectorSingleStruct { + const TeamMemberType& team_member; + KOKKOS_INLINE_FUNCTION + VectorSingleStruct( const TeamMemberType& team_member_ ) : team_member( team_member_ ) {} +}; 
- template<class TeamMemberType> - struct VectorSingleStruct { - const TeamMemberType& team_member; - KOKKOS_INLINE_FUNCTION - VectorSingleStruct(const TeamMemberType& team_member_):team_member(team_member_){} - }; } // namespace Impl /** \brief Execution policy for parallel work over a threads within a team. @@ -538,7 +536,8 @@ public: */ template<typename iType, class TeamMemberType> KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType> TeamThreadRange(const TeamMemberType&, const iType& count); +Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType> +TeamThreadRange( const TeamMemberType&, const iType& count ); /** \brief Execution policy for parallel work over a threads within a team. * @@ -546,9 +545,10 @@ Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType> TeamThreadRange(cons * This policy is used together with a parallel pattern as a nested layer within a kernel launched * with the TeamPolicy. This variant expects a begin and end. So the range is (begin,end]. */ -template<typename iType, class TeamMemberType> +template<typename iType1, typename iType2, class TeamMemberType> KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType> TeamThreadRange(const TeamMemberType&, const iType& begin, const iType& end); +Impl::TeamThreadRangeBoundariesStruct<typename std::common_type<iType1, iType2>::type, TeamMemberType> +TeamThreadRange( const TeamMemberType&, const iType1& begin, const iType2& end ); /** \brief Execution policy for a vector parallel loop. 
* @@ -558,13 +558,12 @@ Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType> TeamThreadRange(cons */ template<typename iType, class TeamMemberType> KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct<iType,TeamMemberType> ThreadVectorRange(const TeamMemberType&, const iType& count); +Impl::ThreadVectorRangeBoundariesStruct<iType,TeamMemberType> +ThreadVectorRange( const TeamMemberType&, const iType& count ); } // namespace Kokkos - #endif /* #define KOKKOS_EXECPOLICY_HPP */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- - diff --git a/lib/kokkos/core/src/Kokkos_HBWSpace.hpp b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp index e02689b0f96f370448061cb90bd80a3492d32c35..10e735fe00151d302600f69fce3de798025621af 100644 --- a/lib/kokkos/core/src/Kokkos_HBWSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp @@ -46,7 +46,6 @@ #include <Kokkos_HostSpace.hpp> -#include <impl/Kokkos_HBWAllocators.hpp> /*--------------------------------------------------------------------------*/ #ifdef KOKKOS_HAVE_HBWSPACE @@ -148,11 +147,14 @@ public: void deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const ; + /**\brief Return Name of the MemorySpace */ + static constexpr const char* name(); + private: AllocationMechanism m_alloc_mech ; - - friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > ; + static constexpr const char* m_name = "HBW"; + friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > ; }; } // namespace Experimental @@ -162,7 +164,6 @@ private: //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace Impl { template<> @@ -239,9 +240,33 @@ public: }; } // namespace Impl -} // namespace Experimental } // namespace Kokkos + 
+//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::HBWSpace , Kokkos::Experimental::HBWSpace >::assignable , "" ); + +template<> +struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::Experimental::HBWSpace > { + enum { assignable = true }; + enum { accessible = true }; + enum { deepcopy = true }; +}; + +template<> +struct MemorySpaceAccess< Kokkos::Experimental::HBWSpace , Kokkos::HostSpace> { + enum { assignable = false }; + enum { accessible = true }; + enum { deepcopy = true }; +}; + +}} + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/Kokkos_HostSpace.hpp b/lib/kokkos/core/src/Kokkos_HostSpace.hpp index 5fe686559a07d63cb4a07bf821203672c1336699..0292dd8a6c73ab56e63d0af528e41e5d676a3f6b 100644 --- a/lib/kokkos/core/src/Kokkos_HostSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_HostSpace.hpp @@ -50,12 +50,12 @@ #include <typeinfo> #include <Kokkos_Core_fwd.hpp> +#include <Kokkos_Concepts.hpp> #include <Kokkos_MemoryTraits.hpp> #include <impl/Kokkos_Traits.hpp> #include <impl/Kokkos_Error.hpp> - -#include <impl/KokkosExp_SharedAlloc.hpp> +#include <impl/Kokkos_SharedAlloc.hpp> /*--------------------------------------------------------------------------*/ @@ -155,20 +155,63 @@ public: void deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const ; + /**\brief Return Name of the MemorySpace */ + static constexpr const char* name(); + private: AllocationMechanism m_alloc_mech ; + static constexpr const char* m_name = "Host"; + friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > ; +}; + +} // namespace Kokkos + 
+//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::HostSpace >::assignable , "" ); + + +template< typename S > +struct HostMirror { +private: - friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > ; + // If input execution space can access HostSpace then keep it. + // Example: Kokkos::OpenMP can access, Kokkos::Cuda cannot + enum { keep_exe = Kokkos::Impl::MemorySpaceAccess + < typename S::execution_space::memory_space , Kokkos::HostSpace > + ::accessible }; + + // If HostSpace can access memory space then keep it. + // Example: Cannot access Kokkos::CudaSpace, can access Kokkos::CudaUVMSpace + enum { keep_mem = Kokkos::Impl::MemorySpaceAccess + < Kokkos::HostSpace , typename S::memory_space >::accessible }; + +public: + + typedef typename std::conditional + < keep_exe && keep_mem /* Can keep whole space */ + , S + , typename std::conditional + < keep_mem /* Can keep memory space, use default Host execution space */ + , Kokkos::Device< Kokkos::HostSpace::execution_space + , typename S::memory_space > + , Kokkos::HostSpace + >::type + >::type Space ; }; +} // namespace Impl } // namespace Kokkos //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace Impl { template<> @@ -245,7 +288,6 @@ public: }; } // namespace Impl -} // namespace Experimental } // namespace Kokkos //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/Kokkos_Layout.hpp b/lib/kokkos/core/src/Kokkos_Layout.hpp index c77c33703bdd76161b20c2e5ae59b96c03c4550e..8ffbc8bb03d7cc3ed9693c3c5feb727edbdc4b4c 100644 --- 
a/lib/kokkos/core/src/Kokkos_Layout.hpp +++ b/lib/kokkos/core/src/Kokkos_Layout.hpp @@ -82,7 +82,7 @@ struct LayoutLeft { LayoutLeft & operator = ( LayoutLeft && ) = default ; KOKKOS_INLINE_FUNCTION - constexpr + explicit constexpr LayoutLeft( size_t N0 = 0 , size_t N1 = 0 , size_t N2 = 0 , size_t N3 = 0 , size_t N4 = 0 , size_t N5 = 0 , size_t N6 = 0 , size_t N7 = 0 ) : dimension { N0 , N1 , N2 , N3 , N4 , N5 , N6 , N7 } {} @@ -114,7 +114,7 @@ struct LayoutRight { LayoutRight & operator = ( LayoutRight && ) = default ; KOKKOS_INLINE_FUNCTION - constexpr + explicit constexpr LayoutRight( size_t N0 = 0 , size_t N1 = 0 , size_t N2 = 0 , size_t N3 = 0 , size_t N4 = 0 , size_t N5 = 0 , size_t N6 = 0 , size_t N7 = 0 ) : dimension { N0 , N1 , N2 , N3 , N4 , N5 , N6 , N7 } {} @@ -132,6 +132,11 @@ struct LayoutStride { size_t dimension[ ARRAY_LAYOUT_MAX_RANK ] ; size_t stride[ ARRAY_LAYOUT_MAX_RANK ] ; + LayoutStride( LayoutStride const & ) = default ; + LayoutStride( LayoutStride && ) = default ; + LayoutStride & operator = ( LayoutStride const & ) = default ; + LayoutStride & operator = ( LayoutStride && ) = default ; + /** \brief Compute strides from ordered dimensions. 
* * Values of order uniquely form the set [0..rank) @@ -164,7 +169,8 @@ struct LayoutStride { return tmp ; } - KOKKOS_INLINE_FUNCTION constexpr + KOKKOS_INLINE_FUNCTION + explicit constexpr LayoutStride( size_t N0 = 0 , size_t S0 = 0 , size_t N1 = 0 , size_t S1 = 0 , size_t N2 = 0 , size_t S2 = 0 @@ -220,7 +226,7 @@ struct LayoutTileLeft { LayoutTileLeft & operator = ( LayoutTileLeft && ) = default ; KOKKOS_INLINE_FUNCTION - constexpr + explicit constexpr LayoutTileLeft( size_t argN0 = 0 , size_t argN1 = 0 , size_t argN2 = 0 , size_t argN3 = 0 , size_t argN4 = 0 , size_t argN5 = 0 , size_t argN6 = 0 , size_t argN7 = 0 ) diff --git a/lib/kokkos/core/src/Kokkos_Macros.hpp b/lib/kokkos/core/src/Kokkos_Macros.hpp index 7d1e59af5e473db94a5ed6361bb3d6ee7b9b47e6..fbe699deb8191cd023b8277cdd28d501be37c3aa 100644 --- a/lib/kokkos/core/src/Kokkos_Macros.hpp +++ b/lib/kokkos/core/src/Kokkos_Macros.hpp @@ -114,11 +114,11 @@ #error "#include <cuda.h> did not define CUDA_VERSION" #endif -#if ( CUDA_VERSION < 6050 ) -// CUDA supports (inofficially) C++11 in device code starting with -// version 6.5. This includes auto type and device code internal +#if ( CUDA_VERSION < 7000 ) +// CUDA supports C++11 in device code starting with +// version 7.0. This includes auto type and device code internal // lambdas. -#error "Cuda version 6.5 or greater required" +#error "Cuda version 7.0 or greater required" #endif #if defined( __CUDA_ARCH__ ) && ( __CUDA_ARCH__ < 300 ) @@ -127,16 +127,19 @@ #endif #ifdef KOKKOS_CUDA_USE_LAMBDA -#if ( CUDA_VERSION < 7000 ) -// CUDA supports C++11 lambdas generated in host code to be given -// to the device starting with version 7.5. 
But the release candidate (7.5.6) -// still identifies as 7.0 -#error "Cuda version 7.5 or greater required for host-to-device Lambda support" -#endif -#if ( CUDA_VERSION < 8000 ) -#define KOKKOS_LAMBDA [=]__device__ +#if ( CUDA_VERSION < 7050 ) + // CUDA supports C++11 lambdas generated in host code to be given + // to the device starting with version 7.5. But the release candidate (7.5.6) + // still identifies as 7.0 + #error "Cuda version 7.5 or greater required for host-to-device Lambda support" +#endif +#if ( CUDA_VERSION < 8000 ) && defined(__NVCC__) + #define KOKKOS_LAMBDA [=]__device__ #else -#define KOKKOS_LAMBDA [=]__host__ __device__ + #define KOKKOS_LAMBDA [=]__host__ __device__ + #if defined( KOKKOS_HAVE_CXX1Z ) + #define KOKKOS_CLASS_LAMBDA [=,*this] __host__ __device__ + #endif #endif #define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1 #endif @@ -145,7 +148,7 @@ #if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) // Cuda version 8.0 still needs the functor wrapper - #if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ ) + #if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ ) && defined(__NVCC__) #define KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER #endif #endif @@ -153,13 +156,12 @@ /*--------------------------------------------------------------------------*/ /* Language info: C++, CUDA, OPENMP */ -#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA ) +#if defined( KOKKOS_HAVE_CUDA ) // Compiling Cuda code to 'ptx' #define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__ #define KOKKOS_INLINE_FUNCTION __device__ __host__ inline #define KOKKOS_FUNCTION __device__ __host__ - #endif /* #if defined( __CUDA_ARCH__ ) */ #if defined( _OPENMP ) @@ -184,10 +186,12 @@ #else #if defined( KOKKOS_HAVE_CXX11 ) && ! 
defined( KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA ) + #if !defined (KOKKOS_HAVE_CUDA) // Compiling with clang for Cuda does not work with LAMBDAs either // CUDA (including version 6.5) does not support giving lambdas as // arguments to global functions. Thus its not currently possible // to dispatch lambdas from the host. #define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1 + #endif #endif #endif /* #if defined( __NVCC__ ) */ @@ -195,7 +199,11 @@ #define KOKKOS_LAMBDA [=] #endif -#if ! defined( __CUDA_ARCH__ ) /* Not compiling Cuda code to 'ptx'. */ +#if defined( KOKKOS_HAVE_CXX1Z ) && !defined (KOKKOS_CLASS_LAMBDA) + #define KOKKOS_CLASS_LAMBDA [=,*this] +#endif + +//#if ! defined( __CUDA_ARCH__ ) /* Not compiling Cuda code to 'ptx'. */ /* Intel compiler for host code */ @@ -243,7 +251,7 @@ #endif #endif -#endif /* #if ! defined( __CUDA_ARCH__ ) */ +//#endif /* #if ! defined( __CUDA_ARCH__ ) */ /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ @@ -257,6 +265,20 @@ #define KOKKOS_HAVE_PRAGMA_VECTOR 1 #define KOKKOS_HAVE_PRAGMA_SIMD 1 + #define KOKKOS_RESTRICT __restrict__ + + #ifndef KOKKOS_ALIGN + #define KOKKOS_ALIGN(size) __attribute__((aligned(size))) + #endif + + #ifndef KOKKOS_ALIGN_PTR + #define KOKKOS_ALIGN_PTR(size) __attribute__((align_value(size))) + #endif + + #ifndef KOKKOS_ALIGN_SIZE + #define KOKKOS_ALIGN_SIZE 64 + #endif + #if ( 1400 > KOKKOS_COMPILER_INTEL ) #if ( 1300 > KOKKOS_COMPILER_INTEL ) #error "Compiling with Intel version earlier than 13.0 is not supported. Official minimal version is 14.0." @@ -264,11 +286,11 @@ #warning "Compiling with Intel version 13.x probably works but is not officially supported. Official minimal version is 14.0." #endif #endif - #if ( 1200 <= KOKKOS_COMPILER_INTEL ) && ! defined( KOKKOS_ENABLE_ASM ) && ! defined( _WIN32 ) + #if ! defined( KOKKOS_ENABLE_ASM ) && ! 
defined( _WIN32 ) #define KOKKOS_ENABLE_ASM 1 #endif - #if ( 1200 <= KOKKOS_COMPILER_INTEL ) && ! defined( KOKKOS_FORCEINLINE_FUNCTION ) + #if ! defined( KOKKOS_FORCEINLINE_FUNCTION ) #if !defined (_WIN32) #define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline)) #else @@ -335,14 +357,11 @@ #define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline)) #endif - #if ! defined( KOKKOS_ENABLE_ASM ) && \ - ! ( defined( __powerpc) || \ - defined(__powerpc__) || \ - defined(__powerpc64__) || \ - defined(__POWERPC__) || \ - defined(__ppc__) || \ - defined(__ppc64__) || \ - defined(__PGIC__) ) + #if ! defined( KOKKOS_ENABLE_ASM ) && ! defined( __PGIC__ ) && \ + ( defined( __amd64 ) || \ + defined( __amd64__ ) || \ + defined( __x86_64 ) || \ + defined( __x86_64__ ) ) #define KOKKOS_ENABLE_ASM 1 #endif @@ -385,10 +404,30 @@ #define KOKKOS_FUNCTION /**/ #endif + +//---------------------------------------------------------------------------- +///** Define empty macro for restrict if necessary: */ + +#if ! defined(KOKKOS_RESTRICT) +#define KOKKOS_RESTRICT +#endif + //---------------------------------------------------------------------------- /** Define Macro for alignment: */ +#if ! defined KOKKOS_ALIGN_SIZE +#define KOKKOS_ALIGN_SIZE 16 +#endif + +#if ! defined(KOKKOS_ALIGN) +#define KOKKOS_ALIGN(size) __attribute__((aligned(size))) +#endif + +#if ! defined(KOKKOS_ALIGN_PTR) +#define KOKKOS_ALIGN_PTR(size) __attribute__((aligned(size))) +#endif + #if ! defined(KOKKOS_ALIGN_16) -#define KOKKOS_ALIGN_16 __attribute__((aligned(16))) +#define KOKKOS_ALIGN_16 KOKKOS_ALIGN(16) #endif //---------------------------------------------------------------------------- @@ -456,10 +495,6 @@ * are no longer supported. 
*/ -#if defined( KOKKOS_USING_DEPRECATED_VIEW ) -#error "Kokkos deprecated View has been removed" -#endif - #define KOKKOS_USING_EXP_VIEW 1 #define KOKKOS_USING_EXPERIMENTAL_VIEW diff --git a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp index d843f7c9a1442f9ce1a268c04bf6395f28ed94c7..e4f895b7d310f048f9ca20b6fb2688b9fede93c8 100644 --- a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp +++ b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp @@ -49,7 +49,7 @@ #include <Kokkos_Atomic.hpp> #include <impl/Kokkos_BitOps.hpp> #include <impl/Kokkos_Error.hpp> -#include <impl/KokkosExp_SharedAlloc.hpp> +#include <impl/Kokkos_SharedAlloc.hpp> #include <limits> #include <algorithm> @@ -70,12 +70,6 @@ //#define KOKKOS_MEMPOOL_PRINT_PAGE_INFO //#define KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO -// A superblock is considered full when this percentage of its pages are full. -#define KOKKOS_MEMPOOL_SB_FULL_FRACTION 0.80 - -// A page is considered full when this percentage of its blocks are full. 
-#define KOKKOS_MEMPOOL_PAGE_FULL_FRACTION 0.875 // 28 / 32 - //---------------------------------------------------------------------------- namespace Kokkos { @@ -128,7 +122,7 @@ struct bitset_count { dst += src; } KOKKOS_INLINE_FUNCTION - void operator()( size_type i, value_type & count) const + void operator()( size_type i, value_type & count ) const { count += Kokkos::Impl::bit_count( m_words[i] ); } @@ -183,7 +177,7 @@ public: size_type count() const { - size_type val; + size_type val = 0; bitset_count< Bitset > bc( m_words, m_num_words, val ); return val; } @@ -232,6 +226,20 @@ public: return atomic_fetch_and( &m_words[ word_pos ], ~mask ) & mask; } + KOKKOS_FORCEINLINE_FUNCTION + Kokkos::pair< bool, word_type > + fetch_word_set( size_type i ) const + { + size_type word_pos = i >> LG_WORD_SIZE; + word_type mask = word_type(1) << ( i & WORD_MASK ); + + Kokkos::pair<bool, word_type> result; + result.second = atomic_fetch_or( &m_words[ word_pos ], mask ); + result.first = !( result.second & mask ); + + return result; + } + KOKKOS_FORCEINLINE_FUNCTION Kokkos::pair< bool, word_type > fetch_word_reset( size_type i ) const @@ -247,12 +255,10 @@ public: } KOKKOS_FORCEINLINE_FUNCTION - Kokkos::pair< bool, size_type > - set_any_in_word( size_type i, word_type & prev_val ) const + Kokkos::pair< bool, word_type > + set_any_in_word( size_type & pos ) const { - prev_val = 0; - - size_type word_pos = i >> LG_WORD_SIZE; + size_type word_pos = pos >> LG_WORD_SIZE; word_type word = volatile_load( &m_words[ word_pos ] ); // Loop until there are no more unset bits in the word. @@ -261,28 +267,26 @@ public: size_type bit = Kokkos::Impl::bit_scan_forward( ~word ); // Try to set the bit. - word_type mask = word_type(1) << bit; + word_type mask = word_type(1) << bit; word = atomic_fetch_or( &m_words[ word_pos ], mask ); if ( !( word & mask ) ) { // Successfully set the bit. 
- prev_val = word; + pos = ( word_pos << LG_WORD_SIZE ) + bit; - return Kokkos::pair<bool, size_type>( true, ( word_pos << LG_WORD_SIZE ) + bit ); + return Kokkos::pair<bool, word_type>( true, word ); } } // Didn't find a free bit in this word. - return Kokkos::pair<bool, size_type>( false, i ); + return Kokkos::pair<bool, word_type>( false, word_type(0) ); } KOKKOS_FORCEINLINE_FUNCTION - Kokkos::pair< bool, size_type > - set_any_in_word( size_type i, word_type & prev_val, word_type word_mask ) const + Kokkos::pair< bool, word_type > + set_any_in_word( size_type & pos, word_type word_mask ) const { - prev_val = 0; - - size_type word_pos = i >> LG_WORD_SIZE; + size_type word_pos = pos >> LG_WORD_SIZE; word_type word = volatile_load( &m_words[ word_pos ] ); word = ( ~word ) & word_mask; @@ -292,30 +296,28 @@ public: size_type bit = Kokkos::Impl::bit_scan_forward( word ); // Try to set the bit. - word_type mask = word_type(1) << bit; + word_type mask = word_type(1) << bit; word = atomic_fetch_or( &m_words[ word_pos ], mask ); if ( !( word & mask ) ) { // Successfully set the bit. - prev_val = word; + pos = ( word_pos << LG_WORD_SIZE ) + bit; - return Kokkos::pair<bool, size_type>( true, ( word_pos << LG_WORD_SIZE ) + bit ); + return Kokkos::pair<bool, word_type>( true, word ); } word = ( ~word ) & word_mask; } // Didn't find a free bit in this word. - return Kokkos::pair<bool, size_type>( false, i ); + return Kokkos::pair<bool, word_type>( false, word_type(0) ); } KOKKOS_FORCEINLINE_FUNCTION - Kokkos::pair< bool, size_type > - reset_any_in_word( size_type i, word_type & prev_val ) const + Kokkos::pair< bool, word_type > + reset_any_in_word( size_type & pos ) const { - prev_val = 0; - - size_type word_pos = i >> LG_WORD_SIZE; + size_type word_pos = pos >> LG_WORD_SIZE; word_type word = volatile_load( &m_words[ word_pos ] ); // Loop until there are no more set bits in the word. 
@@ -324,28 +326,26 @@ public: size_type bit = Kokkos::Impl::bit_scan_forward( word ); // Try to reset the bit. - word_type mask = word_type(1) << bit; + word_type mask = word_type(1) << bit; word = atomic_fetch_and( &m_words[ word_pos ], ~mask ); if ( word & mask ) { // Successfully reset the bit. - prev_val = word; + pos = ( word_pos << LG_WORD_SIZE ) + bit; - return Kokkos::pair<bool, size_type>( true, ( word_pos << LG_WORD_SIZE ) + bit ); + return Kokkos::pair<bool, word_type>( true, word ); } } // Didn't find a free bit in this word. - return Kokkos::pair<bool, size_type>( false, i ); + return Kokkos::pair<bool, word_type>( false, word_type(0) ); } KOKKOS_FORCEINLINE_FUNCTION - Kokkos::pair< bool, size_type > - reset_any_in_word( size_type i, word_type & prev_val, word_type word_mask ) const + Kokkos::pair< bool, word_type > + reset_any_in_word( size_type & pos, word_type word_mask ) const { - prev_val = 0; - - size_type word_pos = i >> LG_WORD_SIZE; + size_type word_pos = pos >> LG_WORD_SIZE; word_type word = volatile_load( &m_words[ word_pos ] ); word = word & word_mask; @@ -355,21 +355,21 @@ public: size_type bit = Kokkos::Impl::bit_scan_forward( word ); // Try to reset the bit. - word_type mask = word_type(1) << bit; + word_type mask = word_type(1) << bit; word = atomic_fetch_and( &m_words[ word_pos ], ~mask ); if ( word & mask ) { // Successfully reset the bit. - prev_val = word; + pos = ( word_pos << LG_WORD_SIZE ) + bit; - return Kokkos::pair<bool, size_type>( true, ( word_pos << LG_WORD_SIZE ) + bit ); + return Kokkos::pair<bool, word_type>( true, word ); } word = word & word_mask; } // Didn't find a free bit in this word. 
- return Kokkos::pair<bool, size_type>( false, i ); + return Kokkos::pair<bool, word_type>( false, word_type(0) ); } }; @@ -442,7 +442,7 @@ struct create_histogram { total_allocated_blocks += page_allocated_blocks; - atomic_fetch_add( &m_page_histogram(page_allocated_blocks), 1 ); + atomic_increment( &m_page_histogram(page_allocated_blocks) ); } r.first += double(total_allocated_blocks) / blocks_per_sb; @@ -609,7 +609,7 @@ public: }; private: - typedef Impl::SharedAllocationTracker Tracker; + typedef Kokkos::Impl::SharedAllocationTracker Tracker; typedef View< uint32_t *, device_type > UInt32View; typedef View< SuperblockHeader *, device_type > SBHeaderView; @@ -726,11 +726,11 @@ public: // Allocate memory for Views. This is done here instead of at construction // so that the runtime checks can be performed before allocating memory. - resize(m_active, m_num_block_size ); - resize(m_sb_header, m_num_sb ); + resize( m_active, m_num_block_size ); + resize( m_sb_header, m_num_sb ); // Allocate superblock memory. - typedef Impl::SharedAllocationRecord< backend_memory_space, void > SharedRecord; + typedef Kokkos::Impl::SharedAllocationRecord< backend_memory_space, void > SharedRecord; SharedRecord * rec = SharedRecord::allocate( memspace, "mempool", m_total_size ); @@ -751,10 +751,15 @@ public: m_ceil_num_sb * m_num_block_size ); // Initialize all active superblocks to be invalid. - typename UInt32View::HostMirror host_active = create_mirror_view(m_active); - for (size_t i = 0; i < m_num_block_size; ++i) host_active(i) = INVALID_SUPERBLOCK; + typename UInt32View::HostMirror host_active = create_mirror_view( m_active ); + for ( size_t i = 0; i < m_num_block_size; ++i ) host_active(i) = INVALID_SUPERBLOCK; + deep_copy( m_active, host_active ); + + // A superblock is considered full when this percentage of its pages are full. 
+ const double superblock_full_fraction = .8; - deep_copy(m_active, host_active); + // A page is considered full when this percentage of its blocks are full. + const double page_full_fraction = .875; // Initialize the blocksize info. for ( size_t i = 0; i < m_num_block_size; ++i ) { @@ -767,7 +772,7 @@ public: // Set the full level for the superblock. m_blocksize_info[i].m_sb_full_level = - static_cast<uint32_t>( pages_per_sb * KOKKOS_MEMPOOL_SB_FULL_FRACTION ); + static_cast<uint32_t>( pages_per_sb * superblock_full_fraction ); if ( m_blocksize_info[i].m_sb_full_level == 0 ) { m_blocksize_info[i].m_sb_full_level = 1; @@ -778,7 +783,7 @@ public: blocks_per_sb < BLOCKS_PER_PAGE ? blocks_per_sb : BLOCKS_PER_PAGE; m_blocksize_info[i].m_page_full_level = - static_cast<uint32_t>( blocks_per_page * KOKKOS_MEMPOOL_PAGE_FULL_FRACTION ); + static_cast<uint32_t>( blocks_per_page * page_full_fraction ); if ( m_blocksize_info[i].m_page_full_level == 0 ) { m_blocksize_info[i].m_page_full_level = 1; @@ -820,7 +825,7 @@ public: /// \brief The actual block size allocated given alloc_size. KOKKOS_INLINE_FUNCTION size_t allocate_block_size( const size_t alloc_size ) const - { return size_t(1) << ( get_block_size_index( alloc_size ) + LG_MIN_BLOCK_SIZE); } + { return size_t(1) << ( get_block_size_index( alloc_size ) + LG_MIN_BLOCK_SIZE ); } /// \brief Allocate a chunk of memory. /// \param alloc_size Size of the requested allocated in number of bytes. @@ -834,27 +839,41 @@ public: // Only support allocations up to the superblock size. Just return 0 // (failed allocation) for any size above this. - if (alloc_size <= m_sb_size ) + if ( alloc_size <= m_sb_size ) { int block_size_id = get_block_size_index( alloc_size ); uint32_t blocks_per_sb = m_blocksize_info[block_size_id].m_blocks_per_sb; uint32_t pages_per_sb = m_blocksize_info[block_size_id].m_pages_per_sb; + +#ifdef KOKKOS_CUDA_CLANG_WORKAROUND + // Without this test it looks like pages_per_sb might come back wrong. 
+ if ( pages_per_sb == 0 ) return NULL; +#endif + unsigned word_size = blocks_per_sb > 32 ? 32 : blocks_per_sb; unsigned word_mask = ( uint64_t(1) << word_size ) - 1; + // Instead of forcing an atomic read to guarantee the updated value, + // reading the old value is actually beneficial because more threads will + // attempt allocations on the old active superblock instead of waiting on + // the new active superblock. This will help hide the latency of + // switching the active superblock. uint32_t sb_id = volatile_load( &m_active(block_size_id) ); - // If the active is locked, keep reading it until the lock is released. + // If the active is locked, keep reading it atomically until the lock is + // released. while ( sb_id == SUPERBLOCK_LOCK ) { - sb_id = volatile_load( &m_active(block_size_id) ); + sb_id = atomic_fetch_or( &m_active(block_size_id), uint32_t(0) ); } + load_fence(); + bool allocation_done = false; - while (!allocation_done) { + while ( !allocation_done ) { bool need_new_sb = false; - if (sb_id != INVALID_SUPERBLOCK) { + if ( sb_id != INVALID_SUPERBLOCK ) { // Use the value from the clock register as the hash value. uint64_t hash_val = get_clock_register(); @@ -875,12 +894,11 @@ public: bool search_done = false; - while (!search_done) { - bool success; - unsigned prev_val; + while ( !search_done ) { + bool success = false; + unsigned prev_val = 0; - Kokkos::tie( success, pos ) = - m_sb_blocks.set_any_in_word( pos, prev_val, word_mask ); + Kokkos::tie( success, prev_val ) = m_sb_blocks.set_any_in_word( pos, word_mask ); if ( !success ) { if ( ++pages_searched >= pages_per_sb ) { @@ -905,6 +923,8 @@ public: } else { // Reserved a memory location to allocate. + memory_fence(); + search_done = true; allocation_done = true; @@ -918,7 +938,7 @@ public: if ( used_bits == 0 ) { // This page was empty. Decrement the number of empty pages for // the superblock. 
- atomic_fetch_sub( &m_sb_header(sb_id).m_empty_pages, 1 ); + atomic_decrement( &m_sb_header(sb_id).m_empty_pages ); } else if ( used_bits == m_blocksize_info[block_size_id].m_page_full_level - 1 ) { @@ -962,7 +982,7 @@ public: #ifdef KOKKOS_MEMPOOL_PRINT_INFO else { printf( "** Requested allocation size (%zu) larger than superblock size (%lu). **\n", - alloc_size, m_sb_size); + alloc_size, m_sb_size ); #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST fflush( stdout ); #endif @@ -997,8 +1017,10 @@ public: uint32_t block_size_id = lg_block_size - LG_MIN_BLOCK_SIZE; uint32_t pos_rel = offset >> lg_block_size; - bool success; - unsigned prev_val; + bool success = false; + unsigned prev_val = 0; + + memory_fence(); Kokkos::tie( success, prev_val ) = m_sb_blocks.fetch_word_reset( pos_base + pos_rel ); @@ -1023,7 +1045,7 @@ public: volatile_store( &m_sb_header(sb_id).m_empty_pages, uint32_t(0) ); volatile_store( &m_sb_header(sb_id).m_lg_block_size, uint32_t(0) ); - memory_fence(); + store_fence(); m_empty_sb.set( sb_id ); } @@ -1088,7 +1110,7 @@ public: printf( "\n" ); #ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO - typename SBHeaderView::HostMirror host_sb_header = create_mirror_view(m_sb_header); + typename SBHeaderView::HostMirror host_sb_header = create_mirror_view( m_sb_header ); deep_copy( host_sb_header, m_sb_header ); UInt32View num_allocated_blocks( "Allocated Blocks", m_num_sb ); @@ -1101,7 +1123,7 @@ public: } typename UInt32View::HostMirror host_num_allocated_blocks = - create_mirror_view(num_allocated_blocks); + create_mirror_view( num_allocated_blocks ); deep_copy( host_num_allocated_blocks, num_allocated_blocks ); // Print header info of all superblocks. 
@@ -1135,7 +1157,7 @@ public: m_lg_max_sb_blocks, LG_MIN_BLOCK_SIZE, BLOCKS_PER_PAGE, result ); } - typename UInt32View::HostMirror host_page_histogram = create_mirror_view(page_histogram); + typename UInt32View::HostMirror host_page_histogram = create_mirror_view( page_histogram ); deep_copy( host_page_histogram, page_histogram ); // Find the used and total pages and blocks. @@ -1158,8 +1180,8 @@ public: double percent_used_blocks = total_blocks == 0 ? 0.0 : double(used_blocks) / total_blocks; // Count active superblocks. - typename UInt32View::HostMirror host_active = create_mirror_view(m_active); - deep_copy(host_active, m_active); + typename UInt32View::HostMirror host_active = create_mirror_view( m_active ); + deep_copy( host_active, m_active ); unsigned num_active_sb = 0; for ( size_t i = 0; i < m_num_block_size; ++i ) { @@ -1224,6 +1246,7 @@ public: // Print the blocks used for each page of a few individual superblocks. for ( uint32_t i = 0; i < num_sb_id; ++i ) { uint32_t lg_block_size = host_sb_header(sb_id[i]).m_lg_block_size; + if ( lg_block_size != 0 ) { printf( "SB_ID BLOCK ID USED_BLOCKS\n" ); @@ -1249,16 +1272,16 @@ public: #endif printf( " Used blocks: %10u / %10u = %10.6lf\n", used_blocks, total_blocks, - percent_used_blocks ); + percent_used_blocks ); printf( " Used pages: %10u / %10u = %10.6lf\n", used_pages, total_pages, - percent_used_pages ); + percent_used_pages ); printf( " Used SB: %10zu / %10zu = %10.6lf\n", m_num_sb - num_empty_sb, m_num_sb, - percent_used_sb ); + percent_used_sb ); printf( " Active SB: %10u\n", num_active_sb ); printf( " Empty SB: %10u\n", num_empty_sb ); printf( " Partfull SB: %10u\n", num_partfull_sb ); printf( " Full SB: %10lu\n", - m_num_sb - num_active_sb - num_empty_sb - num_partfull_sb ); + m_num_sb - num_active_sb - num_empty_sb - num_partfull_sb ); printf( "Ave. 
SB Full %%: %10.6lf\n", ave_sb_full ); printf( "\n" ); fflush( stdout ); @@ -1316,6 +1339,8 @@ private: uint32_t lock_sb = Kokkos::atomic_compare_exchange( &m_active(block_size_id), old_sb, SUPERBLOCK_LOCK ); + load_fence(); + // Initialize the new superblock to be the previous one so the previous // superblock is returned if a new superblock can't be found. uint32_t new_sb = lock_sb; @@ -1334,11 +1359,11 @@ private: // size's bitset. unsigned pos = block_size_id * m_ceil_num_sb; - while (!search_done) { + while ( !search_done ) { bool success = false; - unsigned prev_val; + unsigned prev_val = 0; - Kokkos::tie( success, pos ) = m_partfull_sb.reset_any_in_word( pos, prev_val ); + Kokkos::tie( success, prev_val ) = m_partfull_sb.reset_any_in_word( pos ); if ( !success ) { if ( ++tries >= max_tries ) { @@ -1351,22 +1376,21 @@ private: } else { // Found a superblock. + + // It is possible that the newly found superblock is the same as the + // old superblock. In this case putting the old value back in yields + // correct behavior. This could happen as follows. This thread + // grabs the lock and transitions the superblock to the full state. + // Before it searches for a new superblock, other threads perform + // enough deallocations to transition the superblock to the partially + // full state. This thread then searches for a partially full + // superblock and finds the one it removed. There's potential for + // this to cause a performance issue if the same superblock keeps + // being removed and added due to the right mix and ordering of + // allocations and deallocations. search_done = true; new_sb = pos - block_size_id * m_ceil_num_sb; - // Assertions: - // 1. A different superblock than the current should be found. 
-#ifdef KOKKOS_MEMPOOL_PRINTERR - if ( new_sb == lock_sb ) { - printf( "\n** MemoryPool::find_superblock() FOUND_SAME_SUPERBLOCK: %u **\n", - new_sb); -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - fflush( stdout ); -#endif - Kokkos::abort( "" ); - } -#endif - // Set the head status for the superblock. volatile_store( &m_sb_header(new_sb).m_is_active, uint32_t(true) ); @@ -1376,7 +1400,7 @@ private: volatile_store( &m_sb_header(lock_sb).m_is_active, uint32_t(false) ); } - memory_fence(); + store_fence(); } } @@ -1389,11 +1413,11 @@ private: // size's bitset. pos = 0; - while (!search_done) { + while ( !search_done ) { bool success = false; - unsigned prev_val; + unsigned prev_val = 0; - Kokkos::tie( success, pos ) = m_empty_sb.reset_any_in_word( pos, prev_val ); + Kokkos::tie( success, prev_val ) = m_empty_sb.reset_any_in_word( pos ); if ( !success ) { if ( ++tries >= max_tries ) { @@ -1406,22 +1430,22 @@ private: } else { // Found a superblock. + + // It is possible that the newly found superblock is the same as + // the old superblock. In this case putting the old value back in + // yields correct behavior. This could happen as follows. This + // thread grabs the lock and transitions the superblock to the full + // state. Before it searches for a new superblock, other threads + // perform enough deallocations to transition the superblock to the + // partially full state and then the empty state. This thread then + // searches for a partially full superblock and none exist. This + // thread then searches for an empty superblock and finds the one + // it removed. The likelihood of this happening is so remote that + // the potential for this to cause a performance issue is + // infinitesimal. search_done = true; new_sb = pos; - // Assertions: - // 1. A different superblock than the current should be found. 
-#ifdef KOKKOS_MEMPOOL_PRINTERR - if ( new_sb == lock_sb ) { - printf( "\n** MemoryPool::find_superblock() FOUND_SAME_SUPERBLOCK: %u **\n", - new_sb); -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - fflush( stdout ); -#endif - Kokkos::abort( "" ); - } -#endif - // Set the empty pages, block size, and head status for the // superblock. volatile_store( &m_sb_header(new_sb).m_empty_pages, @@ -1436,7 +1460,7 @@ private: volatile_store( &m_sb_header(lock_sb).m_is_active, uint32_t(false) ); } - memory_fence(); + store_fence(); } } } @@ -1445,14 +1469,17 @@ private: atomic_exchange( &m_active(block_size_id), new_sb ); } else { - // Either another thread has the lock and is switching the active superblock for - // this block size or another thread has already changed the active superblock - // since this thread read its value. Keep reading the active superblock until - // it isn't locked to get the new active superblock. + // Either another thread has the lock and is switching the active + // superblock for this block size or another thread has already changed + // the active superblock since this thread read its value. Keep + // atomically reading the active superblock until it isn't locked to get + // the new active superblock. do { - new_sb = volatile_load( &m_active(block_size_id) ); + new_sb = atomic_fetch_or( &m_active(block_size_id), uint32_t(0) ); } while ( new_sb == SUPERBLOCK_LOCK ); + load_fence(); + // Assertions: // 1. An invalid superblock should never be found here. // 2. If the new superblock is the same as the previous superblock, the @@ -1477,14 +1504,25 @@ private: { #if defined( __CUDA_ARCH__ ) // Return value of 64-bit hi-res clock register. - return clock64(); + return clock64(); #elif defined( __i386__ ) || defined( __x86_64 ) // Return value of 64-bit hi-res clock register. 
- unsigned a, d; - __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); - return ( (uint64_t) a) | ( ( (uint64_t) d ) << 32 ); + unsigned a = 0, d = 0; + + __asm__ volatile( "rdtsc" : "=a" (a), "=d" (d) ); + + return ( (uint64_t) a ) | ( ( (uint64_t) d ) << 32 ); +#elif defined( __powerpc ) || defined( __powerpc__ ) || defined( __powerpc64__ ) || \ + defined( __POWERPC__ ) || defined( __ppc__ ) || defined( __ppc64__ ) + unsigned int cycles = 0; + + asm volatile( "mftb %0" : "=r" (cycles) ); + + return (uint64_t) cycles; #else - const uint64_t ticks = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + const uint64_t ticks = + std::chrono::high_resolution_clock::now().time_since_epoch().count(); + return ticks; #endif } @@ -1517,7 +1555,4 @@ private: #undef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO #endif -#undef KOKKOS_MEMPOOL_SB_FULL_FRACTION -#undef KOKKOS_MEMPOOL_PAGE_FULL_FRACTION - #endif // KOKKOS_MEMORYPOOL_HPP diff --git a/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp b/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp index 5ee1f16fec854fc0ee45e39c488095fdee73ed4f..94b58b8affe1921f2bfa9faf1e25b3dc303c5220 100644 --- a/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp +++ b/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp @@ -63,6 +63,8 @@ enum MemoryTraitsFlags { Unmanaged = 0x01 , RandomAccess = 0x02 , Atomic = 0x04 + , Restrict = 0x08 + , Aligned = 0x10 }; template < unsigned T > @@ -73,6 +75,8 @@ struct MemoryTraits { enum { Unmanaged = T & unsigned(Kokkos::Unmanaged) }; enum { RandomAccess = T & unsigned(Kokkos::RandomAccess) }; enum { Atomic = T & unsigned(Kokkos::Atomic) }; + enum { Restrict = T & unsigned(Kokkos::Restrict) }; + enum { Aligned = T & unsigned(Kokkos::Aligned) }; }; diff --git a/lib/kokkos/core/src/Kokkos_OpenMP.hpp b/lib/kokkos/core/src/Kokkos_OpenMP.hpp index 7be4f8245f98ea464d8a27313c13c7aa35be4e46..0e6c6d84fe5199f3ea9a554e604d49c3d71c1380 100644 --- a/lib/kokkos/core/src/Kokkos_OpenMP.hpp +++ b/lib/kokkos/core/src/Kokkos_OpenMP.hpp @@ 
-58,7 +58,7 @@ #endif #include <Kokkos_ScratchSpace.hpp> #include <Kokkos_Parallel.hpp> -#include <Kokkos_TaskPolicy.hpp> +#include <Kokkos_TaskScheduler.hpp> #include <Kokkos_Layout.hpp> #include <impl/Kokkos_Tags.hpp> @@ -160,6 +160,17 @@ public: namespace Kokkos { namespace Impl { +template<> +struct MemorySpaceAccess + < Kokkos::OpenMP::memory_space + , Kokkos::OpenMP::scratch_memory_space + > +{ + enum { assignable = false }; + enum { accessible = true }; + enum { deepcopy = false }; +}; + template<> struct VerifyExecutionCanAccessMemorySpace < Kokkos::OpenMP::memory_space diff --git a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp index 695bc79a1ab900405a160843d8777651dc63cb22..3a73e8a8170fe3729500adbc263137856378170f 100644 --- a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp +++ b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp @@ -53,7 +53,8 @@ struct is_reducer_type { template<class T> struct is_reducer_type<T,typename std::enable_if< - std::is_same<T,typename T::reducer_type>::value + std::is_same<typename std::remove_cv<T>::type, + typename std::remove_cv<typename T::reducer_type>::type>::value >::type> { enum { value = 1 }; }; @@ -726,6 +727,119 @@ public: } }; +template<class Scalar> +struct MinMaxScalar { + Scalar min_val,max_val; + + KOKKOS_INLINE_FUNCTION + void operator = (const MinMaxScalar& rhs) { + min_val = rhs.min_val; + max_val = rhs.max_val; + } + + KOKKOS_INLINE_FUNCTION + void operator = (const volatile MinMaxScalar& rhs) volatile { + min_val = rhs.min_val; + max_val = rhs.max_val; + } +}; + +template<class Scalar, class Space = HostSpace> +struct MinMax { +private: + typedef typename std::remove_cv<Scalar>::type scalar_type; + +public: + //Required + typedef MinMax reducer_type; + typedef MinMaxScalar<scalar_type> value_type; + + typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; + + scalar_type min_init_value; + scalar_type max_init_value; + 
+private: + result_view_type result; + + template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > + struct MinInitWrapper; + + template<class ValueType > + struct MinInitWrapper<ValueType,true> { + static ValueType value() { + return std::numeric_limits<scalar_type>::max(); + } + }; + + template<class ValueType > + struct MinInitWrapper<ValueType,false> { + static ValueType value() { + return scalar_type(); + } + }; + + template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > + struct MaxInitWrapper; + + template<class ValueType > + struct MaxInitWrapper<ValueType,true> { + static ValueType value() { + return std::numeric_limits<scalar_type>::min(); + } + }; + + template<class ValueType > + struct MaxInitWrapper<ValueType,false> { + static ValueType value() { + return scalar_type(); + } + }; + +public: + + MinMax(value_type& result_): + min_init_value(MinInitWrapper<scalar_type>::value()),max_init_value(MaxInitWrapper<scalar_type>::value()),result(&result_) {} + MinMax(const result_view_type& result_): + min_init_value(MinInitWrapper<scalar_type>::value()),max_init_value(MaxInitWrapper<scalar_type>::value()),result(result_) {} + MinMax(value_type& result_, const scalar_type& min_init_value_, const scalar_type& max_init_value_): + min_init_value(min_init_value_),max_init_value(max_init_value_),result(&result_) {} + MinMax(const result_view_type& result_, const scalar_type& min_init_value_, const scalar_type& max_init_value_): + min_init_value(min_init_value_),max_init_value(max_init_value_),result(result_) {} + + //Required + KOKKOS_INLINE_FUNCTION + void join(value_type& dest, const value_type& src) const { + if ( src.min_val < dest.min_val ) { + dest.min_val = src.min_val; + } + if ( src.max_val > dest.max_val ) { + dest.max_val = src.max_val; + } + } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& dest, const volatile value_type& src) const { + if ( src.min_val < dest.min_val ) { + dest.min_val 
= src.min_val; + } + if ( src.max_val > dest.max_val ) { + dest.max_val = src.max_val; + } + } + + //Optional + KOKKOS_INLINE_FUNCTION + void init( value_type& val) const { + val.min_val = min_init_value; + val.max_val = max_init_value; + } + + result_view_type result_view() const { + return result; + } +}; + template<class Scalar, class Index> struct MinMaxLocScalar { Scalar min_val,max_val; @@ -1124,7 +1238,8 @@ void parallel_reduce(const PolicyType& policy, typename Impl::enable_if< Kokkos::Impl::is_execution_policy<PolicyType>::value >::type * = 0) { - Impl::ParallelReduceAdaptor<PolicyType,FunctorType,const ReturnType>::execute("",policy,functor,return_value); + ReturnType return_value_impl = return_value; + Impl::ParallelReduceAdaptor<PolicyType,FunctorType,ReturnType>::execute("",policy,functor,return_value_impl); } template< class FunctorType, class ReturnType > @@ -1133,8 +1248,8 @@ void parallel_reduce(const size_t& policy, const FunctorType& functor, const ReturnType& return_value) { typedef typename Impl::ParallelReducePolicyType<void,size_t,FunctorType>::policy_type policy_type; - - Impl::ParallelReduceAdaptor<policy_type,FunctorType,const ReturnType>::execute("",policy_type(0,policy),functor,return_value); + ReturnType return_value_impl = return_value; + Impl::ParallelReduceAdaptor<policy_type,FunctorType,ReturnType>::execute("",policy_type(0,policy),functor,return_value_impl); } template< class FunctorType, class ReturnType > @@ -1144,7 +1259,8 @@ void parallel_reduce(const std::string& label, const FunctorType& functor, const ReturnType& return_value) { typedef typename Impl::ParallelReducePolicyType<void,size_t,FunctorType>::policy_type policy_type; - Impl::ParallelReduceAdaptor<policy_type,FunctorType,const ReturnType>::execute(label,policy_type(0,policy),functor,return_value); + ReturnType return_value_impl = return_value; + 
Impl::ParallelReduceAdaptor<policy_type,FunctorType,ReturnType>::execute(label,policy_type(0,policy),functor,return_value_impl); } // No Return Argument diff --git a/lib/kokkos/core/src/Kokkos_Qthread.hpp b/lib/kokkos/core/src/Kokkos_Qthread.hpp index d61f8d518e6641debd19d4975b2535a6bfbcad8f..c58518b0654bb3267a12041a2ab7fef4e2375972 100644 --- a/lib/kokkos/core/src/Kokkos_Qthread.hpp +++ b/lib/kokkos/core/src/Kokkos_Qthread.hpp @@ -144,6 +144,17 @@ public: namespace Kokkos { namespace Impl { +template<> +struct MemorySpaceAccess + < Kokkos::Qthread::memory_space + , Kokkos::Qthread::scratch_memory_space + > +{ + enum { assignable = false }; + enum { accessible = true }; + enum { deepcopy = false }; +}; + template<> struct VerifyExecutionCanAccessMemorySpace < Kokkos::Qthread::memory_space diff --git a/lib/kokkos/core/src/Kokkos_Serial.hpp b/lib/kokkos/core/src/Kokkos_Serial.hpp index 233b56c93956f7898346780d1bfe327fd11afb03..914edbc7c4640001d95affc7e1e6175b0dfe2de6 100644 --- a/lib/kokkos/core/src/Kokkos_Serial.hpp +++ b/lib/kokkos/core/src/Kokkos_Serial.hpp @@ -50,7 +50,7 @@ #include <cstddef> #include <iosfwd> #include <Kokkos_Parallel.hpp> -#include <Kokkos_TaskPolicy.hpp> +#include <Kokkos_TaskScheduler.hpp> #include <Kokkos_Layout.hpp> #include <Kokkos_HostSpace.hpp> #include <Kokkos_ScratchSpace.hpp> @@ -59,7 +59,6 @@ #include <impl/Kokkos_FunctorAdapter.hpp> #include <impl/Kokkos_Profiling_Interface.hpp> - #include <KokkosExp_MDRangePolicy.hpp> #if defined( KOKKOS_HAVE_SERIAL ) @@ -192,6 +191,17 @@ public: namespace Kokkos { namespace Impl { +template<> +struct MemorySpaceAccess + < Kokkos::Serial::memory_space + , Kokkos::Serial::scratch_memory_space + > +{ + enum { assignable = false }; + enum { accessible = true }; + enum { deepcopy = false }; +}; + template<> struct VerifyExecutionCanAccessMemorySpace < Kokkos::Serial::memory_space @@ -250,7 +260,6 @@ public: const scratch_memory_space & thread_scratch(int) const { return m_space ; } - 
KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; } KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; } KOKKOS_INLINE_FUNCTION int team_rank() const { return 0 ; } @@ -306,10 +315,9 @@ public: } // namespace Impl - /* * < Kokkos::Serial , WorkArgTag > - * < WorkArgTag , Impl::enable_if< Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value >::type > + * < WorkArgTag , Impl::enable_if< std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value >::type > * */ namespace Impl { @@ -402,7 +410,6 @@ public: , m_chunk_size ( 32 ) {} - inline int chunk_size() const { return m_chunk_size ; } /** \brief set chunk_size to a discrete value*/ @@ -525,7 +532,6 @@ private: const ReducerType m_reducer ; const pointer_type m_result_ptr ; - template< class TagType > inline typename std::enable_if< std::is_same< TagType , void >::value >::type @@ -895,20 +901,22 @@ struct TeamThreadRangeBoundariesStruct<iType,SerialTeamMember> { } // namespace Impl -template<typename iType> +template< typename iType > KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember> TeamThreadRange( const Impl::SerialTeamMember& thread, const iType & count ) { - return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>(thread,count); + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::SerialTeamMember >( thread, count ); } -template<typename iType> +template< typename iType1, typename iType2 > KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember> -TeamThreadRange( const Impl::SerialTeamMember& thread, const iType & begin , const iType & end ) +Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, + Impl::SerialTeamMember > +TeamThreadRange( const Impl::SerialTeamMember& thread, const iType1 & begin, const iType2 & end ) { - return 
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>(thread,begin,end); + typedef typename std::common_type< iType1, iType2 >::type iType; + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::SerialTeamMember >( thread, iType(begin), iType(end) ); } template<typename iType> @@ -1113,4 +1121,3 @@ void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const Func //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- - diff --git a/lib/kokkos/core/src/Kokkos_TaskPolicy.hpp b/lib/kokkos/core/src/Kokkos_TaskPolicy.hpp index fc9113b75052e91fc260f95725fe360b98e548e8..05ed5103b874d3f8912f1e8ca6e0559967dbd86a 100644 --- a/lib/kokkos/core/src/Kokkos_TaskPolicy.hpp +++ b/lib/kokkos/core/src/Kokkos_TaskPolicy.hpp @@ -41,1069 +41,7 @@ //@HEADER */ -// Experimental unified task-data parallel manycore LDRD +// For backward compatibility: -#ifndef KOKKOS_TASKPOLICY_HPP -#define KOKKOS_TASKPOLICY_HPP - -//---------------------------------------------------------------------------- - -#include <Kokkos_Core_fwd.hpp> - -// If compiling with CUDA then must be using CUDA 8 or better -// and use relocateable device code to enable the task policy. 
-// nvcc relocatable device code option: --relocatable-device-code=true - -#if ( defined( KOKKOS_COMPILER_NVCC ) ) - #if ( 8000 <= CUDA_VERSION ) && \ - defined( KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE ) - - #define KOKKOS_ENABLE_TASKPOLICY - - #endif -#else - -#define KOKKOS_ENABLE_TASKPOLICY - -#endif - - -#if defined( KOKKOS_ENABLE_TASKPOLICY ) - -//---------------------------------------------------------------------------- - -#include <Kokkos_MemoryPool.hpp> -#include <impl/Kokkos_Tags.hpp> -#include <impl/Kokkos_TaskQueue.hpp> - -//---------------------------------------------------------------------------- - -namespace Kokkos { - -enum TaskType { TaskTeam = Impl::TaskBase<void,void,void>::TaskTeam - , TaskSingle = Impl::TaskBase<void,void,void>::TaskSingle }; - -enum TaskPriority { TaskHighPriority = 0 - , TaskRegularPriority = 1 - , TaskLowPriority = 2 }; - -template< typename Space > -class TaskPolicy ; - -template< typename Space > -void wait( TaskPolicy< Space > const & ); - -} // namespace Kokkos - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -/*\brief Implementation data for task data management, access, and execution. - * - * CRTP Inheritance structure to allow static_cast from the - * task root type and a task's FunctorType. - * - * TaskBase< Space , ResultType , FunctorType > - * : TaskBase< Space , ResultType , void > - * , FunctorType - * { ... }; - * - * TaskBase< Space , ResultType , void > - * : TaskBase< Space , void , void > - * { ... 
}; - */ -template< typename Space , typename ResultType , typename FunctorType > -class TaskBase ; - -template< typename Space > -class TaskExec ; - -}} // namespace Kokkos::Impl - -//---------------------------------------------------------------------------- - -namespace Kokkos { - -/** - * - * Future< space > // value_type == void - * Future< value > // space == Default - * Future< value , space > - * - */ -template< typename Arg1 /* = void */ , typename Arg2 /* = void */ > -class Future { -private: - - template< typename > friend class TaskPolicy ; - template< typename , typename > friend class Future ; - template< typename , typename , typename > friend class Impl::TaskBase ; - - enum { Arg1_is_space = Kokkos::Impl::is_space< Arg1 >::value }; - enum { Arg2_is_space = Kokkos::Impl::is_space< Arg2 >::value }; - enum { Arg1_is_value = ! Arg1_is_space && - ! std::is_same< Arg1 , void >::value }; - enum { Arg2_is_value = ! Arg2_is_space && - ! std::is_same< Arg2 , void >::value }; - - static_assert( ! ( Arg1_is_space && Arg2_is_space ) - , "Future cannot be given two spaces" ); - - static_assert( ! 
( Arg1_is_value && Arg2_is_value ) - , "Future cannot be given two value types" ); - - using ValueType = - typename std::conditional< Arg1_is_value , Arg1 , - typename std::conditional< Arg2_is_value , Arg2 , void - >::type >::type ; - - using Space = - typename std::conditional< Arg1_is_space , Arg1 , - typename std::conditional< Arg2_is_space , Arg2 , void - >::type >::type ; - - using task_base = Impl::TaskBase< Space , ValueType , void > ; - using queue_type = Impl::TaskQueue< Space > ; - - task_base * m_task ; - - KOKKOS_INLINE_FUNCTION explicit - Future( task_base * task ) : m_task(0) - { if ( task ) queue_type::assign( & m_task , task ); } - - //---------------------------------------- - -public: - - using execution_space = typename Space::execution_space ; - using value_type = ValueType ; - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - bool is_null() const { return 0 == m_task ; } - - KOKKOS_INLINE_FUNCTION - int reference_count() const - { return 0 != m_task ? 
m_task->reference_count() : 0 ; } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - ~Future() { if ( m_task ) queue_type::assign( & m_task , (task_base*)0 ); } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - constexpr Future() noexcept : m_task(0) {} - - KOKKOS_INLINE_FUNCTION - Future( Future && rhs ) - : m_task( rhs.m_task ) { rhs.m_task = 0 ; } - - KOKKOS_INLINE_FUNCTION - Future( const Future & rhs ) - : m_task(0) - { if ( rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); } - - KOKKOS_INLINE_FUNCTION - Future & operator = ( Future && rhs ) - { - if ( m_task ) queue_type::assign( & m_task , (task_base*)0 ); - m_task = rhs.m_task ; - rhs.m_task = 0 ; - return *this ; - } - - KOKKOS_INLINE_FUNCTION - Future & operator = ( const Future & rhs ) - { - if ( m_task || rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); - return *this ; - } - - //---------------------------------------- - - template< class A1 , class A2 > - KOKKOS_INLINE_FUNCTION - Future( Future<A1,A2> && rhs ) - : m_task( rhs.m_task ) - { - static_assert - ( std::is_same< Space , void >::value || - std::is_same< Space , typename Future<A1,A2>::Space >::value - , "Assigned Futures must have the same space" ); - - static_assert - ( std::is_same< value_type , void >::value || - std::is_same< value_type , typename Future<A1,A2>::value_type >::value - , "Assigned Futures must have the same value_type" ); - - rhs.m_task = 0 ; - } - - template< class A1 , class A2 > - KOKKOS_INLINE_FUNCTION - Future( const Future<A1,A2> & rhs ) - : m_task(0) - { - static_assert - ( std::is_same< Space , void >::value || - std::is_same< Space , typename Future<A1,A2>::Space >::value - , "Assigned Futures must have the same space" ); - - static_assert - ( std::is_same< value_type , void >::value || - std::is_same< value_type , typename Future<A1,A2>::value_type >::value - , "Assigned Futures must have the same value_type" ); - - if ( rhs.m_task ) 
queue_type::assign( & m_task , rhs.m_task ); - } - - template< class A1 , class A2 > - KOKKOS_INLINE_FUNCTION - Future & operator = ( const Future<A1,A2> & rhs ) - { - static_assert - ( std::is_same< Space , void >::value || - std::is_same< Space , typename Future<A1,A2>::Space >::value - , "Assigned Futures must have the same space" ); - - static_assert - ( std::is_same< value_type , void >::value || - std::is_same< value_type , typename Future<A1,A2>::value_type >::value - , "Assigned Futures must have the same value_type" ); - - if ( m_task || rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); - return *this ; - } - - template< class A1 , class A2 > - KOKKOS_INLINE_FUNCTION - Future & operator = ( Future<A1,A2> && rhs ) - { - static_assert - ( std::is_same< Space , void >::value || - std::is_same< Space , typename Future<A1,A2>::Space >::value - , "Assigned Futures must have the same space" ); - - static_assert - ( std::is_same< value_type , void >::value || - std::is_same< value_type , typename Future<A1,A2>::value_type >::value - , "Assigned Futures must have the same value_type" ); - - if ( m_task ) queue_type::assign( & m_task , (task_base*) 0 ); - m_task = rhs.m_task ; - rhs.m_task = 0 ; - return *this ; - } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - typename task_base::get_return_type - get() const - { - if ( 0 == m_task ) { - Kokkos::abort( "Kokkos:::Future::get ERROR: is_null()"); - } - return m_task->get(); - } -}; - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -template< typename ExecSpace > -class TaskPolicy -{ -private: - - using track_type = Kokkos::Experimental::Impl::SharedAllocationTracker ; - using queue_type = Kokkos::Impl::TaskQueue< ExecSpace > ; - using task_base = Impl::TaskBase< ExecSpace , void , void > ; - - track_type m_track ; - 
queue_type * m_queue ; - - //---------------------------------------- - // Process optional arguments to spawn and respawn functions - - KOKKOS_INLINE_FUNCTION static - void assign( task_base * const ) {} - - // TaskTeam or TaskSingle - template< typename ... Options > - KOKKOS_INLINE_FUNCTION static - void assign( task_base * const task - , TaskType const & arg - , Options const & ... opts ) - { - task->m_task_type = arg ; - assign( task , opts ... ); - } - - // TaskHighPriority or TaskRegularPriority or TaskLowPriority - template< typename ... Options > - KOKKOS_INLINE_FUNCTION static - void assign( task_base * const task - , TaskPriority const & arg - , Options const & ... opts ) - { - task->m_priority = arg ; - assign( task , opts ... ); - } - - // Future for a dependence - template< typename A1 , typename A2 , typename ... Options > - KOKKOS_INLINE_FUNCTION static - void assign( task_base * const task - , Future< A1 , A2 > const & arg - , Options const & ... opts ) - { - // Assign dependence to task->m_next - // which will be processed within subsequent call to schedule. - // Error if the dependence is reset. - - if ( 0 != Kokkos::atomic_exchange(& task->m_next, arg.m_task) ) { - Kokkos::abort("TaskPolicy ERROR: resetting task dependence"); - } - - if ( 0 != arg.m_task ) { - // The future may be destroyed upon returning from this call - // so increment reference count to track this assignment. - Kokkos::atomic_fetch_add( &(arg.m_task->m_ref_count) , 1 ); - } - - assign( task , opts ... 
); - } - - //---------------------------------------- - -public: - - using execution_policy = TaskPolicy ; - using execution_space = ExecSpace ; - using memory_space = typename queue_type::memory_space ; - using member_type = Kokkos::Impl::TaskExec< ExecSpace > ; - - KOKKOS_INLINE_FUNCTION - TaskPolicy() : m_track(), m_queue(0) {} - - KOKKOS_INLINE_FUNCTION - TaskPolicy( TaskPolicy && rhs ) = default ; - - KOKKOS_INLINE_FUNCTION - TaskPolicy( TaskPolicy const & rhs ) = default ; - - KOKKOS_INLINE_FUNCTION - TaskPolicy & operator = ( TaskPolicy && rhs ) = default ; - - KOKKOS_INLINE_FUNCTION - TaskPolicy & operator = ( TaskPolicy const & rhs ) = default ; - - TaskPolicy( memory_space const & arg_memory_space - , unsigned const arg_memory_pool_capacity - , unsigned const arg_memory_pool_log2_superblock = 12 ) - : m_track() - , m_queue(0) - { - typedef Kokkos::Experimental::Impl::SharedAllocationRecord - < memory_space , typename queue_type::Destroy > - record_type ; - - record_type * record = - record_type::allocate( arg_memory_space - , "TaskQueue" - , sizeof(queue_type) - ); - - m_queue = new( record->data() ) - queue_type( arg_memory_space - , arg_memory_pool_capacity - , arg_memory_pool_log2_superblock ); - - record->m_destroy.m_queue = m_queue ; - - m_track.assign_allocated_record_to_uninitialized( record ); - } - - //---------------------------------------- - /**\brief Allocation size for a spawned task */ - template< typename FunctorType > - KOKKOS_FUNCTION - size_t spawn_allocation_size() const - { - using task_type = Impl::TaskBase< execution_space - , typename FunctorType::value_type - , FunctorType > ; - - return m_queue->allocate_block_size( sizeof(task_type) ); - } - - /**\brief Allocation size for a when_all aggregate */ - KOKKOS_FUNCTION - size_t when_all_allocation_size( int narg ) const - { - using task_base = Kokkos::Impl::TaskBase< ExecSpace , void , void > ; - - return m_queue->allocate_block_size( sizeof(task_base) + narg * sizeof(task_base*) ); 
- } - - //---------------------------------------- - - /**\brief A task spawns a task with options - * - * 1) High, Normal, or Low priority - * 2) With or without dependence - * 3) Team or Serial - */ - template< typename FunctorType , typename ... Options > - KOKKOS_FUNCTION - Future< typename FunctorType::value_type , ExecSpace > - task_spawn( FunctorType const & arg_functor - , Options const & ... arg_options - ) const - { - using value_type = typename FunctorType::value_type ; - using future_type = Future< value_type , execution_space > ; - using task_type = Impl::TaskBase< execution_space - , value_type - , FunctorType > ; - - //---------------------------------------- - // Give single-thread back-ends an opportunity to clear - // queue of ready tasks before allocating a new task - - m_queue->iff_single_thread_recursive_execute(); - - //---------------------------------------- - - future_type f ; - - // Allocate task from memory pool - f.m_task = - reinterpret_cast< task_type * >(m_queue->allocate(sizeof(task_type))); - - if ( f.m_task ) { - - // Placement new construction - new ( f.m_task ) task_type( arg_functor ); - - // Reference count starts at two - // +1 for matching decrement when task is complete - // +1 for future - f.m_task->m_queue = m_queue ; - f.m_task->m_ref_count = 2 ; - f.m_task->m_alloc_size = sizeof(task_type); - - assign( f.m_task , arg_options... ); - - // Spawning from within the execution space so the - // apply function pointer is guaranteed to be valid - f.m_task->m_apply = task_type::apply ; - - m_queue->schedule( f.m_task ); - // this task may be updated or executed at any moment - } - - return f ; - } - - /**\brief The host process spawns a task with options - * - * 1) High, Normal, or Low priority - * 2) With or without dependence - * 3) Team or Serial - */ - template< typename FunctorType , typename ... 
Options > - inline - Future< typename FunctorType::value_type , ExecSpace > - host_spawn( FunctorType const & arg_functor - , Options const & ... arg_options - ) const - { - using value_type = typename FunctorType::value_type ; - using future_type = Future< value_type , execution_space > ; - using task_type = Impl::TaskBase< execution_space - , value_type - , FunctorType > ; - - future_type f ; - - // Allocate task from memory pool - f.m_task = - reinterpret_cast<task_type*>( m_queue->allocate(sizeof(task_type)) ); - - if ( f.m_task ) { - - // Placement new construction - new( f.m_task ) task_type( arg_functor ); - - // Reference count starts at two: - // +1 to match decrement when task completes - // +1 for the future - f.m_task->m_queue = m_queue ; - f.m_task->m_ref_count = 2 ; - f.m_task->m_alloc_size = sizeof(task_type); - - assign( f.m_task , arg_options... ); - - // Potentially spawning outside execution space so the - // apply function pointer must be obtained from execution space. - // Required for Cuda execution space function pointer. - queue_type::specialization::template - proc_set_apply< FunctorType >( & f.m_task->m_apply ); - - m_queue->schedule( f.m_task ); - } - return f ; - } - - /**\brief Return a future that is complete - * when all input futures are complete. 
- */ - template< typename A1 , typename A2 > - KOKKOS_FUNCTION - Future< ExecSpace > - when_all( int narg , Future< A1 , A2 > const * const arg ) const - { - static_assert - ( std::is_same< execution_space - , typename Future< A1 , A2 >::execution_space - >::value - , "Future must have same execution space" ); - - using future_type = Future< ExecSpace > ; - using task_base = Kokkos::Impl::TaskBase< ExecSpace , void , void > ; - - future_type f ; - - size_t const size = sizeof(task_base) + narg * sizeof(task_base*); - - f.m_task = - reinterpret_cast< task_base * >( m_queue->allocate( size ) ); - - if ( f.m_task ) { - - new( f.m_task ) task_base(); - - // Reference count starts at two: - // +1 to match decrement when task completes - // +1 for the future - f.m_task->m_queue = m_queue ; - f.m_task->m_ref_count = 2 ; - f.m_task->m_alloc_size = size ; - f.m_task->m_dep_count = narg ; - f.m_task->m_task_type = task_base::Aggregate ; - - task_base ** const dep = f.m_task->aggregate_dependences(); - - // Assign dependences to increment their reference count - // The futures may be destroyed upon returning from this call - // so increment reference count to track this assignment. - - for ( int i = 0 ; i < narg ; ++i ) { - task_base * const t = dep[i] = arg[i].m_task ; - if ( 0 != t ) { - Kokkos::atomic_fetch_add( &(t->m_ref_count) , 1 ); - } - } - - m_queue->schedule( f.m_task ); - // this when_all may be processed at any moment - } - - return f ; - } - - /**\brief An executing task respawns itself with options - * - * 1) High, Normal, or Low priority - * 2) With or without dependence - */ - template< class FunctorType , typename ... Options > - KOKKOS_FUNCTION - void respawn( FunctorType * task_self - , Options const & ... 
arg_options ) const - { - using value_type = typename FunctorType::value_type ; - using task_type = Impl::TaskBase< execution_space - , value_type - , FunctorType > ; - - task_base * const zero = (task_base *) 0 ; - task_base * const lock = (task_base *) task_base::LockTag ; - task_type * const task = static_cast< task_type * >( task_self ); - - // Precondition: - // task is in Executing state - // therefore m_next == LockTag - // - // Change to m_next == 0 for no dependence - - if ( lock != Kokkos::atomic_exchange( & task->m_next, zero ) ) { - Kokkos::abort("TaskPolicy::respawn ERROR: already respawned"); - } - - assign( task , arg_options... ); - - // Postcondition: - // task is in Executing-Respawn state - // therefore m_next == dependece or 0 - } - - //---------------------------------------- - - template< typename S > - friend - void Kokkos::wait( Kokkos::TaskPolicy< S > const & ); - - //---------------------------------------- - - inline - int allocation_capacity() const noexcept - { return m_queue->m_memory.get_mem_size(); } - - KOKKOS_INLINE_FUNCTION - int allocated_task_count() const noexcept - { return m_queue->m_count_alloc ; } - - KOKKOS_INLINE_FUNCTION - int allocated_task_count_max() const noexcept - { return m_queue->m_max_alloc ; } - - KOKKOS_INLINE_FUNCTION - long allocated_task_count_accum() const noexcept - { return m_queue->m_accum_alloc ; } - -}; - -template< typename ExecSpace > -inline -void wait( TaskPolicy< ExecSpace > const & policy ) -{ policy.m_queue->execute(); } - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -struct FutureValueTypeIsVoidError {}; - -template < class 
ExecSpace , class ResultType , class FunctorType > -class TaskMember ; - -} /* namespace Impl */ -} /* namespace Experimental */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { - -/**\brief States of a task */ -enum TaskState - { TASK_STATE_NULL = 0 ///< Does not exist - , TASK_STATE_CONSTRUCTING = 1 ///< Is under construction - , TASK_STATE_WAITING = 2 ///< Is waiting for execution - , TASK_STATE_EXECUTING = 4 ///< Is executing - , TASK_STATE_COMPLETE = 8 ///< Execution is complete - }; - -/**\brief Tag for Future<Latch,Space> - */ -struct Latch {}; - -/** - * - * Future< space > // value_type == void - * Future< value > // space == Default - * Future< value , space > - * - */ -template< class Arg1 = void , class Arg2 = void > -class Future { -private: - - template< class , class , class > friend class Impl::TaskMember ; - template< class > friend class TaskPolicy ; - template< class , class > friend class Future ; - - // Argument #2, if not void, must be the space. - enum { Arg1_is_space = Kokkos::Impl::is_execution_space< Arg1 >::value }; - enum { Arg2_is_space = Kokkos::Impl::is_execution_space< Arg2 >::value }; - enum { Arg2_is_void = std::is_same< Arg2 , void >::value }; - - struct ErrorNoExecutionSpace {}; - - enum { Opt1 = Arg1_is_space && Arg2_is_void - , Opt2 = ! Arg1_is_space && Arg2_is_void - , Opt3 = ! 
Arg1_is_space && Arg2_is_space - , OptOK = Kokkos::Impl::StaticAssert< Opt1 || Opt2 || Opt3 , ErrorNoExecutionSpace >::value - }; - - typedef typename - Kokkos::Impl::if_c< Opt2 || Opt3 , Arg1 , void >::type - ValueType ; - - typedef typename - Kokkos::Impl::if_c< Opt1 , Arg1 , typename - Kokkos::Impl::if_c< Opt2 , Kokkos::DefaultExecutionSpace , typename - Kokkos::Impl::if_c< Opt3 , Arg2 , void - >::type >::type >::type - ExecutionSpace ; - - typedef Impl::TaskMember< ExecutionSpace , void , void > TaskRoot ; - typedef Impl::TaskMember< ExecutionSpace , ValueType , void > TaskValue ; - - TaskRoot * m_task ; - - KOKKOS_INLINE_FUNCTION explicit - Future( TaskRoot * task ) - : m_task(0) - { TaskRoot::assign( & m_task , TaskRoot::template verify_type< ValueType >( task ) ); } - - //---------------------------------------- - -public: - - typedef ValueType value_type; - typedef ExecutionSpace execution_space ; - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - TaskState get_task_state() const - { return 0 != m_task ? m_task->get_state() : TASK_STATE_NULL ; } - - KOKKOS_INLINE_FUNCTION - bool is_null() const { return 0 == m_task ; } - - KOKKOS_INLINE_FUNCTION - int reference_count() const - { return 0 != m_task ? 
m_task->reference_count() : 0 ; } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - ~Future() { TaskRoot::assign( & m_task , 0 ); } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - Future() : m_task(0) {} - - KOKKOS_INLINE_FUNCTION - Future( const Future & rhs ) - : m_task(0) - { TaskRoot::assign( & m_task , rhs.m_task ); } - - KOKKOS_INLINE_FUNCTION - Future & operator = ( const Future & rhs ) - { TaskRoot::assign( & m_task , rhs.m_task ); return *this ; } - - //---------------------------------------- - - template< class A1 , class A2 > - KOKKOS_INLINE_FUNCTION - Future( const Future<A1,A2> & rhs ) - : m_task(0) - { TaskRoot::assign( & m_task , TaskRoot::template verify_type< value_type >( rhs.m_task ) ); } - - template< class A1 , class A2 > - KOKKOS_INLINE_FUNCTION - Future & operator = ( const Future<A1,A2> & rhs ) - { TaskRoot::assign( & m_task , TaskRoot::template verify_type< value_type >( rhs.m_task ) ); return *this ; } - - //---------------------------------------- - - typedef typename TaskValue::get_result_type get_result_type ; - - KOKKOS_INLINE_FUNCTION - get_result_type get() const - { - if ( 0 == m_task ) { - Kokkos::abort( "Kokkos::Experimental::Future::get ERROR: is_null()"); - } - return static_cast<TaskValue*>( m_task )->get(); - } - - //---------------------------------------- -}; - -template< class Arg2 > -class Future< Latch , Arg2 > { -private: - - template< class , class , class > friend class Impl::TaskMember ; - template< class > friend class TaskPolicy ; - template< class , class > friend class Future ; - - // Argument #2, if not void, must be the space. 
- enum { Arg2_is_space = Kokkos::Impl::is_execution_space< Arg2 >::value }; - enum { Arg2_is_void = std::is_same< Arg2 , void >::value }; - - static_assert( Arg2_is_space || Arg2_is_void - , "Future template argument #2 must be a space" ); - - typedef typename - std::conditional< Arg2_is_space , Arg2 , Kokkos::DefaultExecutionSpace > - ::type ExecutionSpace ; - - typedef Impl::TaskMember< ExecutionSpace , void , void > TaskRoot ; - - TaskRoot * m_task ; - - KOKKOS_INLINE_FUNCTION explicit - Future( TaskRoot * task ) - : m_task(0) - { TaskRoot::assign( & m_task , task ); } - - //---------------------------------------- - -public: - - typedef void value_type; - typedef ExecutionSpace execution_space ; - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - void add( const int k ) const - { if ( 0 != m_task ) m_task->latch_add(k); } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - TaskState get_task_state() const - { return 0 != m_task ? m_task->get_state() : TASK_STATE_NULL ; } - - KOKKOS_INLINE_FUNCTION - bool is_null() const { return 0 == m_task ; } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - ~Future() { TaskRoot::assign( & m_task , 0 ); } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - Future() : m_task(0) {} - - KOKKOS_INLINE_FUNCTION - Future( const Future & rhs ) - : m_task(0) - { TaskRoot::assign( & m_task , rhs.m_task ); } - - KOKKOS_INLINE_FUNCTION - Future & operator = ( const Future & rhs ) - { TaskRoot::assign( & m_task , rhs.m_task ); return *this ; } - - //---------------------------------------- - - typedef void get_result_type ; - - KOKKOS_INLINE_FUNCTION - void get() const {} - - //---------------------------------------- - -}; - -namespace Impl { - -template< class T > -struct is_future : public std::false_type {}; - -template< class Arg0 , class Arg1 > -struct is_future< Kokkos::Experimental::Future<Arg0,Arg1> > - : public std::true_type {}; - 
-} /* namespace Impl */ -} /* namespace Experimental */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { - -/** \brief If the argument is an execution space then a serial task in that space */ -template< class Arg0 = Kokkos::DefaultExecutionSpace > -class TaskPolicy { -public: - - typedef typename Arg0::execution_space execution_space ; - - //---------------------------------------- - - TaskPolicy - ( const unsigned arg_task_max_count - , const unsigned arg_task_max_size - , const unsigned arg_task_default_dependence_capacity = 4 - , const unsigned arg_task_team_size = 0 /* choose default */ - ); - - TaskPolicy() = default ; - TaskPolicy( TaskPolicy && rhs ) = default ; - TaskPolicy( const TaskPolicy & rhs ) = default ; - TaskPolicy & operator = ( TaskPolicy && rhs ) = default ; - TaskPolicy & operator = ( const TaskPolicy & rhs ) = default ; - - //---------------------------------------- - /** \brief Create a serial task with storage for dependences. - * - * Postcondition: Task is in the 'constructing' state. - */ - template< class FunctorType > - Future< typename FunctorType::value_type , execution_space > - create( const FunctorType & functor - , const unsigned dependence_capacity /* = default */ ); - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - Future< typename FunctorType::value_type , execution_space > - create_team( const FunctorType & functor - , const unsigned dependence_capacity /* = default */ ); - - /** \brief Set dependence that 'after' cannot start execution - * until 'before' has completed. - * - * Precondition: The 'after' task must be in then 'Constructing' state. 
- */ - template< class TA , class TB > - void add_dependence( const Future<TA,execution_space> & after - , const Future<TB,execution_space> & before ) const ; - - /** \brief Spawn a task in the 'Constructing' state - * - * Precondition: Task is in the 'constructing' state. - * Postcondition: Task is waiting, executing, or complete. - */ - template< class T > - const Future<T,execution_space> & - spawn( const Future<T,execution_space> & ) const ; - - //---------------------------------------- - /** \brief Query dependence of an executing task */ - - template< class FunctorType > - Future< execution_space > - get_dependence( FunctorType * , const int ) const ; - - //---------------------------------------- - /** \brief Clear current dependences of an executing task - * in preparation for setting new dependences and - * respawning. - * - * Precondition: The functor must be a task in the executing state. - */ - template< class FunctorType > - void clear_dependence( FunctorType * ) const ; - - /** \brief Set dependence that 'after' cannot resume execution - * until 'before' has completed. - * - * The 'after' functor must be in the executing state - */ - template< class FunctorType , class TB > - void add_dependence( FunctorType * after - , const Future<TB,execution_space> & before ) const ; - - /** \brief Respawn (reschedule) an executing task to be called again - * after all dependences have completed. 
- */ - template< class FunctorType > - void respawn( FunctorType * ) const ; -}; - -//---------------------------------------------------------------------------- -/** \brief Create and spawn a single-thread task */ -template< class ExecSpace , class FunctorType > -inline -Future< typename FunctorType::value_type , ExecSpace > -spawn( TaskPolicy<ExecSpace> & policy , const FunctorType & functor ) -{ return policy.spawn( policy.create( functor ) ); } - -/** \brief Create and spawn a single-thread task with dependences */ -template< class ExecSpace , class FunctorType , class Arg0 , class Arg1 > -inline -Future< typename FunctorType::value_type , ExecSpace > -spawn( TaskPolicy<ExecSpace> & policy - , const FunctorType & functor - , const Future<Arg0,Arg1> & before_0 - , const Future<Arg0,Arg1> & before_1 ) -{ - Future< typename FunctorType::value_type , ExecSpace > f ; - f = policy.create( functor , 2 ); - policy.add_dependence( f , before_0 ); - policy.add_dependence( f , before_1 ); - policy.spawn( f ); - return f ; -} - -//---------------------------------------------------------------------------- -/** \brief Create and spawn a parallel_for task */ -template< class ExecSpace , class ParallelPolicyType , class FunctorType > -inline -Future< typename FunctorType::value_type , ExecSpace > -spawn_foreach( TaskPolicy<ExecSpace> & task_policy - , const ParallelPolicyType & parallel_policy - , const FunctorType & functor ) -{ return task_policy.spawn( task_policy.create_foreach( parallel_policy , functor ) ); } - -/** \brief Create and spawn a parallel_reduce task */ -template< class ExecSpace , class ParallelPolicyType , class FunctorType > -inline -Future< typename FunctorType::value_type , ExecSpace > -spawn_reduce( TaskPolicy<ExecSpace> & task_policy - , const ParallelPolicyType & parallel_policy - , const FunctorType & functor ) -{ return task_policy.spawn( task_policy.create_reduce( parallel_policy , functor ) ); } - 
-//---------------------------------------------------------------------------- -/** \brief Respawn a task functor with dependences */ -template< class ExecSpace , class FunctorType , class Arg0 , class Arg1 > -inline -void respawn( TaskPolicy<ExecSpace> & policy - , FunctorType * functor - , const Future<Arg0,Arg1> & before_0 - , const Future<Arg0,Arg1> & before_1 - ) -{ - policy.clear_dependence( functor ); - policy.add_dependence( functor , before_0 ); - policy.add_dependence( functor , before_1 ); - policy.respawn( functor ); -} - -//---------------------------------------------------------------------------- - -template< class ExecSpace > -void wait( TaskPolicy< ExecSpace > & ); - -} /* namespace Experimental */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ -#endif /* #ifndef KOKKOS_TASKPOLICY_HPP */ +#include <Kokkos_TaskScheduler.hpp> diff --git a/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0de926aa12e481a7ccc797e26783b35dd9ddb029 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp @@ -0,0 +1,700 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TASKSCHEDULER_HPP +#define KOKKOS_TASKSCHEDULER_HPP + +//---------------------------------------------------------------------------- + +#include <Kokkos_Core_fwd.hpp> + +// If compiling with CUDA then must be using CUDA 8 or better +// and use relocateable device code to enable the task policy. 
+// nvcc relocatable device code option: --relocatable-device-code=true + +#if ( defined( KOKKOS_HAVE_CUDA ) ) + #if ( 8000 <= CUDA_VERSION ) && \ + defined( KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE ) + + #define KOKKOS_ENABLE_TASKDAG + + #endif +#else + #define KOKKOS_ENABLE_TASKDAG +#endif + + +#if defined( KOKKOS_ENABLE_TASKDAG ) + +//---------------------------------------------------------------------------- + +#include <Kokkos_MemoryPool.hpp> +#include <impl/Kokkos_Tags.hpp> + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +// Forward declarations used in Impl::TaskQueue + +template< typename Arg1 = void , typename Arg2 = void > +class Future ; + +template< typename Space > +class TaskScheduler ; + +} // namespace Kokkos + +#include <impl/Kokkos_TaskQueue.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +/** + * + * Future< space > // value_type == void + * Future< value > // space == Default + * Future< value , space > + * + */ +template< typename Arg1 , typename Arg2 > +class Future { +private: + + template< typename > friend class TaskScheduler ; + template< typename , typename > friend class Future ; + template< typename , typename , typename > friend class Impl::TaskBase ; + + enum { Arg1_is_space = Kokkos::is_space< Arg1 >::value }; + enum { Arg2_is_space = Kokkos::is_space< Arg2 >::value }; + enum { Arg1_is_value = ! Arg1_is_space && + ! std::is_same< Arg1 , void >::value }; + enum { Arg2_is_value = ! Arg2_is_space && + ! std::is_same< Arg2 , void >::value }; + + static_assert( ! ( Arg1_is_space && Arg2_is_space ) + , "Future cannot be given two spaces" ); + + static_assert( ! 
( Arg1_is_value && Arg2_is_value ) + , "Future cannot be given two value types" ); + + using ValueType = + typename std::conditional< Arg1_is_value , Arg1 , + typename std::conditional< Arg2_is_value , Arg2 , void + >::type >::type ; + + using Space = + typename std::conditional< Arg1_is_space , Arg1 , + typename std::conditional< Arg2_is_space , Arg2 , void + >::type >::type ; + + using task_base = Impl::TaskBase< Space , ValueType , void > ; + using queue_type = Impl::TaskQueue< Space > ; + + task_base * m_task ; + + KOKKOS_INLINE_FUNCTION explicit + Future( task_base * task ) : m_task(0) + { if ( task ) queue_type::assign( & m_task , task ); } + + //---------------------------------------- + +public: + + using execution_space = typename Space::execution_space ; + using value_type = ValueType ; + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + bool is_null() const { return 0 == m_task ; } + + KOKKOS_INLINE_FUNCTION + int reference_count() const + { return 0 != m_task ? 
m_task->reference_count() : 0 ; } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + void clear() + { if ( m_task ) queue_type::assign( & m_task , (task_base*)0 ); } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + ~Future() { clear(); } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + constexpr Future() noexcept : m_task(0) {} + + KOKKOS_INLINE_FUNCTION + Future( Future && rhs ) + : m_task( rhs.m_task ) { rhs.m_task = 0 ; } + + KOKKOS_INLINE_FUNCTION + Future( const Future & rhs ) + : m_task(0) + { if ( rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); } + + KOKKOS_INLINE_FUNCTION + Future & operator = ( Future && rhs ) + { + clear(); + m_task = rhs.m_task ; + rhs.m_task = 0 ; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + Future & operator = ( const Future & rhs ) + { + if ( m_task || rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); + return *this ; + } + + //---------------------------------------- + + template< class A1 , class A2 > + KOKKOS_INLINE_FUNCTION + Future( Future<A1,A2> && rhs ) + : m_task( rhs.m_task ) + { + static_assert + ( std::is_same< Space , void >::value || + std::is_same< Space , typename Future<A1,A2>::Space >::value + , "Assigned Futures must have the same space" ); + + static_assert + ( std::is_same< value_type , void >::value || + std::is_same< value_type , typename Future<A1,A2>::value_type >::value + , "Assigned Futures must have the same value_type" ); + + rhs.m_task = 0 ; + } + + template< class A1 , class A2 > + KOKKOS_INLINE_FUNCTION + Future( const Future<A1,A2> & rhs ) + : m_task(0) + { + static_assert + ( std::is_same< Space , void >::value || + std::is_same< Space , typename Future<A1,A2>::Space >::value + , "Assigned Futures must have the same space" ); + + static_assert + ( std::is_same< value_type , void >::value || + std::is_same< value_type , typename Future<A1,A2>::value_type >::value + , "Assigned Futures must have the same 
value_type" ); + + if ( rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); + } + + template< class A1 , class A2 > + KOKKOS_INLINE_FUNCTION + Future & operator = ( const Future<A1,A2> & rhs ) + { + static_assert + ( std::is_same< Space , void >::value || + std::is_same< Space , typename Future<A1,A2>::Space >::value + , "Assigned Futures must have the same space" ); + + static_assert + ( std::is_same< value_type , void >::value || + std::is_same< value_type , typename Future<A1,A2>::value_type >::value + , "Assigned Futures must have the same value_type" ); + + if ( m_task || rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); + return *this ; + } + + template< class A1 , class A2 > + KOKKOS_INLINE_FUNCTION + Future & operator = ( Future<A1,A2> && rhs ) + { + static_assert + ( std::is_same< Space , void >::value || + std::is_same< Space , typename Future<A1,A2>::Space >::value + , "Assigned Futures must have the same space" ); + + static_assert + ( std::is_same< value_type , void >::value || + std::is_same< value_type , typename Future<A1,A2>::value_type >::value + , "Assigned Futures must have the same value_type" ); + + clear(); + m_task = rhs.m_task ; + rhs.m_task = 0 ; + return *this ; + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + typename task_base::get_return_type + get() const + { + if ( 0 == m_task ) { + Kokkos::abort( "Kokkos:::Future::get ERROR: is_null()"); + } + return m_task->get(); + } +}; + +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +enum TaskType { TaskTeam = Impl::TaskBase<void,void,void>::TaskTeam + , TaskSingle = Impl::TaskBase<void,void,void>::TaskSingle }; + +enum TaskPriority { TaskHighPriority = 0 + , TaskRegularPriority = 1 + , TaskLowPriority = 2 }; + +template< typename Space > +void wait( TaskScheduler< Space > const & ); + +} // 
namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { + + + +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +template< typename ExecSpace > +class TaskScheduler +{ +private: + + using track_type = Kokkos::Impl::SharedAllocationTracker ; + using queue_type = Kokkos::Impl::TaskQueue< ExecSpace > ; + using task_base = Impl::TaskBase< ExecSpace , void , void > ; + + track_type m_track ; + queue_type * m_queue ; + + //---------------------------------------- + // Process optional arguments to spawn and respawn functions + + KOKKOS_INLINE_FUNCTION static + void assign( task_base * const ) {} + + // TaskTeam or TaskSingle + template< typename ... Options > + KOKKOS_INLINE_FUNCTION static + void assign( task_base * const task + , TaskType const & arg + , Options const & ... opts ) + { + task->m_task_type = arg ; + assign( task , opts ... ); + } + + // TaskHighPriority or TaskRegularPriority or TaskLowPriority + template< typename ... Options > + KOKKOS_INLINE_FUNCTION static + void assign( task_base * const task + , TaskPriority const & arg + , Options const & ... opts ) + { + task->m_priority = arg ; + assign( task , opts ... ); + } + + // Future for a dependence + template< typename A1 , typename A2 , typename ... Options > + KOKKOS_INLINE_FUNCTION static + void assign( task_base * const task + , Future< A1 , A2 > const & arg + , Options const & ... opts ) + { + // Assign dependence to task->m_next + // which will be processed within subsequent call to schedule. + // Error if the dependence is reset. + + if ( 0 != Kokkos::atomic_exchange(& task->m_next, arg.m_task) ) { + Kokkos::abort("TaskScheduler ERROR: resetting task dependence"); + } + + if ( 0 != arg.m_task ) { + // The future may be destroyed upon returning from this call + // so increment reference count to track this assignment. 
+ Kokkos::atomic_increment( &(arg.m_task->m_ref_count) ); + } + + assign( task , opts ... ); + } + + //---------------------------------------- + +public: + + using execution_policy = TaskScheduler ; + using execution_space = ExecSpace ; + using memory_space = typename queue_type::memory_space ; + using member_type = Kokkos::Impl::TaskExec< ExecSpace > ; + + KOKKOS_INLINE_FUNCTION + TaskScheduler() : m_track(), m_queue(0) {} + + KOKKOS_INLINE_FUNCTION + TaskScheduler( TaskScheduler && rhs ) = default ; + + KOKKOS_INLINE_FUNCTION + TaskScheduler( TaskScheduler const & rhs ) = default ; + + KOKKOS_INLINE_FUNCTION + TaskScheduler & operator = ( TaskScheduler && rhs ) = default ; + + KOKKOS_INLINE_FUNCTION + TaskScheduler & operator = ( TaskScheduler const & rhs ) = default ; + + TaskScheduler( memory_space const & arg_memory_space + , unsigned const arg_memory_pool_capacity + , unsigned const arg_memory_pool_log2_superblock = 12 ) + : m_track() + , m_queue(0) + { + typedef Kokkos::Impl::SharedAllocationRecord + < memory_space , typename queue_type::Destroy > + record_type ; + + record_type * record = + record_type::allocate( arg_memory_space + , "TaskQueue" + , sizeof(queue_type) + ); + + m_queue = new( record->data() ) + queue_type( arg_memory_space + , arg_memory_pool_capacity + , arg_memory_pool_log2_superblock ); + + record->m_destroy.m_queue = m_queue ; + + m_track.assign_allocated_record_to_uninitialized( record ); + } + + //---------------------------------------- + /**\brief Allocation size for a spawned task */ + template< typename FunctorType > + KOKKOS_FUNCTION + size_t spawn_allocation_size() const + { + using task_type = Impl::TaskBase< execution_space + , typename FunctorType::value_type + , FunctorType > ; + + return m_queue->allocate_block_size( sizeof(task_type) ); + } + + /**\brief Allocation size for a when_all aggregate */ + KOKKOS_FUNCTION + size_t when_all_allocation_size( int narg ) const + { + using task_base = Kokkos::Impl::TaskBase< ExecSpace 
, void , void > ; + + return m_queue->allocate_block_size( sizeof(task_base) + narg * sizeof(task_base*) ); + } + + //---------------------------------------- + + /**\brief A task spawns a task with options + * + * 1) High, Regular, or Low priority + * 2) With or without dependence + * 3) Team or Single + */ + template< typename FunctorType , typename ... Options > + KOKKOS_FUNCTION + Future< typename FunctorType::value_type , ExecSpace > + task_spawn( FunctorType const & arg_functor + , Options const & ... arg_options + ) const + { + using value_type = typename FunctorType::value_type ; + using future_type = Future< value_type , execution_space > ; + using task_type = Impl::TaskBase< execution_space + , value_type + , FunctorType > ; + + //---------------------------------------- + // Give single-thread back-ends an opportunity to clear + // queue of ready tasks before allocating a new task + + m_queue->iff_single_thread_recursive_execute(); + + //---------------------------------------- + + future_type f ; + + // Allocate task from memory pool + f.m_task = + reinterpret_cast< task_type * >(m_queue->allocate(sizeof(task_type))); + + if ( f.m_task ) { + + // Placement new construction + new ( f.m_task ) task_type( arg_functor ); + + // Reference count starts at two + // +1 for matching decrement when task is complete + // +1 for future + f.m_task->m_queue = m_queue ; + f.m_task->m_ref_count = 2 ; + f.m_task->m_alloc_size = sizeof(task_type); + + assign( f.m_task , arg_options... ); + + // Spawning from within the execution space so the + // apply function pointer is guaranteed to be valid + f.m_task->m_apply = task_type::apply ; + + m_queue->schedule( f.m_task ); + // this task may be updated or executed at any moment + } + + return f ; + } + + /**\brief The host process spawns a task with options + * + * 1) High, Regular, or Low priority + * 2) With or without dependence + * 3) Team or Single + */ + template< typename FunctorType , typename ... 
Options > + inline + Future< typename FunctorType::value_type , ExecSpace > + host_spawn( FunctorType const & arg_functor + , Options const & ... arg_options + ) const + { + using value_type = typename FunctorType::value_type ; + using future_type = Future< value_type , execution_space > ; + using task_type = Impl::TaskBase< execution_space + , value_type + , FunctorType > ; + + if ( m_queue == 0 ) { + Kokkos::abort("Kokkos::TaskScheduler not initialized"); + } + + future_type f ; + + // Allocate task from memory pool + f.m_task = + reinterpret_cast<task_type*>( m_queue->allocate(sizeof(task_type)) ); + + if ( f.m_task ) { + + // Placement new construction + new( f.m_task ) task_type( arg_functor ); + + // Reference count starts at two: + // +1 to match decrement when task completes + // +1 for the future + f.m_task->m_queue = m_queue ; + f.m_task->m_ref_count = 2 ; + f.m_task->m_alloc_size = sizeof(task_type); + + assign( f.m_task , arg_options... ); + + // Potentially spawning outside execution space so the + // apply function pointer must be obtained from execution space. + // Required for Cuda execution space function pointer. + queue_type::specialization::template + proc_set_apply< FunctorType >( & f.m_task->m_apply ); + + m_queue->schedule( f.m_task ); + } + return f ; + } + + /**\brief Return a future that is complete + * when all input futures are complete. 
+ */ + template< typename A1 , typename A2 > + KOKKOS_FUNCTION + Future< ExecSpace > + when_all( int narg , Future< A1 , A2 > const * const arg ) const + { + static_assert + ( std::is_same< execution_space + , typename Future< A1 , A2 >::execution_space + >::value + , "Future must have same execution space" ); + + using future_type = Future< ExecSpace > ; + using task_base = Kokkos::Impl::TaskBase< ExecSpace , void , void > ; + + future_type f ; + + size_t const size = sizeof(task_base) + narg * sizeof(task_base*); + + f.m_task = + reinterpret_cast< task_base * >( m_queue->allocate( size ) ); + + if ( f.m_task ) { + + new( f.m_task ) task_base(); + + // Reference count starts at two: + // +1 to match decrement when task completes + // +1 for the future + f.m_task->m_queue = m_queue ; + f.m_task->m_ref_count = 2 ; + f.m_task->m_alloc_size = size ; + f.m_task->m_dep_count = narg ; + f.m_task->m_task_type = task_base::Aggregate ; + + task_base ** const dep = f.m_task->aggregate_dependences(); + + // Assign dependences to increment their reference count + // The futures may be destroyed upon returning from this call + // so increment reference count to track this assignment. + + for ( int i = 0 ; i < narg ; ++i ) { + task_base * const t = dep[i] = arg[i].m_task ; + if ( 0 != t ) { + Kokkos::atomic_increment( &(t->m_ref_count) ); + } + } + + m_queue->schedule( f.m_task ); + // this when_all may be processed at any moment + } + + return f ; + } + + /**\brief An executing task respawns itself with options + * + * 1) High, Regular, or Low priority + * 2) With or without dependence + */ + template< class FunctorType , typename ... Options > + KOKKOS_FUNCTION + void respawn( FunctorType * task_self + , Options const & ... 
arg_options ) const + { + using value_type = typename FunctorType::value_type ; + using task_type = Impl::TaskBase< execution_space + , value_type + , FunctorType > ; + + task_base * const zero = (task_base *) 0 ; + task_base * const lock = (task_base *) task_base::LockTag ; + task_type * const task = static_cast< task_type * >( task_self ); + + // Precondition: + // task is in Executing state + // therefore m_next == LockTag + // + // Change to m_next == 0 for no dependence + + if ( lock != Kokkos::atomic_exchange( & task->m_next, zero ) ) { + Kokkos::abort("TaskScheduler::respawn ERROR: already respawned"); + } + + assign( task , arg_options... ); + + // Postcondition: + // task is in Executing-Respawn state + // therefore m_next == dependence or 0 + } + + //---------------------------------------- + + template< typename S > + friend + void Kokkos::wait( Kokkos::TaskScheduler< S > const & ); + + //---------------------------------------- + + inline + int allocation_capacity() const noexcept + { return m_queue->m_memory.get_mem_size(); } + + KOKKOS_INLINE_FUNCTION + int allocated_task_count() const noexcept + { return m_queue->m_count_alloc ; } + + KOKKOS_INLINE_FUNCTION + int allocated_task_count_max() const noexcept + { return m_queue->m_max_alloc ; } + + KOKKOS_INLINE_FUNCTION + long allocated_task_count_accum() const noexcept + { return m_queue->m_accum_alloc ; } + +}; + +template< typename ExecSpace > +inline +void wait( TaskScheduler< ExecSpace > const & policy ) +{ policy.m_queue->execute(); } + +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_TASKSCHEDULER_HPP */ + diff --git a/lib/kokkos/core/src/Kokkos_Threads.hpp b/lib/kokkos/core/src/Kokkos_Threads.hpp index c9ebbf92652b5d9a2e859cf2587b8089897d3c62..f01b14724ac430924a253cb16bb4e57ec67348e3 
100644 --- a/lib/kokkos/core/src/Kokkos_Threads.hpp +++ b/lib/kokkos/core/src/Kokkos_Threads.hpp @@ -189,6 +189,17 @@ public: namespace Kokkos { namespace Impl { +template<> +struct MemorySpaceAccess + < Kokkos::Threads::memory_space + , Kokkos::Threads::scratch_memory_space + > +{ + enum { assignable = false }; + enum { accessible = true }; + enum { deepcopy = false }; +}; + template<> struct VerifyExecutionCanAccessMemorySpace < Kokkos::Threads::memory_space diff --git a/lib/kokkos/core/src/Kokkos_Timer.hpp b/lib/kokkos/core/src/Kokkos_Timer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4eca5037e44408f5f54af173530b56c60c58e22a --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Timer.hpp @@ -0,0 +1,112 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TIMER_HPP +#define KOKKOS_TIMER_HPP + +#include <stddef.h> + +#ifdef _MSC_VER +#undef KOKKOS_USE_LIBRT +#include <gettimeofday.c> +#else +#ifdef KOKKOS_USE_LIBRT +#include <ctime> +#else +#include <sys/time.h> +#endif +#endif + +namespace Kokkos { + +/** \brief Wall-clock time in seconds since construction or the most recent reset() */ + +class Timer { +private: + #ifdef KOKKOS_USE_LIBRT + struct timespec m_old; + #else + struct timeval m_old ; + #endif + Timer( const Timer & ); + Timer & operator = ( const Timer & ); +public: + + inline + void reset() { + #ifdef KOKKOS_USE_LIBRT + clock_gettime(CLOCK_REALTIME, &m_old); + #else + gettimeofday( & m_old , ((struct timezone *) NULL ) ); + #endif + } + + inline + ~Timer() {} + + inline + Timer() { reset(); } + + inline + double seconds() const + { + #ifdef KOKKOS_USE_LIBRT + struct timespec m_new; + clock_gettime(CLOCK_REALTIME, &m_new); + + return ( (double) ( m_new.tv_sec - m_old.tv_sec ) ) + + ( (double) ( m_new.tv_nsec - m_old.tv_nsec ) * 1.0e-9 ); + #else + struct timeval m_new ; + + gettimeofday( & m_new , 
((struct timezone *) NULL ) ); + + return ( (double) ( m_new.tv_sec - m_old.tv_sec ) ) + + ( (double) ( m_new.tv_usec - m_old.tv_usec ) * 1.0e-6 ); + #endif + } +}; + +} // namespace Kokkos + +#endif /* #ifndef KOKKOS_TIMER_HPP */ diff --git a/lib/kokkos/core/src/Kokkos_View.hpp b/lib/kokkos/core/src/Kokkos_View.hpp index 1cc8b0338155c8f8be724181806097a927d606d2..b728b36492c0d318e32b025d97f05aaf70c98dd3 100644 --- a/lib/kokkos/core/src/Kokkos_View.hpp +++ b/lib/kokkos/core/src/Kokkos_View.hpp @@ -61,9 +61,6 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template< class DstMemorySpace , class SrcMemorySpace > -struct DeepCopy ; - template< class DataType > struct ViewArrayAnalysis ; @@ -76,31 +73,23 @@ struct ViewDataAnalysis ; template< class , class ... > class ViewMapping { public: enum { is_assignable = false }; }; -template< class MemorySpace > -struct ViewOperatorBoundsErrorAbort ; +} /* namespace Impl */ +} /* namespace Experimental */ +} /* namespace Kokkos */ -template<> -struct ViewOperatorBoundsErrorAbort< Kokkos::HostSpace > { - static void apply( const size_t rank - , const size_t n0 , const size_t n1 - , const size_t n2 , const size_t n3 - , const size_t n4 , const size_t n5 - , const size_t n6 , const size_t n7 - , const size_t i0 , const size_t i1 - , const size_t i2 , const size_t i3 - , const size_t i4 , const size_t i5 - , const size_t i6 , const size_t i7 ); -}; +namespace Kokkos { +namespace Impl { + +using Kokkos::Experimental::Impl::ViewMapping ; +using Kokkos::Experimental::Impl::ViewDataAnalysis ; } /* namespace Impl */ -} /* namespace Experimental */ } /* namespace Kokkos */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { /** \class ViewTraits * \brief Traits class for accessing attributes of a View. 
@@ -168,8 +157,7 @@ struct ViewTraits< typename std::enable_if< Kokkos::Impl::is_space<Space>::value typedef typename Space::execution_space execution_space ; typedef typename Space::memory_space memory_space ; - typedef typename Kokkos::Impl::is_space< Space >::host_mirror_space - HostMirrorSpace ; + typedef typename Kokkos::Impl::HostMirror< Space >::Space HostMirrorSpace ; typedef typename execution_space::array_layout array_layout ; typedef typename ViewTraits<void,Prop...>::memory_traits memory_traits ; }; @@ -225,7 +213,7 @@ private: std::conditional < ! std::is_same< typename prop::HostMirrorSpace , void >::value , typename prop::HostMirrorSpace - , typename Kokkos::Impl::is_space< ExecutionSpace >::host_mirror_space + , typename Kokkos::Impl::HostMirror< ExecutionSpace >::Space >::type HostMirrorSpace ; @@ -238,7 +226,7 @@ private: // Analyze data type's properties, // May be specialized based upon the layout and value type - typedef Kokkos::Experimental::Impl::ViewDataAnalysis< DataType , ArrayLayout > data_analysis ; + typedef Kokkos::Impl::ViewDataAnalysis< DataType , ArrayLayout > data_analysis ; public: @@ -376,31 +364,29 @@ public: template< class DataType , class ... 
Properties > class View ; -} /* namespace Experimental */ } /* namespace Kokkos */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -#include <impl/KokkosExp_ViewMapping.hpp> -#include <impl/KokkosExp_ViewArray.hpp> +#include <impl/Kokkos_ViewMapping.hpp> +#include <impl/Kokkos_ViewArray.hpp> //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace { -constexpr Kokkos::Experimental::Impl::ALL_t - ALL = Kokkos::Experimental::Impl::ALL_t(); +constexpr Kokkos::Impl::ALL_t + ALL = Kokkos::Impl::ALL_t(); -constexpr Kokkos::Experimental::Impl::WithoutInitializing_t - WithoutInitializing = Kokkos::Experimental::Impl::WithoutInitializing_t(); +constexpr Kokkos::Impl::WithoutInitializing_t + WithoutInitializing = Kokkos::Impl::WithoutInitializing_t(); -constexpr Kokkos::Experimental::Impl::AllowPadding_t - AllowPadding = Kokkos::Experimental::Impl::AllowPadding_t(); +constexpr Kokkos::Impl::AllowPadding_t + AllowPadding = Kokkos::Impl::AllowPadding_t(); } @@ -446,14 +432,12 @@ view_wrap( Args const & ... args ) return return_type( args... ); } -} /* namespace Experimental */ } /* namespace Kokkos */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { template< class DataType , class ... Properties > class View ; @@ -471,7 +455,7 @@ class View : public ViewTraits< DataType , Properties ... > { private: template< class , class ... > friend class View ; - template< class , class ... > friend class Impl::ViewMapping ; + template< class , class ... 
> friend class Kokkos::Impl::ViewMapping ; public: @@ -479,8 +463,8 @@ public: private: - typedef Kokkos::Experimental::Impl::ViewMapping< traits , void > map_type ; - typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ; + typedef Kokkos::Impl::ViewMapping< traits , void > map_type ; + typedef Kokkos::Impl::SharedAllocationTracker track_type ; track_type m_track ; map_type m_map ; @@ -607,7 +591,7 @@ public: // Allow specializations to query their specialized map KOKKOS_INLINE_FUNCTION - const Kokkos::Experimental::Impl::ViewMapping< traits , void > & + const Kokkos::Impl::ViewMapping< traits , void > & implementation_map() const { return m_map ; } //---------------------------------------- @@ -629,18 +613,24 @@ private: ( is_layout_left || is_layout_right || is_layout_stride ) }; + template< class Space , bool = Kokkos::Impl::MemorySpaceAccess< Space , typename traits::memory_space >::accessible > struct verify_space + { KOKKOS_FORCEINLINE_FUNCTION static void check() {} }; + + template< class Space > struct verify_space<Space,false> + { KOKKOS_FORCEINLINE_FUNCTION static void check() + { Kokkos::abort("Kokkos::View ERROR: attempt to access inaccessible memory space"); }; + }; + #if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) #define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \ - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \ - < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify(); \ - Kokkos::Experimental::Impl::view_verify_operator_bounds ARG ; + View::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \ + Kokkos::Impl::view_verify_operator_bounds ARG ; #else #define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \ - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \ - < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify(); + View::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); #endif @@ -656,7 +646,11 @@ public: ), reference_type >::type 
operator()( Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,args...) ) + #endif return m_map.reference(); } @@ -675,7 +669,11 @@ public: operator()( const I0 & i0 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) ) + #endif return m_map.reference(i0); } @@ -692,7 +690,12 @@ public: operator()( const I0 & i0 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,args...) ) + + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) ) + #endif return m_map.m_handle[ i0 ]; } @@ -709,7 +712,11 @@ public: operator()( const I0 & i0 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) 
) + #endif return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ]; } @@ -726,7 +733,11 @@ public: ), reference_type >::type operator[]( const I0 & i0 ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) ) + #endif return m_map.reference(i0); } @@ -741,7 +752,11 @@ public: ), reference_type >::type operator[]( const I0 & i0 ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) ) + #endif return m_map.m_handle[ i0 ]; } @@ -756,7 +771,11 @@ public: ), reference_type >::type operator[]( const I0 & i0 ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) ) + #endif return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ]; } @@ -775,7 +794,11 @@ public: operator()( const I0 & i0 , const I1 & i1 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) ) + #endif return m_map.reference(i0,i1); } @@ -792,7 +815,11 @@ public: operator()( const I0 & i0 , const I1 & i1 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) 
) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) ) + #endif return m_map.m_handle[ i0 + m_map.m_offset.m_dim.N0 * i1 ]; } @@ -809,7 +836,11 @@ public: operator()( const I0 & i0 , const I1 & i1 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) ) + #endif return m_map.m_handle[ i0 + m_map.m_offset.m_stride * i1 ]; } @@ -826,7 +857,11 @@ public: operator()( const I0 & i0 , const I1 & i1 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) ) + #endif return m_map.m_handle[ i1 + m_map.m_offset.m_dim.N1 * i0 ]; } @@ -843,7 +878,11 @@ public: operator()( const I0 & i0 , const I1 & i1 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) ) + #endif return m_map.m_handle[ i1 + m_map.m_offset.m_stride * i0 ]; } @@ -860,7 +899,11 @@ public: operator()( const I0 & i0 , const I1 & i1 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) 
) + #endif return m_map.m_handle[ i0 * m_map.m_offset.m_stride.S0 + i1 * m_map.m_offset.m_stride.S1 ]; @@ -880,7 +923,11 @@ public: operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) ) + #endif return m_map.m_handle[ m_map.m_offset(i0,i1,i2) ]; } @@ -896,7 +943,11 @@ public: operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) ) + #endif return m_map.reference(i0,i1,i2); } @@ -915,7 +966,11 @@ public: operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) ) + #endif return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3) ]; } @@ -931,7 +986,11 @@ public: operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) 
) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) ) + #endif return m_map.reference(i0,i1,i2,i3); } @@ -952,7 +1011,11 @@ public: , const I4 & i4 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) ) + #endif return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4) ]; } @@ -970,7 +1033,11 @@ public: , const I4 & i4 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) ) + #endif return m_map.reference(i0,i1,i2,i3,i4); } @@ -991,7 +1058,11 @@ public: , const I4 & i4 , const I5 & i5 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) ) + #endif return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5) ]; } @@ -1009,7 +1080,11 @@ public: , const I4 & i4 , const I5 & i5 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) 
) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) ) + #endif return m_map.reference(i0,i1,i2,i3,i4,i5); } @@ -1030,7 +1105,11 @@ public: , const I4 & i4 , const I5 & i5 , const I6 & i6 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) + #endif return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6) ]; } @@ -1048,7 +1127,11 @@ public: , const I4 & i4 , const I5 & i5 , const I6 & i6 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) + #endif return m_map.reference(i0,i1,i2,i3,i4,i5,i6); } @@ -1069,7 +1152,11 @@ public: , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) ) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) ) + #endif return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; } @@ -1087,7 +1174,11 @@ public: , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 , Args ... args ) const { - KOKKOS_VIEW_OPERATOR_VERIFY( (m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) 
) + #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) ) + #else + KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) ) + #endif return m_map.reference(i0,i1,i2,i3,i4,i5,i6,i7); } @@ -1126,7 +1217,7 @@ public: , m_map() { typedef typename View<RT,RP...>::traits SrcTraits ; - typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; + typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; static_assert( Mapping::is_assignable , "Incompatible View copy construction" ); Mapping::assign( m_map , rhs.m_map , rhs.m_track ); } @@ -1136,7 +1227,7 @@ public: View & operator = ( const View<RT,RP...> & rhs ) { typedef typename View<RT,RP...>::traits SrcTraits ; - typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; + typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; static_assert( Mapping::is_assignable , "Incompatible View copy assignment" ); Mapping::assign( m_map , rhs.m_map , rhs.m_track ); m_track.assign( rhs.m_track , traits::is_managed ); @@ -1156,14 +1247,14 @@ public: { typedef View< RT , RP... > SrcType ; - typedef Kokkos::Experimental::Impl::ViewMapping + typedef Kokkos::Impl::ViewMapping < void /* deduce destination view type from source view traits */ , typename SrcType::traits , Arg0 , Args... > Mapping ; typedef typename Mapping::type DstType ; - static_assert( Kokkos::Experimental::Impl::ViewMapping< traits , typename DstType::traits , void >::is_assignable + static_assert( Kokkos::Impl::ViewMapping< traits , typename DstType::traits , void >::is_assignable , "Subview construction requires compatible view and subview arguments" ); Mapping::assign( m_map, src_view.m_map, arg0 , args... 
); @@ -1243,7 +1334,7 @@ public: #endif //------------------------------------------------------------ - Kokkos::Experimental::Impl::SharedAllocationRecord<> * + Kokkos::Impl::SharedAllocationRecord<> * record = m_map.allocate_shared( prop , arg_layout ); //------------------------------------------------------------ @@ -1324,7 +1415,7 @@ public: explicit inline View( const Label & arg_label , typename std::enable_if< - Kokkos::Experimental::Impl::is_view_label<Label>::value , + Kokkos::Impl::is_view_label<Label>::value , typename traits::array_layout >::type const & arg_layout ) : View( Impl::ViewCtorProp< std::string >( arg_label ) , arg_layout ) @@ -1335,7 +1426,7 @@ public: explicit inline View( const Label & arg_label , typename std::enable_if< - Kokkos::Experimental::Impl::is_view_label<Label>::value , + Kokkos::Impl::is_view_label<Label>::value , const size_t >::type arg_N0 = 0 , const size_t arg_N1 = 0 , const size_t arg_N2 = 0 @@ -1357,7 +1448,7 @@ public: View( const ViewAllocateWithoutInitializing & arg_prop , const typename traits::array_layout & arg_layout ) - : View( Impl::ViewCtorProp< std::string , Kokkos::Experimental::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::Experimental::WithoutInitializing ) + : View( Impl::ViewCtorProp< std::string , Kokkos::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::WithoutInitializing ) , arg_layout ) {} @@ -1373,7 +1464,7 @@ public: , const size_t arg_N6 = 0 , const size_t arg_N7 = 0 ) - : View( Impl::ViewCtorProp< std::string , Kokkos::Experimental::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::Experimental::WithoutInitializing ) + : View( Impl::ViewCtorProp< std::string , Kokkos::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::WithoutInitializing ) , typename traits::array_layout ( arg_N0 , arg_N1 , arg_N2 , arg_N3 , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) @@ -1499,7 +1590,7 @@ public: template< class V , class ... 
Args > using Subview = - typename Kokkos::Experimental::Impl::ViewMapping + typename Kokkos::Impl::ViewMapping < void /* deduce subview type from source view traits */ , typename V::traits , Args ... @@ -1507,7 +1598,7 @@ using Subview = template< class D, class ... P , class ... Args > KOKKOS_INLINE_FUNCTION -typename Kokkos::Experimental::Impl::ViewMapping +typename Kokkos::Impl::ViewMapping < void /* deduce subview type from source view traits */ , ViewTraits< D , P... > , Args ... @@ -1518,7 +1609,7 @@ subview( const View< D, P... > & src , Args ... args ) "subview requires one argument for each source View rank" ); return typename - Kokkos::Experimental::Impl::ViewMapping + Kokkos::Impl::ViewMapping < void /* deduce subview type from source view traits */ , ViewTraits< D , P ... > , Args ... >::type( src , args ... ); @@ -1526,7 +1617,7 @@ subview( const View< D, P... > & src , Args ... args ) template< class MemoryTraits , class D, class ... P , class ... Args > KOKKOS_INLINE_FUNCTION -typename Kokkos::Experimental::Impl::ViewMapping +typename Kokkos::Impl::ViewMapping < void /* deduce subview type from source view traits */ , ViewTraits< D , P... > , Args ... @@ -1537,7 +1628,7 @@ subview( const View< D, P... > & src , Args ... args ) "subview requires one argument for each source View rank" ); return typename - Kokkos::Experimental::Impl::ViewMapping + Kokkos::Impl::ViewMapping < void /* deduce subview type from source view traits */ , ViewTraits< D , P ... > , Args ... > @@ -1545,16 +1636,12 @@ subview( const View< D, P... > & src , Args ... args ) ::type( src , args ... ); } - - -} /* namespace Experimental */ } /* namespace Kokkos */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { template< class LT , class ... LP , class RT , class ... 
RP > KOKKOS_INLINE_FUNCTION @@ -1593,7 +1680,6 @@ bool operator != ( const View<LT,LP...> & lhs , return ! ( operator==(lhs,rhs) ); } -} /* namespace Experimental */ } /* namespace Kokkos */ //---------------------------------------------------------------------------- @@ -1604,11 +1690,11 @@ namespace Impl { inline void shared_allocation_tracking_claim_and_disable() -{ Kokkos::Experimental::Impl::SharedAllocationRecord<void,void>::tracking_claim_and_disable(); } +{ Kokkos::Impl::SharedAllocationRecord<void,void>::tracking_claim_and_disable(); } inline void shared_allocation_tracking_release_and_enable() -{ Kokkos::Experimental::Impl::SharedAllocationRecord<void,void>::tracking_release_and_enable(); } +{ Kokkos::Impl::SharedAllocationRecord<void,void>::tracking_release_and_enable(); } } /* namespace Impl */ } /* namespace Kokkos */ @@ -1617,7 +1703,6 @@ void shared_allocation_tracking_release_and_enable() //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace Impl { template< class OutputView , typename Enable = void > @@ -1719,14 +1804,12 @@ struct ViewRemap { }; } /* namespace Impl */ -} /* namespace Experimental */ } /* namespace Kokkos */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { /** \brief Deep copy a value from Host memory into a view. */ template< class DT , class ... DP > @@ -1743,7 +1826,7 @@ void deep_copy typename ViewTraits<DT,DP...>::value_type >::value , "deep_copy requires non-const type" ); - Kokkos::Experimental::Impl::ViewFill< View<DT,DP...> >( dst , value ); + Kokkos::Impl::ViewFill< View<DT,DP...> >( dst , value ); } /** \brief Deep copy into a value in Host memory from a view. 
*/ @@ -1830,10 +1913,10 @@ void deep_copy typedef typename src_type::memory_space src_memory_space ; enum { DstExecCanAccessSrc = - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value }; + Kokkos::Impl::SpaceAccessibility< dst_execution_space , src_memory_space >::accessible }; enum { SrcExecCanAccessDst = - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename src_execution_space::memory_space , dst_memory_space >::value }; + Kokkos::Impl::SpaceAccessibility< src_execution_space , dst_memory_space >::accessible }; if ( (void *) dst.data() != (void*) src.data() ) { @@ -1916,11 +1999,11 @@ void deep_copy } else if ( DstExecCanAccessSrc ) { // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape. - Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type >( dst , src ); + Kokkos::Impl::ViewRemap< dst_type , src_type >( dst , src ); } else if ( SrcExecCanAccessDst ) { // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape. - Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type , src_execution_space >( dst , src ); + Kokkos::Impl::ViewRemap< dst_type , src_type , src_execution_space >( dst , src ); } else { Kokkos::Impl::throw_runtime_exception("deep_copy given views that would require a temporary allocation"); @@ -1928,14 +2011,12 @@ void deep_copy } } -} /* namespace Experimental */ } /* namespace Kokkos */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { /** \brief Deep copy a value from Host memory into a view. */ template< class ExecSpace ,class DT , class ... 
DP > @@ -1954,7 +2035,7 @@ void deep_copy typename ViewTraits<DT,DP...>::value_type >::value , "deep_copy requires non-const type" ); - Kokkos::Experimental::Impl::ViewFill< View<DT,DP...> >( dst , value ); + Kokkos::Impl::ViewFill< View<DT,DP...> >( dst , value ); } /** \brief Deep copy into a value in Host memory from a view. */ @@ -2049,10 +2130,10 @@ void deep_copy typedef typename src_type::memory_space src_memory_space ; enum { DstExecCanAccessSrc = - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value }; + Kokkos::Impl::SpaceAccessibility< dst_execution_space , src_memory_space >::accessible }; enum { SrcExecCanAccessDst = - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename src_execution_space::memory_space , dst_memory_space >::value }; + Kokkos::Impl::SpaceAccessibility< src_execution_space , dst_memory_space >::accessible }; if ( (void *) dst.data() != (void*) src.data() ) { @@ -2089,11 +2170,11 @@ void deep_copy } else if ( DstExecCanAccessSrc ) { // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape. - Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type >( dst , src ); + Kokkos::Impl::ViewRemap< dst_type , src_type >( dst , src ); } else if ( SrcExecCanAccessDst ) { // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape. 
- Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type , src_execution_space >( dst , src ); + Kokkos::Impl::ViewRemap< dst_type , src_type , src_execution_space >( dst , src ); } else { Kokkos::Impl::throw_runtime_exception("deep_copy given views that would require a temporary allocation"); @@ -2101,21 +2182,19 @@ void deep_copy } } -} /* namespace Experimental */ } /* namespace Kokkos */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace Impl { // Deduce Mirror Types template<class Space, class T, class ... P> struct MirrorViewType { // The incoming view_type - typedef typename Kokkos::Experimental::View<T,P...> src_view_type; + typedef typename Kokkos::View<T,P...> src_view_type; // The memory space for the mirror view typedef typename Space::memory_space memory_space; // Check whether it is the same memory space @@ -2125,7 +2204,7 @@ struct MirrorViewType { // The data type (we probably want it non-const since otherwise we can't even deep_copy to it. typedef typename src_view_type::non_const_data_type data_type; // The destination view type if it is not the same memory space - typedef Kokkos::Experimental::View<data_type,array_layout,Space> dest_view_type; + typedef Kokkos::View<data_type,array_layout,Space> dest_view_type; // If it is the same memory_space return the existsing view_type // This will also keep the unmanaged trait if necessary typedef typename std::conditional<is_same_memspace,src_view_type,dest_view_type>::type view_type; @@ -2134,7 +2213,7 @@ struct MirrorViewType { template<class Space, class T, class ... 
P> struct MirrorType { // The incoming view_type - typedef typename Kokkos::Experimental::View<T,P...> src_view_type; + typedef typename Kokkos::View<T,P...> src_view_type; // The memory space for the mirror view typedef typename Space::memory_space memory_space; // Check whether it is the same memory space @@ -2144,17 +2223,17 @@ struct MirrorType { // The data type (we probably want it non-const since otherwise we can't even deep_copy to it. typedef typename src_view_type::non_const_data_type data_type; // The destination view type if it is not the same memory space - typedef Kokkos::Experimental::View<data_type,array_layout,Space> view_type; + typedef Kokkos::View<data_type,array_layout,Space> view_type; }; } template< class T , class ... P > inline -typename Kokkos::Experimental::View<T,P...>::HostMirror -create_mirror( const Kokkos::Experimental::View<T,P...> & src +typename Kokkos::View<T,P...>::HostMirror +create_mirror( const Kokkos::View<T,P...> & src , typename std::enable_if< - ! std::is_same< typename Kokkos::Experimental::ViewTraits<T,P...>::array_layout + ! std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout , Kokkos::LayoutStride >::value >::type * = 0 ) @@ -2175,10 +2254,10 @@ create_mirror( const Kokkos::Experimental::View<T,P...> & src template< class T , class ... P > inline -typename Kokkos::Experimental::View<T,P...>::HostMirror -create_mirror( const Kokkos::Experimental::View<T,P...> & src +typename Kokkos::View<T,P...>::HostMirror +create_mirror( const Kokkos::View<T,P...> & src , typename std::enable_if< - std::is_same< typename Kokkos::Experimental::ViewTraits<T,P...>::array_layout + std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout , Kokkos::LayoutStride >::value >::type * = 0 ) @@ -2212,21 +2291,21 @@ create_mirror( const Kokkos::Experimental::View<T,P...> & src // Create a mirror in a new space (specialization for different space) template<class Space, class T, class ... 
P> -typename Impl::MirrorType<Space,T,P ...>::view_type create_mirror(const Space& , const Kokkos::Experimental::View<T,P...> & src) { +typename Impl::MirrorType<Space,T,P ...>::view_type create_mirror(const Space& , const Kokkos::View<T,P...> & src) { return typename Impl::MirrorType<Space,T,P ...>::view_type(src.label(),src.layout()); } template< class T , class ... P > inline -typename Kokkos::Experimental::View<T,P...>::HostMirror -create_mirror_view( const Kokkos::Experimental::View<T,P...> & src +typename Kokkos::View<T,P...>::HostMirror +create_mirror_view( const Kokkos::View<T,P...> & src , typename std::enable_if<( - std::is_same< typename Kokkos::Experimental::View<T,P...>::memory_space - , typename Kokkos::Experimental::View<T,P...>::HostMirror::memory_space + std::is_same< typename Kokkos::View<T,P...>::memory_space + , typename Kokkos::View<T,P...>::HostMirror::memory_space >::value && - std::is_same< typename Kokkos::Experimental::View<T,P...>::data_type - , typename Kokkos::Experimental::View<T,P...>::HostMirror::data_type + std::is_same< typename Kokkos::View<T,P...>::data_type + , typename Kokkos::View<T,P...>::HostMirror::data_type >::value )>::type * = 0 ) @@ -2236,26 +2315,26 @@ create_mirror_view( const Kokkos::Experimental::View<T,P...> & src template< class T , class ... P > inline -typename Kokkos::Experimental::View<T,P...>::HostMirror -create_mirror_view( const Kokkos::Experimental::View<T,P...> & src +typename Kokkos::View<T,P...>::HostMirror +create_mirror_view( const Kokkos::View<T,P...> & src , typename std::enable_if< ! 
( - std::is_same< typename Kokkos::Experimental::View<T,P...>::memory_space - , typename Kokkos::Experimental::View<T,P...>::HostMirror::memory_space + std::is_same< typename Kokkos::View<T,P...>::memory_space + , typename Kokkos::View<T,P...>::HostMirror::memory_space >::value && - std::is_same< typename Kokkos::Experimental::View<T,P...>::data_type - , typename Kokkos::Experimental::View<T,P...>::HostMirror::data_type + std::is_same< typename Kokkos::View<T,P...>::data_type + , typename Kokkos::View<T,P...>::HostMirror::data_type >::value )>::type * = 0 ) { - return Kokkos::Experimental::create_mirror( src ); + return Kokkos::create_mirror( src ); } // Create a mirror view in a new space (specialization for same space) template<class Space, class T, class ... P> typename Impl::MirrorViewType<Space,T,P ...>::view_type -create_mirror_view(const Space& , const Kokkos::Experimental::View<T,P...> & src +create_mirror_view(const Space& , const Kokkos::View<T,P...> & src , typename std::enable_if<Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { return src; } @@ -2263,24 +2342,26 @@ create_mirror_view(const Space& , const Kokkos::Experimental::View<T,P...> & src // Create a mirror view in a new space (specialization for different space) template<class Space, class T, class ... 
P> typename Impl::MirrorViewType<Space,T,P ...>::view_type -create_mirror_view(const Space& , const Kokkos::Experimental::View<T,P...> & src +create_mirror_view(const Space& , const Kokkos::View<T,P...> & src , typename std::enable_if<!Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { return typename Impl::MirrorViewType<Space,T,P ...>::view_type(src.label(),src.layout()); } -} /* namespace Experimental */ } /* namespace Kokkos */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { /** \brief Resize a view with copying old data to new data at the corresponding indices. */ template< class T , class ... P > inline -void resize( Kokkos::Experimental::View<T,P...> & v , +typename std::enable_if< + std::is_same<typename Kokkos::View<T,P...>::array_layout,Kokkos::LayoutLeft>::value || + std::is_same<typename Kokkos::View<T,P...>::array_layout,Kokkos::LayoutRight>::value +>::type +resize( Kokkos::View<T,P...> & v , const size_t n0 = 0 , const size_t n1 = 0 , const size_t n2 = 0 , @@ -2290,13 +2371,13 @@ void resize( Kokkos::Experimental::View<T,P...> & v , const size_t n6 = 0 , const size_t n7 = 0 ) { - typedef Kokkos::Experimental::View<T,P...> view_type ; + typedef Kokkos::View<T,P...> view_type ; - static_assert( Kokkos::Experimental::ViewTraits<T,P...>::is_managed , "Can only resize managed views" ); + static_assert( Kokkos::ViewTraits<T,P...>::is_managed , "Can only resize managed views" ); view_type v_resized( v.label(), n0, n1, n2, n3, n4, n5, n6, n7 ); - Kokkos::Experimental::Impl::ViewRemap< view_type , view_type >( v_resized , v ); + Kokkos::Impl::ViewRemap< view_type , view_type >( v_resized , v ); v = v_resized ; } @@ -2304,7 +2385,28 @@ void resize( Kokkos::Experimental::View<T,P...> & v , /** \brief Resize a view with copying old data to new data at the corresponding indices. 
*/ template< class T , class ... P > inline -void realloc( Kokkos::Experimental::View<T,P...> & v , +void resize( Kokkos::View<T,P...> & v , + const typename Kokkos::View<T,P...>::array_layout & layout) +{ + typedef Kokkos::View<T,P...> view_type ; + + static_assert( Kokkos::ViewTraits<T,P...>::is_managed , "Can only resize managed views" ); + + view_type v_resized( v.label(), layout ); + + Kokkos::Impl::ViewRemap< view_type , view_type >( v_resized , v ); + + v = v_resized ; +} + +/** \brief Resize a view with discarding old data. */ +template< class T , class ... P > +inline +typename std::enable_if< + std::is_same<typename Kokkos::View<T,P...>::array_layout,Kokkos::LayoutLeft>::value || + std::is_same<typename Kokkos::View<T,P...>::array_layout,Kokkos::LayoutRight>::value +>::type +realloc( Kokkos::View<T,P...> & v , const size_t n0 = 0 , const size_t n1 = 0 , const size_t n2 = 0 , @@ -2314,9 +2416,9 @@ void realloc( Kokkos::Experimental::View<T,P...> & v , const size_t n6 = 0 , const size_t n7 = 0 ) { - typedef Kokkos::Experimental::View<T,P...> view_type ; + typedef Kokkos::View<T,P...> view_type ; - static_assert( Kokkos::Experimental::ViewTraits<T,P...>::is_managed , "Can only realloc managed views" ); + static_assert( Kokkos::ViewTraits<T,P...>::is_managed , "Can only realloc managed views" ); const std::string label = v.label(); @@ -2324,40 +2426,65 @@ void realloc( Kokkos::Experimental::View<T,P...> & v , v = view_type( label, n0, n1, n2, n3, n4, n5, n6, n7 ); } -} /* namespace Experimental */ +/** \brief Resize a view with discarding old data. */ +template< class T , class ... 
P > +inline +void realloc( Kokkos::View<T,P...> & v , + const typename Kokkos::View<T,P...>::array_layout & layout) +{ + typedef Kokkos::View<T,P...> view_type ; + + static_assert( Kokkos::ViewTraits<T,P...>::is_managed , "Can only realloc managed views" ); + + const std::string label = v.label(); + + v = view_type(); // Deallocate first, if the only view to allocation + v = view_type( label, layout ); +} } /* namespace Kokkos */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- +// For backward compatibility: namespace Kokkos { +namespace Experimental { -template< class D , class ... P > -using ViewTraits = Kokkos::Experimental::ViewTraits<D,P...> ; +using Kokkos::ViewTraits ; +using Kokkos::View ; +using Kokkos::Subview ; +using Kokkos::is_view ; +using Kokkos::subview ; +using Kokkos::ALL ; +using Kokkos::WithoutInitializing ; +using Kokkos::AllowPadding ; +using Kokkos::view_alloc ; +using Kokkos::view_wrap ; +using Kokkos::deep_copy ; +using Kokkos::create_mirror ; +using Kokkos::create_mirror_view ; +using Kokkos::resize ; +using Kokkos::realloc ; -using Experimental::View ; //modified due to gcc parser bug -//template< class D , class ... 
P > -//using View = Kokkos::Experimental::View<D,P...> ; +namespace Impl { -using Kokkos::Experimental::ALL ; -using Kokkos::Experimental::WithoutInitializing ; -using Kokkos::Experimental::AllowPadding ; -using Kokkos::Experimental::view_alloc ; -using Kokkos::Experimental::view_wrap ; +using Kokkos::Impl::ViewFill ; +using Kokkos::Impl::ViewRemap ; +using Kokkos::Impl::ViewCtorProp ; +using Kokkos::Impl::is_view_label ; +using Kokkos::Impl::WithoutInitializing_t ; +using Kokkos::Impl::AllowPadding_t ; +using Kokkos::Impl::SharedAllocationRecord ; +using Kokkos::Impl::SharedAllocationTracker ; -using Kokkos::Experimental::deep_copy ; -using Kokkos::Experimental::create_mirror ; -using Kokkos::Experimental::create_mirror_view ; -using Kokkos::Experimental::subview ; -using Kokkos::Experimental::resize ; -using Kokkos::Experimental::realloc ; -using Kokkos::Experimental::is_view ; +} /* namespace Impl */ +} /* namespace Experimental */ +} /* namespace Kokkos */ +namespace Kokkos { namespace Impl { -using Kokkos::Experimental::is_view ; - -class ViewDefault {}; +using Kokkos::is_view ; template< class SrcViewType , class Arg0Type @@ -2371,8 +2498,7 @@ template< class SrcViewType > struct ViewSubview /* { typedef ... type ; } */ ; -} - +} /* namespace Impl */ } /* namespace Kokkos */ #include <impl/Kokkos_Atomic_View.hpp> diff --git a/lib/kokkos/core/src/Makefile b/lib/kokkos/core/src/Makefile index dc27d341ac8ee4a40150bc93476b994666189739..316f61fd4d9fcd4c7ce4ec37592659deef006bce 100644 --- a/lib/kokkos/core/src/Makefile +++ b/lib/kokkos/core/src/Makefile @@ -1,24 +1,25 @@ -KOKKOS_PATH = ../.. +ifndef KOKKOS_PATH + MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) + KOKKOS_PATH = $(subst Makefile,,$(MAKEFILE_PATH))../.. 
+endif PREFIX ?= /usr/local/lib/kokkos default: messages build-lib echo "End Build" -include $(KOKKOS_PATH)/Makefile.kokkos - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - CXX = $(NVCC_WRAPPER) - CXXFLAGS ?= -O3 - LINK = $(NVCC_WRAPPER) - LINKFLAGS ?= +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) + CXX = $(KOKKOS_PATH)/config/nvcc_wrapper else - CXX ?= g++ - CXXFLAGS ?= -O3 - LINK ?= g++ - LINKFLAGS ?= + CXX = g++ endif +CXXFLAGS = -O3 +LINK ?= $(CXX) +LDFLAGS ?= + +include $(KOKKOS_PATH)/Makefile.kokkos + PWD = $(shell pwd) KOKKOS_HEADERS_INCLUDE = $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) @@ -49,6 +50,16 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) CONDITIONAL_COPIES += copy-openmp endif +ifeq ($(KOKKOS_OS),CYGWIN) + COPY_FLAG = -u +endif +ifeq ($(KOKKOS_OS),Linux) + COPY_FLAG = -u +endif +ifeq ($(KOKKOS_OS),Darwin) + COPY_FLAG = +endif + messages: echo "Start Build" @@ -77,6 +88,15 @@ build-makefile-kokkos: echo "KOKKOS_LINK_DEPENDS = $(KOKKOS_LINK_DEPENDS)" >> Makefile.kokkos echo "KOKKOS_LIBS = $(KOKKOS_LIBS)" >> Makefile.kokkos echo "KOKKOS_LDFLAGS = $(KOKKOS_LDFLAGS)" >> Makefile.kokkos + echo "" >> Makefile.kokkos + echo "#Internal settings which need to propagated for Kokkos examples" >> Makefile.kokkos + echo "KOKKOS_INTERNAL_USE_CUDA = ${KOKKOS_INTERNAL_USE_CUDA}" >> Makefile.kokkos + echo "KOKKOS_INTERNAL_USE_OPENMP = ${KOKKOS_INTERNAL_USE_OPENMP}" >> Makefile.kokkos + echo "KOKKOS_INTERNAL_USE_PTHREADS = ${KOKKOS_INTERNAL_USE_PTHREADS}" >> Makefile.kokkos + echo "" >> Makefile.kokkos + echo "#Fake kokkos-clean target" >> Makefile.kokkos + echo "kokkos-clean:" >> Makefile.kokkos + echo "" >> Makefile.kokkos sed \ -e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \ -e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \ @@ -98,27 +118,27 @@ mkdir: copy-cuda: mkdir mkdir -p $(PREFIX)/include/Cuda - cp $(KOKKOS_HEADERS_CUDA) $(PREFIX)/include/Cuda + cp $(COPY_FLAG) $(KOKKOS_HEADERS_CUDA) $(PREFIX)/include/Cuda copy-threads: mkdir mkdir -p 
$(PREFIX)/include/Threads - cp $(KOKKOS_HEADERS_THREADS) $(PREFIX)/include/Threads + cp $(COPY_FLAG) $(KOKKOS_HEADERS_THREADS) $(PREFIX)/include/Threads copy-qthread: mkdir mkdir -p $(PREFIX)/include/Qthread - cp $(KOKKOS_HEADERS_QTHREAD) $(PREFIX)/include/Qthread + cp $(COPY_FLAG) $(KOKKOS_HEADERS_QTHREAD) $(PREFIX)/include/Qthread copy-openmp: mkdir mkdir -p $(PREFIX)/include/OpenMP - cp $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP + cp $(COPY_FLAG) $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP install: mkdir $(CONDITIONAL_COPIES) build-lib - cp $(NVCC_WRAPPER) $(PREFIX)/bin - cp $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include - cp $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl - cp Makefile.kokkos $(PREFIX) - cp libkokkos.a $(PREFIX)/lib - cp KokkosCore_config.h $(PREFIX)/include + cp $(COPY_FLAG) $(NVCC_WRAPPER) $(PREFIX)/bin + cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include + cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl + cp $(COPY_FLAG) Makefile.kokkos $(PREFIX) + cp $(COPY_FLAG) libkokkos.a $(PREFIX)/lib + cp $(COPY_FLAG) KokkosCore_config.h $(PREFIX)/include clean: kokkos-clean rm -f Makefile.kokkos diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp index 3e22033f7c058dc6c084c445685c80beb8620da8..00a9957ee1b4291c48d1cac4d44c7669268b36ff 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp @@ -43,7 +43,7 @@ #include <Kokkos_Core.hpp> -#if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKPOLICY ) +#if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) #include <impl/Kokkos_TaskQueue_impl.hpp> @@ -324,6 +324,6 @@ void TaskQueueSpecialization< Kokkos::OpenMP >:: //---------------------------------------------------------------------------- -#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #if defined( 
KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) */ diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp index 2761247c40c930d1b454acfc373be2c8d8aaf4a3..15dbb77c26c7432497417b0b27508b00d3d717af 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp @@ -44,7 +44,7 @@ #ifndef KOKKOS_IMPL_OPENMP_TASK_HPP #define KOKKOS_IMPL_OPENMP_TASK_HPP -#if defined( KOKKOS_ENABLE_TASKPOLICY ) +#if defined( KOKKOS_ENABLE_TASKDAG ) //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -156,21 +156,30 @@ template<typename iType> KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > > TeamThreadRange - ( Impl::TaskExec< Kokkos::OpenMP > & thread - , const iType & count ) + ( Impl::TaskExec< Kokkos::OpenMP > & thread, const iType & count ) { return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >(thread,count); } -template<typename iType> +template<typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::OpenMP > > +Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, + Impl::TaskExec< Kokkos::OpenMP > > TeamThreadRange - ( Impl:: TaskExec< Kokkos::OpenMP > & thread - , const iType & start - , const iType & end ) + ( Impl:: TaskExec< Kokkos::OpenMP > & thread, const iType1 & begin, const iType2 & end ) +{ + typedef typename std::common_type<iType1, iType2>::type iType; + return Impl::TeamThreadRangeBoundariesStruct<iType, Impl::TaskExec< Kokkos::OpenMP > >(thread, begin, end); +} + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > > +ThreadVectorRange + ( Impl::TaskExec< Kokkos::OpenMP > & 
thread + , const iType & count ) { - return Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::OpenMP > >(thread,start,end); + return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >(thread,count); } /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. @@ -351,6 +360,6 @@ void parallel_scan //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ #endif /* #ifndef KOKKOS_IMPL_OPENMP_TASK_HPP */ diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp index 7d06a2f66149f93bd43d6a4976ae9060b8833997..25e7d89277fe3dc46df093235a3195ef6bdeedd8 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp @@ -300,12 +300,12 @@ void OpenMP::initialize( unsigned thread_count , } // Check for over-subscription - if( Impl::mpi_ranks_per_node() * long(thread_count) > Impl::processors_per_node() ) { - std::cout << "Kokkos::OpenMP::initialize WARNING: You are likely oversubscribing your CPU cores." << std::endl; - std::cout << " Detected: " << Impl::processors_per_node() << " cores per node." << std::endl; - std::cout << " Detected: " << Impl::mpi_ranks_per_node() << " MPI_ranks per node." << std::endl; - std::cout << " Requested: " << thread_count << " threads per process." << std::endl; - } + //if( Impl::mpi_ranks_per_node() * long(thread_count) > Impl::processors_per_node() ) { + // std::cout << "Kokkos::OpenMP::initialize WARNING: You are likely oversubscribing your CPU cores." << std::endl; + // std::cout << " Detected: " << Impl::processors_per_node() << " cores per node." << std::endl; + // std::cout << " Detected: " << Impl::mpi_ranks_per_node() << " MPI_ranks per node." 
<< std::endl; + // std::cout << " Requested: " << thread_count << " threads per process." << std::endl; + //} // Init the array for used for arbitrarily sized atomics Impl::init_lock_array_host_space(); diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp index a01c9cb644e86f423409f1eeb56a014b68f87968..a2bfa742d1bad422af65b2dd712f9370b39ddf61 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -180,7 +180,7 @@ public: bool success = false; while(!success) { work_range_new = Kokkos::atomic_compare_exchange(&m_work_range,work_range_old,work_range_new); - success = ( (work_range_new == work_range_old) || + success = ( (work_range_new == work_range_old) || (work_range_new.first>=work_range_new.second)); work_range_old = work_range_new; work_range_new.first+=1; @@ -393,12 +393,14 @@ public: typedef typename if_c< sizeof(ValueType) < TEAM_REDUCE_SIZE , ValueType , void >::type type ; - type * const local_value = ((type*) m_exec.scratch_thread()); - if(team_rank() == thread_id) - *local_value = value; + type volatile * const shared_value = + ((type*) m_exec.pool_rev( m_team_base_rev )->scratch_thread()); + + if ( team_rank() == thread_id ) *shared_value = value; memory_fence(); - team_barrier(); - value = *local_value; + team_barrier(); // Wait for 'thread_id' to write + value = *shared_value ; + team_barrier(); // Wait for team members to read #endif } @@ -655,8 +657,6 @@ public: static inline int team_reduce_size() { return TEAM_REDUCE_SIZE ; } }; - - template< class ... Properties > class TeamPolicyInternal< Kokkos::OpenMP, Properties ... 
>: public PolicyTraits<Properties ...> { @@ -740,9 +740,9 @@ public: inline int team_size() const { return m_team_size ; } inline int league_size() const { return m_league_size ; } + inline size_t scratch_size(const int& level, int team_size_ = -1) const { - if(team_size_ < 0) - team_size_ = m_team_size; + if(team_size_ < 0) team_size_ = m_team_size; return m_team_scratch_size[level] + team_size_*m_thread_scratch_size[level] ; } @@ -840,7 +840,6 @@ public: }; } // namespace Impl - } // namespace Kokkos //---------------------------------------------------------------------------- @@ -864,29 +863,26 @@ int OpenMP::thread_pool_rank() #endif } -} // namespace Kokkos - - -namespace Kokkos { - -template<typename iType> +template< typename iType > KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember> - TeamThreadRange(const Impl::OpenMPexecTeamMember& thread, const iType& count) { - return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>(thread,count); +Impl::TeamThreadRangeBoundariesStruct< iType, Impl::OpenMPexecTeamMember > +TeamThreadRange( const Impl::OpenMPexecTeamMember& thread, const iType& count ) { + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::OpenMPexecTeamMember >( thread, count ); } -template<typename iType> +template< typename iType1, typename iType2 > KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember> - TeamThreadRange(const Impl::OpenMPexecTeamMember& thread, const iType& begin, const iType& end) { - return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>(thread,begin,end); +Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, + Impl::OpenMPexecTeamMember > +TeamThreadRange( const Impl::OpenMPexecTeamMember& thread, const iType1& begin, const iType2& end ) { + typedef typename std::common_type< iType1, iType2 >::type iType; + return Impl::TeamThreadRangeBoundariesStruct< 
iType, Impl::OpenMPexecTeamMember >( thread, iType(begin), iType(end) ); } template<typename iType> KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember > - ThreadVectorRange(const Impl::OpenMPexecTeamMember& thread, const iType& count) { +ThreadVectorRange(const Impl::OpenMPexecTeamMember& thread, const iType& count) { return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >(thread,count); } @@ -899,6 +895,7 @@ KOKKOS_INLINE_FUNCTION Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember> PerThread(const Impl::OpenMPexecTeamMember& thread) { return Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember>(thread); } + } // namespace Kokkos namespace Kokkos { @@ -959,7 +956,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Ope } //namespace Kokkos - namespace Kokkos { /** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1. * @@ -1080,4 +1076,3 @@ void single(const Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember>& single_s } #endif /* #ifndef KOKKOS_OPENMPEXEC_HPP */ - diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp index 5b6419289fc4874f1d97034aa7decd9be0eca147..8ee70b9efa6220a93494472cc67b73641886f9ed 100644 --- a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp +++ b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp @@ -511,6 +511,7 @@ public: }; } // namespace Impl + } // namespace Kokkos //---------------------------------------------------------------------------- @@ -518,26 +519,24 @@ public: namespace Kokkos { -template<typename iType> +template< typename iType > KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember> -TeamThreadRange(const Impl::QthreadTeamPolicyMember& thread, const iType& count) +Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadTeamPolicyMember > +TeamThreadRange( const 
Impl::QthreadTeamPolicyMember& thread, const iType& count ) { - return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>(thread,count); + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadTeamPolicyMember >( thread, count ); } -template<typename iType> +template< typename iType1, typename iType2 > KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember> -TeamThreadRange( const Impl::QthreadTeamPolicyMember& thread - , const iType & begin - , const iType & end - ) +Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, + Impl::QthreadTeamPolicyMember > +TeamThreadRange( const Impl::QthreadTeamPolicyMember& thread, const iType1 & begin, const iType2 & end ) { - return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>(thread,begin,end); + typedef typename std::common_type< iType1, iType2 >::type iType; + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadTeamPolicyMember >( thread, iType(begin), iType(end) ); } - template<typename iType> KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember > @@ -545,7 +544,6 @@ Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember > return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >(thread,count); } - KOKKOS_INLINE_FUNCTION Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember> PerTeam(const Impl::QthreadTeamPolicyMember& thread) { return Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember>(thread); @@ -556,14 +554,10 @@ Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember> PerThread(const Impl::Qt return Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember>(thread); } -} // namespace Kokkos - -namespace Kokkos { - - /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. 
- * - * The range i=0..N-1 is mapped to all threads of the the calling thread team. - * This functionality requires C++11 support.*/ +/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the the calling thread team. + * This functionality requires C++11 support.*/ template<typename iType, class Lambda> KOKKOS_INLINE_FUNCTION void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>& loop_boundaries, const Lambda& lambda) { @@ -618,9 +612,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Qth #endif /* #if defined( KOKKOS_HAVE_CXX11 ) */ -} // namespace Kokkos - -namespace Kokkos { /** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1. * * The range i=0..N-1 is mapped to all vector lanes of the the calling thread. @@ -707,10 +698,6 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Qth } } -} // namespace Kokkos - -namespace Kokkos { - template<class FunctorType> KOKKOS_INLINE_FUNCTION void single(const Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda) { @@ -740,6 +727,4 @@ void single(const Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember>& singl } // namespace Kokkos - #endif /* #define KOKKOS_QTHREAD_PARALLEL_HPP */ - diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp index 8cc39d277c1949dc7f9587c09b77d5a71ffddeba..e651b9fdb8cf8ad2f5067215bec6aff60a5c6adb 100644 --- a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp +++ b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp @@ -58,7 +58,7 @@ #include <Kokkos_Atomic.hpp> #include <Qthread/Kokkos_Qthread_TaskPolicy.hpp> -#if defined( KOKKOS_ENABLE_TASKPOLICY ) +#if defined( KOKKOS_ENABLE_TASKDAG ) 
//---------------------------------------------------------------------------- @@ -196,7 +196,7 @@ void Task::assign( Task ** const lhs , Task * rhs , const bool no_throw ) static const char msg_error_dependences[] = ": destroy task that has dependences" ; static const char msg_error_exception[] = ": caught internal exception" ; - if ( rhs ) { Kokkos::atomic_fetch_add( & (*rhs).m_ref_count , 1 ); } + if ( rhs ) { Kokkos::atomic_increment( &(*rhs).m_ref_count ); } Task * const lhs_val = Kokkos::atomic_exchange( lhs , rhs ); @@ -486,6 +486,6 @@ void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Qthread > & policy ) } // namespace Experimental } // namespace Kokkos -#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ #endif /* #if defined( KOKKOS_HAVE_QTHREAD ) */ diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp index 22a565503dd59626057bae12ef01cb9abdb994f9..565dbf7e61716717bdbac0e1b3adf007493cf27d 100644 --- a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp +++ b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp @@ -43,8 +43,8 @@ // Experimental unified task-data parallel manycore LDRD -#ifndef KOKKOS_QTHREAD_TASKPOLICY_HPP -#define KOKKOS_QTHREAD_TASKPOLICY_HPP +#ifndef KOKKOS_QTHREAD_TASKSCHEDULER_HPP +#define KOKKOS_QTHREAD_TASKSCHEDULER_HPP #include <string> #include <typeinfo> @@ -64,12 +64,12 @@ //---------------------------------------------------------------------------- #include <Kokkos_Qthread.hpp> -#include <Kokkos_TaskPolicy.hpp> +#include <Kokkos_TaskScheduler.hpp> #include <Kokkos_View.hpp> #include <impl/Kokkos_FunctorAdapter.hpp> -#if defined( KOKKOS_ENABLE_TASKPOLICY ) +#if defined( KOKKOS_ENABLE_TASKDAG ) //---------------------------------------------------------------------------- @@ -154,7 +154,7 @@ public: KOKKOS_FUNCTION static TaskMember * verify_type( TaskMember * t ) { - enum { check_type 
= ! Kokkos::Impl::is_same< ResultType , void >::value }; + enum { check_type = ! std::is_same< ResultType , void >::value }; if ( check_type && t != 0 ) { @@ -298,7 +298,7 @@ public: template< class FunctorType , class ResultType > KOKKOS_INLINE_FUNCTION static - void apply_single( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t ) + void apply_single( typename std::enable_if< ! std::is_same< ResultType , void >::value , TaskMember * >::type t ) { typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ; @@ -314,7 +314,7 @@ public: template< class FunctorType , class ResultType > KOKKOS_INLINE_FUNCTION static - void apply_single( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t ) + void apply_single( typename std::enable_if< std::is_same< ResultType , void >::value , TaskMember * >::type t ) { typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ; @@ -332,7 +332,7 @@ public: template< class FunctorType , class ResultType > KOKKOS_INLINE_FUNCTION static - void apply_team( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t + void apply_team( typename std::enable_if< ! 
std::is_same< ResultType , void >::value , TaskMember * >::type t , Kokkos::Impl::QthreadTeamPolicyMember & member ) { typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ; @@ -344,7 +344,7 @@ public: template< class FunctorType , class ResultType > KOKKOS_INLINE_FUNCTION static - void apply_team( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t + void apply_team( typename std::enable_if< std::is_same< ResultType , void >::value , TaskMember * >::type t , Kokkos::Impl::QthreadTeamPolicyMember & member ) { typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ; @@ -575,10 +575,10 @@ public: template< class A1 , class A2 , class A3 , class A4 > void add_dependence( const Future<A1,A2> & after , const Future<A3,A4> & before - , typename Kokkos::Impl::enable_if - < Kokkos::Impl::is_same< typename Future<A1,A2>::execution_space , execution_space >::value + , typename std::enable_if + < std::is_same< typename Future<A1,A2>::execution_space , execution_space >::value && - Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value + std::is_same< typename Future<A3,A4>::execution_space , execution_space >::value >::type * = 0 ) { @@ -621,8 +621,8 @@ public: template< class FunctorType , class A3 , class A4 > void add_dependence( FunctorType * task_functor , const Future<A3,A4> & before - , typename Kokkos::Impl::enable_if - < Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value + , typename std::enable_if + < std::is_same< typename Future<A3,A4>::execution_space , execution_space >::value >::type * = 0 ) { @@ -659,6 +659,6 @@ public: //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ #endif 
/* #define KOKKOS_QTHREAD_TASK_HPP */ diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp index 5f0b8f70cd8ef36dd153b7bcbb84c42300f4fa6e..9f6e3d37b1b37298655652707980a035ba4eaa95 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp @@ -714,12 +714,12 @@ void ThreadsExec::initialize( unsigned thread_count , } // Check for over-subscription - if( Impl::mpi_ranks_per_node() * long(thread_count) > Impl::processors_per_node() ) { - std::cout << "Kokkos::Threads::initialize WARNING: You are likely oversubscribing your CPU cores." << std::endl; - std::cout << " Detected: " << Impl::processors_per_node() << " cores per node." << std::endl; - std::cout << " Detected: " << Impl::mpi_ranks_per_node() << " MPI_ranks per node." << std::endl; - std::cout << " Requested: " << thread_count << " threads per process." << std::endl; - } + //if( Impl::mpi_ranks_per_node() * long(thread_count) > Impl::processors_per_node() ) { + // std::cout << "Kokkos::Threads::initialize WARNING: You are likely oversubscribing your CPU cores." << std::endl; + // std::cout << " Detected: " << Impl::processors_per_node() << " cores per node." << std::endl; + // std::cout << " Detected: " << Impl::mpi_ranks_per_node() << " MPI_ranks per node." << std::endl; + // std::cout << " Requested: " << thread_count << " threads per process." 
<< std::endl; + //} // Init the array for used for arbitrarily sized atomics Impl::init_lock_array_host_space(); diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp index 3407ffaa54149499d5046ae887a3b415627287b6..4256b0aa67f75125d7f4d6ef6d652f206e9fd847 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp @@ -406,6 +406,8 @@ public: m_exec->barrier(); } } + else + { m_invalid_thread = 1; } } ThreadsExecTeamMember() @@ -460,7 +462,7 @@ public: if(m_league_chunk_end > m_league_size) m_league_chunk_end = m_league_size; - if(m_league_rank>=0) + if((m_league_rank>=0) && (m_league_rank < m_league_chunk_end)) return true; return false; } @@ -704,23 +706,22 @@ public: namespace Kokkos { -template<typename iType> +template< typename iType > KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember> -TeamThreadRange(const Impl::ThreadsExecTeamMember& thread, const iType& count) +Impl::TeamThreadRangeBoundariesStruct< iType, Impl::ThreadsExecTeamMember > +TeamThreadRange( const Impl::ThreadsExecTeamMember& thread, const iType& count ) { - return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>(thread,count); + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::ThreadsExecTeamMember >( thread, count ); } -template<typename iType> +template< typename iType1, typename iType2 > KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember> -TeamThreadRange( const Impl::ThreadsExecTeamMember& thread - , const iType & begin - , const iType & end - ) +Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, + Impl::ThreadsExecTeamMember> +TeamThreadRange( const Impl::ThreadsExecTeamMember& thread, const iType1 & begin, const iType2 & end ) { - return 
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>(thread,begin,end); + typedef typename std::common_type< iType1, iType2 >::type iType; + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::ThreadsExecTeamMember >( thread, iType(begin), iType(end) ); } diff --git a/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp b/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp deleted file mode 100644 index e1599284b297bee7a770d2a6ce87a429a9e5d08a..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp +++ /dev/null @@ -1,930 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -// Experimental unified task-data parallel manycore LDRD - -#include <stdio.h> -#include <iostream> -#include <sstream> -#include <Kokkos_Core.hpp> -#include <Threads/Kokkos_Threads_TaskPolicy.hpp> - -#if defined( KOKKOS_HAVE_PTHREAD ) && defined( KOKKOS_ENABLE_TASKPOLICY ) - -#define QLOCK (reinterpret_cast<void*>( ~((uintptr_t)0) )) -#define QDENIED (reinterpret_cast<void*>( ~((uintptr_t)0) - 1 )) - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -void ThreadsTaskPolicyQueue::Destroy::destroy_shared_allocation() -{ - // Verify the queue is empty - - if ( m_policy->m_count_ready || - m_policy->m_team[0] || - m_policy->m_team[1] || - m_policy->m_team[2] || - m_policy->m_serial[0] || - m_policy->m_serial[1] || - m_policy->m_serial[2] ) { - Kokkos::abort("ThreadsTaskPolicyQueue ERROR : Attempt to destroy non-empty queue" ); - } - - m_policy->~ThreadsTaskPolicyQueue(); -} - -//---------------------------------------------------------------------------- - -ThreadsTaskPolicyQueue::~ThreadsTaskPolicyQueue() -{ -} - -ThreadsTaskPolicyQueue::ThreadsTaskPolicyQueue - ( const unsigned arg_task_max_count - , const unsigned arg_task_max_size - , const unsigned arg_task_default_dependence_capacity - , const unsigned arg_task_team_size - ) - : m_space( 
Kokkos::Threads::memory_space() - , arg_task_max_size * arg_task_max_count * 1.2 - , 16 /* log2(superblock size) */ - ) - , m_team { 0 , 0 , 0 } - , m_serial { 0 , 0 , 0 } - , m_team_size( arg_task_team_size ) - , m_default_dependence_capacity( arg_task_default_dependence_capacity ) - , m_count_ready(0) - , m_count_alloc(0) -{ - const int threads_total = Threads::thread_pool_size(0); - const int threads_per_numa = Threads::thread_pool_size(1); - const int threads_per_core = Threads::thread_pool_size(2); - - if ( 0 == m_team_size ) { - // If a team task then claim for execution until count is zero - // Issue: team collectives cannot assume which pool members are in the team. - // Issue: team must only span a single NUMA region. - - // If more than one thread per core then map cores to work team, - // else map numa to work team. - - if ( 1 < threads_per_core ) m_team_size = threads_per_core ; - else if ( 1 < threads_per_numa ) m_team_size = threads_per_numa ; - else m_team_size = 1 ; - } - - // Verify a valid team size - const bool valid_team_size = - ( 0 < m_team_size && m_team_size <= threads_total ) && - ( - ( 1 == m_team_size ) || - ( threads_per_core == m_team_size ) || - ( threads_per_numa == m_team_size ) - ); - - if ( ! 
valid_team_size ) { - std::ostringstream msg ; - - msg << "Kokkos::Experimental::TaskPolicy< Kokkos::Threads > ERROR" - << " invalid team_size(" << m_team_size << ")" - << " threads_per_core(" << threads_per_core << ")" - << " threads_per_numa(" << threads_per_numa << ")" - << " threads_total(" << threads_total << ")" - ; - - Kokkos::Impl::throw_runtime_exception( msg.str() ); - } - - Kokkos::memory_fence(); -} - -//---------------------------------------------------------------------------- - -void ThreadsTaskPolicyQueue::driver( Kokkos::Impl::ThreadsExec & exec - , const void * arg ) -{ - // Whole thread pool is calling this function - - typedef Kokkos::Impl::ThreadsExecTeamMember member_type ; - - ThreadsTaskPolicyQueue & self = - * reinterpret_cast< ThreadsTaskPolicyQueue * >( const_cast<void*>(arg) ); - - // Create the thread team member with shared memory for the given task. - - const TeamPolicy< Kokkos::Threads > team_policy( 1 , self.m_team_size ); - - member_type team_member( & exec , team_policy , 0 ); - - Kokkos::Impl::ThreadsExec & exec_team_base = - team_member.threads_exec_team_base(); - - task_root_type * volatile * const task_team_ptr = - reinterpret_cast<task_root_type**>( exec_team_base.reduce_memory() ); - - volatile int * const work_team_ptr = - reinterpret_cast<volatile int*>( task_team_ptr + 1 ); - - // Each team must iterate this loop synchronously - // to insure team-execution of team-task. - - const bool team_lead = team_member.team_fan_in(); - - bool work_team = true ; - - while ( work_team ) { - - task_root_type * task = 0 ; - - // Start here with members in a fan_in state - - if ( team_lead ) { - // Team lead queries the ready count for a team-consistent view. - *work_team_ptr = 0 != self.m_count_ready ; - - // Only the team lead attempts to pop a team task from the queues - for ( int i = 0 ; i < int(NPRIORITY) && 0 == task ; ++i ) { - if ( ( i < 2 /* regular queue */ ) - || ( ! 
self.m_space.is_empty() /* waiting for memory queue */ ) ) { - task = pop_ready_task( & self.m_team[i] ); - } - } - - *task_team_ptr = task ; - } - - Kokkos::memory_fence(); - - team_member.team_fan_out(); - - work_team = *work_team_ptr ; - - // Query if team acquired a team task - - if ( 0 != ( task = *task_team_ptr ) ) { - // Set shared memory - team_member.set_league_shmem( 0 , 1 , task->m_shmem_size ); - - (*task->m_team)( task , team_member ); - - // The team task called the functor, - // called the team_fan_in(), and - // if completed the team lead destroyed the task functor. - - if ( team_lead ) { - self.complete_executed_task( task ); - } - } - else { - // No team task acquired, each thread try a serial task - // Try the priority queue, then the regular queue. - for ( int i = 0 ; i < int(NPRIORITY) && 0 == task ; ++i ) { - if ( ( i < 2 /* regular queue */ ) - || ( ! self.m_space.is_empty() /* waiting for memory queue */ ) ) { - task = pop_ready_task( & self.m_serial[i] ); - } - } - - if ( 0 != task ) { - - (*task->m_serial)( task ); - - self.complete_executed_task( task ); - } - - team_member.team_fan_in(); - } - } - - team_member.team_fan_out(); - - exec.fan_in(); -} - -//---------------------------------------------------------------------------- - -ThreadsTaskPolicyQueue::task_root_type * -ThreadsTaskPolicyQueue::pop_ready_task( - ThreadsTaskPolicyQueue::task_root_type * volatile * const queue ) -{ - task_root_type * const q_lock = reinterpret_cast<task_root_type*>(QLOCK); - task_root_type * task = 0 ; - task_root_type * const task_claim = *queue ; - - if ( ( q_lock != task_claim ) && ( 0 != task_claim ) ) { - - // Queue is not locked and not null, try to claim head of queue. - // Is a race among threads to claim the queue. - - if ( task_claim == atomic_compare_exchange(queue,task_claim,q_lock) ) { - - // Aquired the task which must be in the waiting state. 
- - const int claim_state = - atomic_compare_exchange( & task_claim->m_state - , int(TASK_STATE_WAITING) - , int(TASK_STATE_EXECUTING) ); - - task_root_type * lock_verify = 0 ; - - if ( claim_state == int(TASK_STATE_WAITING) ) { - - // Transitioned this task from waiting to executing - // Update the queue to the next entry and release the lock - - task_root_type * const next = - *((task_root_type * volatile *) & task_claim->m_next ); - - *((task_root_type * volatile *) & task_claim->m_next ) = 0 ; - - lock_verify = atomic_compare_exchange( queue , q_lock , next ); - } - - if ( ( claim_state != int(TASK_STATE_WAITING) ) | - ( q_lock != lock_verify ) ) { - - fprintf(stderr,"ThreadsTaskPolicyQueue::pop_ready_task(0x%lx) task(0x%lx) state(%d) ERROR %s\n" - , (unsigned long) queue - , (unsigned long) task - , claim_state - , ( claim_state != int(TASK_STATE_WAITING) - ? "NOT WAITING" - : "UNLOCK" ) ); - fflush(stderr); - Kokkos::abort("ThreadsTaskPolicyQueue::pop_ready_task"); - } - - task = task_claim ; - } - } - - return task ; -} - -//---------------------------------------------------------------------------- - -void ThreadsTaskPolicyQueue::complete_executed_task( - ThreadsTaskPolicyQueue::task_root_type * task ) -{ - task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED); - - // State is either executing or if respawned then waiting, - // try to transition from executing to complete. - // Reads the current value. - - const int state_old = - atomic_compare_exchange( & task->m_state - , int(Kokkos::Experimental::TASK_STATE_EXECUTING) - , int(Kokkos::Experimental::TASK_STATE_COMPLETE) ); - - if ( int(Kokkos::Experimental::TASK_STATE_WAITING) == state_old ) { - // Task requested a respawn so reschedule it. - // The reference count will be incremented if placed in a queue. 
- schedule_task( task , false /* not the initial spawn */ ); - } - else if ( int(Kokkos::Experimental::TASK_STATE_EXECUTING) == state_old ) { - /* Task is complete */ - - // Clear dependences of this task before locking wait queue - - task->clear_dependence(); - - // Stop other tasks from adding themselves to this task's wait queue. - // The wait queue is updated concurrently so guard with an atomic. - - task_root_type * wait_queue = *((task_root_type * volatile *) & task->m_wait ); - task_root_type * wait_queue_old = 0 ; - - do { - wait_queue_old = wait_queue ; - wait_queue = atomic_compare_exchange( & task->m_wait , wait_queue_old , q_denied ); - } while ( wait_queue_old != wait_queue ); - - // The task has been removed from ready queue and - // execution is complete so decrement the reference count. - // The reference count was incremented by the initial spawning. - // The task may be deleted if this was the last reference. - task_root_type::assign( & task , 0 ); - - // Pop waiting tasks and schedule them - while ( wait_queue ) { - task_root_type * const x = wait_queue ; wait_queue = x->m_next ; x->m_next = 0 ; - schedule_task( x , false /* not the initial spawn */ ); - } - } - else { - fprintf( stderr - , "ThreadsTaskPolicyQueue::complete_executed_task(0x%lx) ERROR state_old(%d) dep_size(%d)\n" - , (unsigned long)( task ) - , int(state_old) - , task->m_dep_size - ); - fflush( stderr ); - Kokkos::abort("ThreadsTaskPolicyQueue::complete_executed_task" ); - } - - // If the task was respawned it may have already been - // put in a ready queue and the count incremented. - // By decrementing the count last it will never go to zero - // with a ready or executing task. - - atomic_fetch_add( & m_count_ready , -1 ); -} - -//---------------------------------------------------------------------------- - -void ThreadsTaskPolicyQueue::reschedule_task( - ThreadsTaskPolicyQueue::task_root_type * const task ) -{ - // Reschedule transitions from executing back to waiting. 
- const int old_state = - atomic_compare_exchange( & task->m_state - , int(TASK_STATE_EXECUTING) - , int(TASK_STATE_WAITING) ); - - if ( old_state != int(TASK_STATE_EXECUTING) ) { - - fprintf( stderr - , "ThreadsTaskPolicyQueue::reschedule_task(0x%lx) ERROR state(%d)\n" - , (unsigned long) task - , old_state - ); - fflush(stderr); - Kokkos::abort("ThreadsTaskPolicyQueue::reschedule" ); - } -} - -void ThreadsTaskPolicyQueue::schedule_task - ( ThreadsTaskPolicyQueue::task_root_type * const task - , const bool initial_spawn ) -{ - task_root_type * const q_lock = reinterpret_cast<task_root_type*>(QLOCK); - task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED); - - //---------------------------------------- - // State is either constructing or already waiting. - // If constructing then transition to waiting. - - { - const int old_state = atomic_compare_exchange( & task->m_state - , int(TASK_STATE_CONSTRUCTING) - , int(TASK_STATE_WAITING) ); - - // Head of linked list of tasks waiting on this task - task_root_type * const waitTask = - *((task_root_type * volatile const *) & task->m_wait ); - - // Member of linked list of tasks waiting on some other task - task_root_type * const next = - *((task_root_type * volatile const *) & task->m_next ); - - // An incomplete and non-executing task has: - // task->m_state == TASK_STATE_CONSTRUCTING or TASK_STATE_WAITING - // task->m_wait != q_denied - // task->m_next == 0 - // - if ( ( q_denied == waitTask ) || - ( 0 != next ) || - ( old_state != int(TASK_STATE_CONSTRUCTING) && - old_state != int(TASK_STATE_WAITING) ) ) { - fprintf(stderr,"ThreadsTaskPolicyQueue::schedule_task(0x%lx) STATE ERROR: state(%d) wait(0x%lx) next(0x%lx)\n" - , (unsigned long) task - , old_state - , (unsigned long) waitTask - , (unsigned long) next ); - fflush(stderr); - Kokkos::abort("ThreadsTaskPolicyQueue::schedule" ); - } - } - - //---------------------------------------- - - if ( initial_spawn ) { - // The initial spawn of a task 
increments the reference count - // for the task's existence in either a waiting or ready queue - // until the task has completed. - // Completing the task's execution is the matching - // decrement of the reference count. - - task_root_type::assign( 0 , task ); - } - - //---------------------------------------- - // Insert this task into a dependence task that is not complete. - // Push on to that task's wait queue. - - bool attempt_insert_in_queue = true ; - - task_root_type * volatile * queue = - task->m_dep_size ? & task->m_dep[0]->m_wait : (task_root_type **) 0 ; - - for ( int i = 0 ; attempt_insert_in_queue && ( 0 != queue ) ; ) { - - task_root_type * const head_value_old = *queue ; - - if ( q_denied == head_value_old ) { - // Wait queue is closed because task is complete, - // try again with the next dependence wait queue. - ++i ; - queue = i < task->m_dep_size ? & task->m_dep[i]->m_wait - : (task_root_type **) 0 ; - } - else { - - // Wait queue is open and not denied. - // Have exclusive access to this task. - // Assign m_next assuming a successfull insertion into the queue. - // Fence the memory assignment before attempting the CAS. - - *((task_root_type * volatile *) & task->m_next ) = head_value_old ; - - memory_fence(); - - // Attempt to insert this task into the queue. - // If fails then continue the attempt. - - attempt_insert_in_queue = - head_value_old != atomic_compare_exchange(queue,head_value_old,task); - } - } - - //---------------------------------------- - // All dependences are complete, insert into the ready list - - if ( attempt_insert_in_queue ) { - - // Increment the count of ready tasks. - // Count will be decremented when task is complete. - - atomic_fetch_add( & m_count_ready , 1 ); - - queue = task->m_queue ; - - while ( attempt_insert_in_queue ) { - - // A locked queue is being popped. 
- - task_root_type * const head_value_old = *queue ; - - if ( q_lock != head_value_old ) { - // Read the head of ready queue, - // if same as previous value then CAS locks the ready queue - - // Have exclusive access to this task, - // assign to head of queue, assuming successful insert - // Fence assignment before attempting insert. - *((task_root_type * volatile *) & task->m_next ) = head_value_old ; - - memory_fence(); - - attempt_insert_in_queue = - head_value_old != atomic_compare_exchange(queue,head_value_old,task); - } - } - } -} - - -void TaskMember< Kokkos::Threads , void , void >::latch_add( const int k ) -{ - typedef TaskMember< Kokkos::Threads , void , void > task_root_type ; - - task_root_type * const q_denied = reinterpret_cast<task_root_type*>(QDENIED); - - const bool ok_input = 0 < k ; - - const int count = ok_input ? atomic_fetch_add( & m_dep_size , -k ) - k - : k ; - - const bool ok_count = 0 <= count ; - - const int state = 0 != count ? TASK_STATE_WAITING : - atomic_compare_exchange( & m_state - , TASK_STATE_WAITING - , TASK_STATE_COMPLETE ); - - const bool ok_state = state == TASK_STATE_WAITING ; - - if ( ! ok_count || ! ok_state ) { - printf( "ThreadsTaskPolicyQueue::latch_add[0x%lx](%d) ERROR %s %d\n" - , (unsigned long) this - , k - , ( ! ok_input ? "Non-positive input" : - ( ! ok_count ? "Negative count" : "Bad State" ) ) - , ( ! ok_input ? k : - ( ! ok_count ? count : state ) ) - ); - Kokkos::abort( "ThreadsTaskPolicyQueue::latch_add ERROR" ); - } - else if ( 0 == count ) { - // Stop other tasks from adding themselves to this latch's wait queue. - // The wait queue is updated concurrently so guard with an atomic. 
- - ThreadsTaskPolicyQueue & policy = *m_policy ; - task_root_type * wait_queue = *((task_root_type * volatile *) &m_wait); - task_root_type * wait_queue_old = 0 ; - - do { - wait_queue_old = wait_queue ; - wait_queue = atomic_compare_exchange( & m_wait , wait_queue_old , q_denied ); - } while ( wait_queue_old != wait_queue ); - - // Pop waiting tasks and schedule them - while ( wait_queue ) { - task_root_type * const x = wait_queue ; wait_queue = x->m_next ; x->m_next = 0 ; - policy.schedule_task( x , false /* not initial spawn */ ); - } - } -} - -//---------------------------------------------------------------------------- - -void ThreadsTaskPolicyQueue::deallocate_task( void * ptr , unsigned size_alloc ) -{ -/* - const int n = atomic_fetch_add( & alloc_count , -1 ) - 1 ; - - fprintf( stderr - , "ThreadsTaskPolicyQueue::deallocate_task(0x%lx,%d) count(%d)\n" - , (unsigned long) ptr - , size_alloc - , n - ); - fflush( stderr ); -*/ - - m_space.deallocate( ptr , size_alloc ); - - Kokkos::atomic_decrement( & m_count_alloc ); -} - -ThreadsTaskPolicyQueue::task_root_type * -ThreadsTaskPolicyQueue::allocate_task - ( const unsigned arg_sizeof_task - , const unsigned arg_dep_capacity - , const unsigned arg_team_shmem - ) -{ - const unsigned base_size = arg_sizeof_task + - ( arg_sizeof_task % sizeof(task_root_type*) - ? sizeof(task_root_type*) - arg_sizeof_task % sizeof(task_root_type*) - : 0 ); - - const unsigned dep_capacity - = ~0u == arg_dep_capacity - ? m_default_dependence_capacity - : arg_dep_capacity ; - - const unsigned size_alloc = - base_size + sizeof(task_root_type*) * dep_capacity ; - -#if 0 - // User created task memory pool with an estimate, - // if estimate is to low then report and throw exception. 
- - if ( m_space.get_min_block_size() < size_alloc ) { - fprintf(stderr,"TaskPolicy<Threads> task allocation requires %d bytes on memory pool with %d byte chunk size\n" - , int(size_alloc) - , int(m_space.get_min_block_size()) - ); - fflush(stderr); - Kokkos::Impl::throw_runtime_exception("TaskMember< Threads >::task_allocate"); - } -#endif - - task_root_type * const task = - reinterpret_cast<task_root_type*>( m_space.allocate( size_alloc ) ); - - if ( task != 0 ) { - - // Initialize task's root and value data structure - // Calling function must copy construct the functor. - - new( (void*) task ) task_root_type(); - - task->m_policy = this ; - task->m_size_alloc = size_alloc ; - task->m_dep_capacity = dep_capacity ; - task->m_shmem_size = arg_team_shmem ; - - if ( dep_capacity ) { - task->m_dep = - reinterpret_cast<task_root_type**>( - reinterpret_cast<unsigned char*>(task) + base_size ); - - for ( unsigned i = 0 ; i < dep_capacity ; ++i ) - task->task_root_type::m_dep[i] = 0 ; - } - - Kokkos::atomic_increment( & m_count_alloc ); - } - return task ; -} - - -//---------------------------------------------------------------------------- - -void ThreadsTaskPolicyQueue::add_dependence - ( ThreadsTaskPolicyQueue::task_root_type * const after - , ThreadsTaskPolicyQueue::task_root_type * const before - ) -{ - if ( ( after != 0 ) && ( before != 0 ) ) { - - int const state = *((volatile const int *) & after->m_state ); - - // Only add dependence during construction or during execution. - // Both tasks must have the same policy. - // Dependence on non-full memory cannot be mixed with any other dependence. 
- - const bool ok_state = - Kokkos::Experimental::TASK_STATE_CONSTRUCTING == state || - Kokkos::Experimental::TASK_STATE_EXECUTING == state ; - - const bool ok_capacity = - after->m_dep_size < after->m_dep_capacity ; - - const bool ok_policy = - after->m_policy == this && before->m_policy == this ; - - if ( ok_state && ok_capacity && ok_policy ) { - - ++after->m_dep_size ; - - task_root_type::assign( after->m_dep + (after->m_dep_size-1) , before ); - - memory_fence(); - } - else { - -fprintf( stderr - , "ThreadsTaskPolicyQueue::add_dependence( 0x%lx , 0x%lx ) ERROR %s\n" - , (unsigned long) after - , (unsigned long) before - , ( ! ok_state ? "Task not constructing or executing" : - ( ! ok_capacity ? "Task Exceeded dependence capacity" - : "Tasks from different policies" - )) ); - -fflush( stderr ); - - Kokkos::abort("ThreadsTaskPolicyQueue::add_dependence ERROR"); - } - } -} - -} /* namespace Impl */ -} /* namespace Experimental */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { - -TaskPolicy< Kokkos::Threads >::TaskPolicy - ( const unsigned arg_task_max_count - , const unsigned arg_task_max_size // Application's task size - , const unsigned arg_task_default_dependence_capacity - , const unsigned arg_task_team_size - ) - : m_track() - , m_policy(0) -{ - typedef Kokkos::Experimental::Impl::SharedAllocationRecord - < Kokkos::HostSpace , Impl::ThreadsTaskPolicyQueue::Destroy > record_type ; - - record_type * record = - record_type::allocate( Kokkos::HostSpace() - , "Threads task queue" - , sizeof(Impl::ThreadsTaskPolicyQueue) - ); - - m_policy = - reinterpret_cast< Impl::ThreadsTaskPolicyQueue * >( record->data() ); - - // Tasks are allocated with application's task size + sizeof(task_root_type) - - const size_t full_task_size_estimate = - arg_task_max_size + - sizeof(task_root_type) 
+ - sizeof(task_root_type*) * arg_task_default_dependence_capacity ; - - new( m_policy ) - Impl::ThreadsTaskPolicyQueue( arg_task_max_count - , full_task_size_estimate - , arg_task_default_dependence_capacity - , arg_task_team_size ); - - record->m_destroy.m_policy = m_policy ; - - m_track.assign_allocated_record_to_uninitialized( record ); -} - - -TaskPolicy< Kokkos::Threads >::member_type & -TaskPolicy< Kokkos::Threads >::member_single() -{ - static member_type s ; - return s ; -} - -void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Threads > & policy ) -{ - typedef Kokkos::Impl::ThreadsExecTeamMember member_type ; - - enum { BASE_SHMEM = 1024 }; - - Kokkos::Impl::ThreadsExec::resize_scratch( 0 , member_type::team_reduce_size() + BASE_SHMEM ); - - Kokkos::Impl::ThreadsExec::start( & Impl::ThreadsTaskPolicyQueue::driver - , policy.m_policy ); - - Kokkos::Impl::ThreadsExec::fence(); -} - -} /* namespace Experimental */ -} /* namespace Kokkos */ - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -typedef TaskMember< Kokkos::Threads , void , void > Task ; - -//---------------------------------------------------------------------------- - -Task::~TaskMember() -{ -} - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - -void Task::assign( Task ** const lhs_ptr , Task * rhs ) -{ - Task * const q_denied = reinterpret_cast<Task*>(QDENIED); - - // Increment rhs reference count. - if ( rhs ) { atomic_fetch_add( & rhs->m_ref_count , 1 ); } - - if ( 0 == lhs_ptr ) return ; - - // Must have exclusive access to *lhs_ptr. - // Assign the pointer and retrieve the previous value. 
- -#if 1 - - Task * const old_lhs = *lhs_ptr ; - - *lhs_ptr = rhs ; - -#elif 0 - - Task * const old_lhs = *((Task*volatile*)lhs_ptr); - - *((Task*volatile*)lhs_ptr) = rhs ; - - Kokkos::memory_fence(); - -#else - - Task * const old_lhs = atomic_exchange( lhs_ptr , rhs ); - -#endif - - if ( old_lhs && rhs && old_lhs->m_policy != rhs->m_policy ) { - Kokkos::abort( "Kokkos::Impl::TaskMember<Kokkos::Threads>::assign ERROR different queues"); - } - - if ( old_lhs ) { - - // Decrement former lhs reference count. - // If reference count is zero task must be complete, then delete task. - // Task is ready for deletion when wait == q_denied - int const count = atomic_fetch_add( & (old_lhs->m_ref_count) , -1 ) - 1 ; - int const state = old_lhs->m_state ; - Task * const wait = *((Task * const volatile *) & old_lhs->m_wait ); - - const bool ok_count = 0 <= count ; - - // If count == 0 then will be deleting - // and must either be constructing or complete. - const bool ok_state = 0 < count ? true : - ( ( state == int(TASK_STATE_CONSTRUCTING) && wait == 0 ) || - ( state == int(TASK_STATE_COMPLETE) && wait == q_denied ) ) - && - old_lhs->m_next == 0 && - old_lhs->m_dep_size == 0 ; - - if ( ! ok_count || ! 
ok_state ) { - - fprintf( stderr , "Kokkos::Impl::TaskManager<Kokkos::Threads>::assign ERROR deleting task(0x%lx) m_ref_count(%d) , m_wait(0x%ld)\n" - , (unsigned long) old_lhs - , count - , (unsigned long) wait ); - fflush(stderr); - Kokkos::abort( "Kokkos::Impl::TaskMember<Kokkos::Threads>::assign ERROR deleting"); - } - - if ( count == 0 ) { - // When 'count == 0' this thread has exclusive access to 'old_lhs' - - ThreadsTaskPolicyQueue & queue = *( old_lhs->m_policy ); - - queue.deallocate_task( old_lhs , old_lhs->m_size_alloc ); - } - } -} - -#endif - -//---------------------------------------------------------------------------- - -Task * Task::get_dependence( int i ) const -{ - Task * const t = m_dep[i] ; - - if ( Kokkos::Experimental::TASK_STATE_EXECUTING != m_state || i < 0 || m_dep_size <= i || 0 == t ) { - -fprintf( stderr - , "TaskMember< Threads >::get_dependence ERROR : task[%lx]{ state(%d) dep_size(%d) dep[%d] = %lx }\n" - , (unsigned long) this - , m_state - , m_dep_size - , i - , (unsigned long) t - ); -fflush( stderr ); - - Kokkos::Impl::throw_runtime_exception("TaskMember< Threads >::get_dependence ERROR"); - } - - return t ; -} - -//---------------------------------------------------------------------------- - -void Task::clear_dependence() -{ - for ( int i = m_dep_size - 1 ; 0 <= i ; --i ) { - assign( m_dep + i , 0 ); - } - - *((volatile int *) & m_dep_size ) = 0 ; - - memory_fence(); -} - -//---------------------------------------------------------------------------- - -} /* namespace Impl */ -} /* namespace Experimental */ -} /* namespace Kokkos */ - -#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */ - diff --git a/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.hpp b/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.hpp deleted file mode 100644 index 116d32e4fc4d6c6da2968518caacc133e7488ab4..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.hpp 
+++ /dev/null @@ -1,745 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -// Experimental unified task-data parallel manycore LDRD - -#ifndef KOKKOS_THREADS_TASKPOLICY_HPP -#define KOKKOS_THREADS_TASKPOLICY_HPP - - -#include <Kokkos_Threads.hpp> -#include <Kokkos_TaskPolicy.hpp> - -#if defined( KOKKOS_HAVE_PTHREAD ) && defined( KOKKOS_ENABLE_TASKPOLICY ) - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -struct ThreadsTaskPolicyQueue ; - -/** \brief Base class for all Kokkos::Threads tasks */ -template<> -class TaskMember< Kokkos::Threads , void , void > { -public: - - template < class > friend class Kokkos::Experimental::TaskPolicy ; - friend struct ThreadsTaskPolicyQueue ; - - typedef TaskMember * (* function_verify_type) ( TaskMember * ); - typedef void (* function_single_type) ( TaskMember * ); - typedef void (* function_team_type) ( TaskMember * , Kokkos::Impl::ThreadsExecTeamMember & ); - -private: - - - ThreadsTaskPolicyQueue * m_policy ; - TaskMember * volatile * m_queue ; - function_verify_type m_verify ; - function_team_type m_team ; ///< Apply function - function_single_type m_serial ; ///< Apply function - TaskMember ** m_dep ; ///< Dependences - TaskMember * m_wait ; ///< Head of linked list of tasks waiting on this task - TaskMember * m_next ; ///< Member of linked list of tasks - int m_dep_capacity ; ///< Capacity of dependences - int m_dep_size ; ///< Actual count of dependences - int m_size_alloc ; - int m_shmem_size ; - int m_ref_count ; ///< Reference count - int m_state ; ///< State of the task - - - TaskMember( TaskMember && ) = delete ; - TaskMember( const TaskMember & ) = delete ; - TaskMember & operator = ( TaskMember && ) = delete ; - TaskMember & operator = ( const TaskMember & ) = delete ; - -protected: - - TaskMember() - : m_policy(0) - , m_verify(0) - , m_team(0) - , m_serial(0) 
- , m_dep(0) - , m_wait(0) - , m_next(0) - , m_dep_capacity(0) - , m_dep_size(0) - , m_size_alloc(0) - , m_shmem_size(0) - , m_ref_count(0) - , m_state( TASK_STATE_CONSTRUCTING ) - {} - -public: - - ~TaskMember(); - - KOKKOS_INLINE_FUNCTION - int reference_count() const - { return *((volatile int *) & m_ref_count ); } - - template< typename ResultType > - KOKKOS_FUNCTION static - TaskMember * verify_type( TaskMember * t ) - { - enum { check_type = ! std::is_same< ResultType , void >::value }; - - if ( check_type && t != 0 ) { - - // Verify that t->m_verify is this function - const function_verify_type self = & TaskMember::template verify_type< ResultType > ; - - if ( t->m_verify != self ) { - t = 0 ; - Kokkos::abort("TaskPolicy< Threads > verify_result_type" ); - } - } - return t ; - } - - //---------------------------------------- - /* Inheritence Requirements on task types: - * - * class TaskMember< Threads , DerivedType::value_type , FunctorType > - * : public TaskMember< Threads , DerivedType::value_type , void > - * , public Functor - * { ... }; - * - * If value_type != void - * class TaskMember< Threads , value_type , void > - * : public TaskMember< Threads , void , void > - * - */ - //---------------------------------------- - - template< class DerivedTaskType , class Tag > - KOKKOS_FUNCTION static - void apply_single( - typename std::enable_if - <( std::is_same<Tag,void>::value && - std::is_same< typename DerivedTaskType::result_type , void >::value - ), TaskMember * >::type t ) - { - { - typedef typename DerivedTaskType::functor_type functor_type ; - - functor_type * const f = - static_cast< functor_type * >( static_cast< DerivedTaskType * >(t) ); - - f->apply(); - - if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) { - f->~functor_type(); - } - } - } - - template< class DerivedTaskType , class Tag > - KOKKOS_FUNCTION static - void apply_single( - typename std::enable_if - <( std::is_same< Tag , void >::value && - ! 
std::is_same< typename DerivedTaskType::result_type , void >::value - ), TaskMember * >::type t ) - { - { - typedef typename DerivedTaskType::functor_type functor_type ; - - DerivedTaskType * const self = static_cast< DerivedTaskType * >(t); - functor_type * const f = static_cast< functor_type * >( self ); - - f->apply( self->m_result ); - - if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) { - f->~functor_type(); - } - } - } - - //---------------------------------------- - - template< class DerivedTaskType , class Tag > - KOKKOS_FUNCTION static - void apply_team( - typename std::enable_if - <( std::is_same<Tag,void>::value && - std::is_same<typename DerivedTaskType::result_type,void>::value - ), TaskMember * >::type t - , Kokkos::Impl::ThreadsExecTeamMember & member - ) - { - typedef typename DerivedTaskType::functor_type functor_type ; - - functor_type * const f = - static_cast< functor_type * >( static_cast< DerivedTaskType * >(t) ); - - f->apply( member ); - - // Synchronize for possible functor destruction and - // completion of team task. - if ( member.team_fan_in() ) { - if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) { - f->~functor_type(); - } - } - } - - template< class DerivedTaskType , class Tag > - KOKKOS_FUNCTION static - void apply_team( - typename std::enable_if - <( std::is_same<Tag,void>::value && - ! std::is_same<typename DerivedTaskType::result_type,void>::value - ), TaskMember * >::type t - , Kokkos::Impl::ThreadsExecTeamMember & member - ) - { - typedef typename DerivedTaskType::functor_type functor_type ; - - DerivedTaskType * const self = static_cast< DerivedTaskType * >(t); - functor_type * const f = static_cast< functor_type * >( self ); - - f->apply( member , self->m_result ); - - // Synchronize for possible functor destruction and - // completion of team task. 
- if ( member.team_fan_in() ) { - if ( t->m_state == int(Kokkos::Experimental::TASK_STATE_EXECUTING) ) { - f->~functor_type(); - } - } - } - - //---------------------------------------- - -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - static - void assign( TaskMember ** const lhs , TaskMember * const rhs ); -#else - KOKKOS_INLINE_FUNCTION static - void assign( TaskMember ** const lhs , TaskMember * const rhs ) {} -#endif - - TaskMember * get_dependence( int i ) const ; - - KOKKOS_INLINE_FUNCTION - int get_dependence() const { return m_dep_size ; } - - void clear_dependence(); - - void latch_add( const int k ); - - //---------------------------------------- - - typedef FutureValueTypeIsVoidError get_result_type ; - - KOKKOS_INLINE_FUNCTION - get_result_type get() const { return get_result_type() ; } - - inline static - void construct_result( TaskMember * const ) {} - - KOKKOS_INLINE_FUNCTION - Kokkos::Experimental::TaskState get_state() const { return Kokkos::Experimental::TaskState( m_state ); } - -}; - -/** \brief A Future< Kokkos::Threads , ResultType > will cast - * from TaskMember< Kokkos::Threads , void , void > - * to TaskMember< Kokkos::Threads , ResultType , void > - * to query the result. 
- */ -template< class ResultType > -class TaskMember< Kokkos::Threads , ResultType , void > - : public TaskMember< Kokkos::Threads , void , void > -{ -public: - - typedef ResultType result_type ; - - result_type m_result ; - - typedef const result_type & get_result_type ; - - KOKKOS_INLINE_FUNCTION - get_result_type get() const { return m_result ; } - - inline static - void construct_result( TaskMember * const ptr ) - { - new((void*)(& ptr->m_result)) result_type(); - } - - inline - TaskMember() : TaskMember< Kokkos::Threads , void , void >(), m_result() {} - - TaskMember( TaskMember && ) = delete ; - TaskMember( const TaskMember & ) = delete ; - TaskMember & operator = ( TaskMember && ) = delete ; - TaskMember & operator = ( const TaskMember & ) = delete ; -}; - -/** \brief Callback functions will cast - * from TaskMember< Kokkos::Threads , void , void > - * to TaskMember< Kokkos::Threads , ResultType , FunctorType > - * to execute work functions. - */ -template< class ResultType , class FunctorType > -class TaskMember< Kokkos::Threads , ResultType , FunctorType > - : public TaskMember< Kokkos::Threads , ResultType , void > - , public FunctorType -{ -public: - typedef ResultType result_type ; - typedef FunctorType functor_type ; - - inline - TaskMember( const functor_type & arg_functor ) - : TaskMember< Kokkos::Threads , ResultType , void >() - , functor_type( arg_functor ) - {} - - inline static - void copy_construct( TaskMember * const ptr - , const functor_type & arg_functor ) - { - typedef TaskMember< Kokkos::Threads , ResultType , void > base_type ; - - new((void*)static_cast<FunctorType*>(ptr)) functor_type( arg_functor ); - - base_type::construct_result( static_cast<base_type*>( ptr ) ); - } - - TaskMember() = delete ; - TaskMember( TaskMember && ) = delete ; - TaskMember( const TaskMember & ) = delete ; - TaskMember & operator = ( TaskMember && ) = delete ; - TaskMember & operator = ( const TaskMember & ) = delete ; -}; - 
-//---------------------------------------------------------------------------- - -struct ThreadsTaskPolicyQueue { - - enum { NPRIORITY = 3 }; - - typedef Kokkos::Experimental::MemoryPool< Kokkos::Threads > - memory_space ; - - typedef Kokkos::Experimental::Impl::TaskMember< Kokkos::Threads, void, void > - task_root_type ; - - memory_space m_space ; - task_root_type * m_team[ NPRIORITY ]; - task_root_type * m_serial[ NPRIORITY ]; - int m_team_size ; ///< Fixed size of a task-team - int m_default_dependence_capacity ; - int volatile m_count_ready ; ///< Ready plus executing tasks - int volatile m_count_alloc ; ///< Total allocated tasks - - // Execute tasks until all non-waiting tasks are complete. - static void driver( Kokkos::Impl::ThreadsExec & exec - , const void * arg ); - - task_root_type * allocate_task - ( const unsigned arg_sizeof_task - , const unsigned arg_dep_capacity - , const unsigned arg_team_shmem - ); - - void deallocate_task( void * , unsigned ); - void schedule_task( task_root_type * const - , const bool initial_spawn = true ); - void reschedule_task( task_root_type * const ); - void add_dependence( task_root_type * const after - , task_root_type * const before ); - - // When a task finishes executing update its dependences - // and either deallocate the task if complete - // or reschedule the task if respawned. 
- void complete_executed_task( task_root_type * ); - - // Pop a task from a ready queue - static task_root_type * - pop_ready_task( task_root_type * volatile * const queue ); - - ThreadsTaskPolicyQueue() = delete ; - ThreadsTaskPolicyQueue( ThreadsTaskPolicyQueue && ) = delete ; - ThreadsTaskPolicyQueue( const ThreadsTaskPolicyQueue & ) = delete ; - ThreadsTaskPolicyQueue & operator = ( ThreadsTaskPolicyQueue && ) = delete ; - ThreadsTaskPolicyQueue & operator = ( const ThreadsTaskPolicyQueue & ) = delete ; - - ~ThreadsTaskPolicyQueue(); - - ThreadsTaskPolicyQueue - ( const unsigned arg_task_max_count - , const unsigned arg_task_max_size - , const unsigned arg_task_default_dependence_capacity - , const unsigned arg_task_team_size - ); - - // Callback to destroy the shared memory tracked queue. - struct Destroy { - ThreadsTaskPolicyQueue * m_policy ; - void destroy_shared_allocation(); - }; -}; - -} /* namespace Impl */ -} /* namespace Experimental */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { - -void wait( TaskPolicy< Kokkos::Threads > & ); - -template<> -class TaskPolicy< Kokkos::Threads > -{ -public: - - typedef Kokkos::Threads execution_space ; - typedef TaskPolicy execution_policy ; - typedef Kokkos::Impl::ThreadsExecTeamMember member_type ; - -private: - - typedef Impl::TaskMember< Kokkos::Threads , void , void > task_root_type ; - typedef Kokkos::Experimental::MemoryPool< Kokkos::Threads > memory_space ; - - typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ; - - track_type m_track ; - Impl::ThreadsTaskPolicyQueue * m_policy ; - - template< class FunctorType > - static inline - const task_root_type * get_task_root( const FunctorType * f ) - { - typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ; - 
return static_cast< const task_root_type * >( static_cast< const task_type * >(f) ); - } - - template< class FunctorType > - static inline - task_root_type * get_task_root( FunctorType * f ) - { - typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ; - return static_cast< task_root_type * >( static_cast< task_type * >(f) ); - } - - /** \brief Allocate and construct a task. - * - * Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ] - */ - template< class DerivedTaskType , class Tag > - task_root_type * - create( const typename DerivedTaskType::functor_type & arg_functor - , const task_root_type::function_single_type arg_apply_single - , const task_root_type::function_team_type arg_apply_team - , const unsigned arg_team_shmem - , const unsigned arg_dependence_capacity - ) - { - task_root_type * const t = - m_policy->allocate_task( sizeof(DerivedTaskType) - , arg_dependence_capacity - , arg_team_shmem - ); - if ( t != 0 ) { - - DerivedTaskType * const task = static_cast<DerivedTaskType*>(t); - - DerivedTaskType::copy_construct( task , arg_functor ); - - task->task_root_type::m_verify = & task_root_type::template verify_type< typename DerivedTaskType::value_type > ; - task->task_root_type::m_team = arg_apply_team ; - task->task_root_type::m_serial = arg_apply_single ; - - // Do not proceed until initialization is written to memory - Kokkos::memory_fence(); - } - return t ; - } - -public: - - // Valid team sizes are 1, - // Threads::pool_size(1) == threads per numa, or - // Threads::pool_size(2) == threads per core - - TaskPolicy - ( const unsigned arg_task_max_count - , const unsigned arg_task_max_size - , const unsigned arg_task_default_dependence_capacity = 4 - , const unsigned arg_task_team_size = 0 /* choose default */ - ); - - KOKKOS_FUNCTION TaskPolicy() = default ; - KOKKOS_FUNCTION TaskPolicy( TaskPolicy && rhs ) = default ; - KOKKOS_FUNCTION TaskPolicy( const TaskPolicy & rhs ) = 
default ; - KOKKOS_FUNCTION TaskPolicy & operator = ( TaskPolicy && rhs ) = default ; - KOKKOS_FUNCTION TaskPolicy & operator = ( const TaskPolicy & rhs ) = default ; - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - int allocated_task_count() const { return m_policy->m_count_alloc ; } - - //---------------------------------------- - // Create serial-thread task - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - Future< typename FunctorType::value_type , execution_space > - task_create( const FunctorType & functor - , const unsigned dependence_capacity = ~0u ) - { - typedef typename FunctorType::value_type value_type ; - typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ; - - return Future< value_type , execution_space >( -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - TaskPolicy::create< task_type , void > - ( functor - , & task_root_type::template apply_single< task_type , void > - , task_root_type::function_team_type(0) - , 0 - , dependence_capacity - ) -#endif - ); - } - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - Future< typename FunctorType::value_type , execution_space > - proc_create( const FunctorType & functor - , const unsigned dependence_capacity = ~0u ) - { return task_create( functor , dependence_capacity ); } - - // Create thread-team task - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - Future< typename FunctorType::value_type , execution_space > - task_create_team( const FunctorType & functor - , const unsigned dependence_capacity = ~0u ) - { - typedef typename FunctorType::value_type value_type ; - typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ; - - return Future< value_type , execution_space >( -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - TaskPolicy::create< task_type , void > - ( functor - , task_root_type::function_single_type(0) - , & task_root_type::template apply_team< task_type , void > 
- , Kokkos::Impl::FunctorTeamShmemSize< FunctorType >:: - value( functor , m_policy->m_team_size ) - , dependence_capacity - ) -#endif - ); - } - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - Future< typename FunctorType::value_type , execution_space > - proc_create_team( const FunctorType & functor - , const unsigned dependence_capacity = ~0u ) - { return task_create_team( functor , dependence_capacity ); } - - template< class A1 , class A2 , class A3 , class A4 > - KOKKOS_INLINE_FUNCTION - void add_dependence( const Future<A1,A2> & after - , const Future<A3,A4> & before - , typename std::enable_if - < std::is_same< typename Future<A1,A2>::execution_space , execution_space >::value - && - std::is_same< typename Future<A3,A4>::execution_space , execution_space >::value - >::type * = 0 - ) const - { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - m_policy->add_dependence( after.m_task , before.m_task ); -#endif - } - - //---------------------------------------- - - Future< Latch , execution_space > - KOKKOS_INLINE_FUNCTION - create_latch( const int N ) const - { - task_root_type * const task = - m_policy->allocate_task( sizeof(task_root_type) , 0 , 0 ); - task->m_dep_size = N ; // Using m_dep_size for latch counter - task->m_state = TASK_STATE_WAITING ; - return Future< Latch , execution_space >( task ); - } - - //---------------------------------------- - - template< class FunctorType , class A3 , class A4 > - KOKKOS_INLINE_FUNCTION - void add_dependence( FunctorType * task_functor - , const Future<A3,A4> & before - , typename std::enable_if - < std::is_same< typename Future<A3,A4>::execution_space , execution_space >::value - >::type * = 0 - ) const - { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - m_policy->add_dependence( get_task_root(task_functor) , before.m_task ); -#endif - } - - template< class ValueType > - const Future< ValueType , execution_space > & - spawn( const Future< ValueType , execution_space > & f - , const 
bool priority = false ) const - { - if ( f.m_task ) { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - f.m_task->m_queue = - ( f.m_task->m_team != 0 - ? & ( m_policy->m_team[ priority ? 0 : 1 ] ) - : & ( m_policy->m_serial[ priority ? 0 : 1 ] ) ); - m_policy->schedule_task( f.m_task ); -#endif - } - return f ; - } - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - void respawn( FunctorType * task_functor - , const bool priority = false ) const - { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - task_root_type * const t = get_task_root(task_functor); - t->m_queue = - ( t->m_team != 0 ? & ( m_policy->m_team[ priority ? 0 : 1 ] ) - : & ( m_policy->m_serial[ priority ? 0 : 1 ] ) ); - m_policy->reschedule_task( t ); -#endif - } - - // When a create method fails by returning a null Future - // the task that called the create method may respawn - // with a dependence on memory becoming available. - // This is a race as more than one task may be respawned - // with this need. - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - void respawn_needing_memory( FunctorType * task_functor ) const - { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - task_root_type * const t = get_task_root(task_functor); - t->m_queue = - ( t->m_team != 0 ? & ( m_policy->m_team[ 2 ] ) - : & ( m_policy->m_serial[ 2 ] ) ); - m_policy->reschedule_task( t ); -#endif - } - - //---------------------------------------- - // Functions for an executing task functor to query dependences, - // set new dependences, and respawn itself. 
- - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - Future< void , execution_space > - get_dependence( const FunctorType * task_functor , int i ) const - { - return Future<void,execution_space>( -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - get_task_root(task_functor)->get_dependence(i) -#endif - ); - } - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - int get_dependence( const FunctorType * task_functor ) const -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return get_task_root(task_functor)->get_dependence(); } -#else - { return 0 ; } -#endif - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - void clear_dependence( FunctorType * task_functor ) const -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { get_task_root(task_functor)->clear_dependence(); } -#else - {} -#endif - - //---------------------------------------- - - static member_type & member_single(); - - friend void wait( TaskPolicy< Kokkos::Threads > & ); -}; - -} /* namespace Experimental */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- - -#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */ -#endif /* #ifndef KOKKOS_THREADS_TASKPOLICY_HPP */ - - diff --git a/lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp b/lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp index ed56536cd91b52f3d0beddc8095eba9a4bb593c9..d5d27cc8365c48ddd83077cc96511374f4658e89 100644 --- a/lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp +++ b/lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp @@ -41,2892 +41,6 @@ //@HEADER */ -#ifndef KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP -#define KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP - -#include <type_traits> -#include <initializer_list> - -#include <Kokkos_Core_fwd.hpp> -#include <Kokkos_Pair.hpp> -#include <Kokkos_Layout.hpp> -#include <impl/Kokkos_Error.hpp> -#include <impl/Kokkos_Traits.hpp> -#include <impl/KokkosExp_ViewCtor.hpp> 
-#include <impl/Kokkos_Atomic_View.hpp> - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -template< unsigned I , size_t ... Args > -struct variadic_size_t - { enum { value = ~size_t(0) }; }; - -template< size_t Val , size_t ... Args > -struct variadic_size_t< 0 , Val , Args ... > - { enum { value = Val }; }; - -template< unsigned I , size_t Val , size_t ... Args > -struct variadic_size_t< I , Val , Args ... > - { enum { value = variadic_size_t< I - 1 , Args ... >::value }; }; - -template< size_t ... Args > -struct rank_dynamic ; - -template<> -struct rank_dynamic<> { enum { value = 0 }; }; - -template< size_t Val , size_t ... Args > -struct rank_dynamic< Val , Args... > -{ - enum { value = ( Val == 0 ? 1 : 0 ) + rank_dynamic< Args... >::value }; -}; - -#define KOKKOS_IMPL_VIEW_DIMENSION( R ) \ - template< size_t V , unsigned > struct ViewDimension ## R \ - { \ - enum { ArgN ## R = ( V != ~size_t(0) ? V : 1 ) }; \ - enum { N ## R = ( V != ~size_t(0) ? 
V : 1 ) }; \ - KOKKOS_INLINE_FUNCTION explicit ViewDimension ## R ( size_t ) {} \ - ViewDimension ## R () = default ; \ - ViewDimension ## R ( const ViewDimension ## R & ) = default ; \ - ViewDimension ## R & operator = ( const ViewDimension ## R & ) = default ; \ - }; \ - template< unsigned RD > struct ViewDimension ## R < 0 , RD > \ - { \ - enum { ArgN ## R = 0 }; \ - typename std::conditional<( RD < 3 ), size_t , unsigned >::type N ## R ; \ - ViewDimension ## R () = default ; \ - ViewDimension ## R ( const ViewDimension ## R & ) = default ; \ - ViewDimension ## R & operator = ( const ViewDimension ## R & ) = default ; \ - KOKKOS_INLINE_FUNCTION explicit ViewDimension ## R ( size_t V ) : N ## R ( V ) {} \ - }; - -KOKKOS_IMPL_VIEW_DIMENSION( 0 ) -KOKKOS_IMPL_VIEW_DIMENSION( 1 ) -KOKKOS_IMPL_VIEW_DIMENSION( 2 ) -KOKKOS_IMPL_VIEW_DIMENSION( 3 ) -KOKKOS_IMPL_VIEW_DIMENSION( 4 ) -KOKKOS_IMPL_VIEW_DIMENSION( 5 ) -KOKKOS_IMPL_VIEW_DIMENSION( 6 ) -KOKKOS_IMPL_VIEW_DIMENSION( 7 ) - -#undef KOKKOS_IMPL_VIEW_DIMENSION - -template< size_t ... Vals > -struct ViewDimension - : public ViewDimension0< variadic_size_t<0,Vals...>::value - , rank_dynamic< Vals... >::value > - , public ViewDimension1< variadic_size_t<1,Vals...>::value - , rank_dynamic< Vals... >::value > - , public ViewDimension2< variadic_size_t<2,Vals...>::value - , rank_dynamic< Vals... >::value > - , public ViewDimension3< variadic_size_t<3,Vals...>::value - , rank_dynamic< Vals... >::value > - , public ViewDimension4< variadic_size_t<4,Vals...>::value - , rank_dynamic< Vals... >::value > - , public ViewDimension5< variadic_size_t<5,Vals...>::value - , rank_dynamic< Vals... >::value > - , public ViewDimension6< variadic_size_t<6,Vals...>::value - , rank_dynamic< Vals... >::value > - , public ViewDimension7< variadic_size_t<7,Vals...>::value - , rank_dynamic< Vals... >::value > -{ - typedef ViewDimension0< variadic_size_t<0,Vals...>::value - , rank_dynamic< Vals... 
>::value > D0 ; - typedef ViewDimension1< variadic_size_t<1,Vals...>::value - , rank_dynamic< Vals... >::value > D1 ; - typedef ViewDimension2< variadic_size_t<2,Vals...>::value - , rank_dynamic< Vals... >::value > D2 ; - typedef ViewDimension3< variadic_size_t<3,Vals...>::value - , rank_dynamic< Vals... >::value > D3 ; - typedef ViewDimension4< variadic_size_t<4,Vals...>::value - , rank_dynamic< Vals... >::value > D4 ; - typedef ViewDimension5< variadic_size_t<5,Vals...>::value - , rank_dynamic< Vals... >::value > D5 ; - typedef ViewDimension6< variadic_size_t<6,Vals...>::value - , rank_dynamic< Vals... >::value > D6 ; - typedef ViewDimension7< variadic_size_t<7,Vals...>::value - , rank_dynamic< Vals... >::value > D7 ; - - using D0::ArgN0 ; - using D1::ArgN1 ; - using D2::ArgN2 ; - using D3::ArgN3 ; - using D4::ArgN4 ; - using D5::ArgN5 ; - using D6::ArgN6 ; - using D7::ArgN7 ; - - using D0::N0 ; - using D1::N1 ; - using D2::N2 ; - using D3::N3 ; - using D4::N4 ; - using D5::N5 ; - using D6::N6 ; - using D7::N7 ; - - enum { rank = sizeof...(Vals) }; - enum { rank_dynamic = Impl::rank_dynamic< Vals... >::value }; - - ViewDimension() = default ; - ViewDimension( const ViewDimension & ) = default ; - ViewDimension & operator = ( const ViewDimension & ) = default ; - - KOKKOS_INLINE_FUNCTION - constexpr - ViewDimension( size_t n0 , size_t n1 , size_t n2 , size_t n3 - , size_t n4 , size_t n5 , size_t n6 , size_t n7 ) - : D0( n0 ) - , D1( n1 ) - , D2( n2 ) - , D3( n3 ) - , D4( n4 ) - , D5( n5 ) - , D6( n6 ) - , D7( n7 ) - {} - - KOKKOS_INLINE_FUNCTION - constexpr size_t extent( const unsigned r ) const - { - return r == 0 ? N0 : ( - r == 1 ? N1 : ( - r == 2 ? N2 : ( - r == 3 ? N3 : ( - r == 4 ? N4 : ( - r == 5 ? N5 : ( - r == 6 ? N6 : ( - r == 7 ? N7 : 0 ))))))); - } - - template< size_t N > - struct prepend { typedef ViewDimension< N , Vals... > type ; }; - - template< size_t N > - struct append { typedef ViewDimension< Vals... 
, N > type ; }; -}; - -template< class A , class B > -struct ViewDimensionJoin ; - -template< size_t ... A , size_t ... B > -struct ViewDimensionJoin< ViewDimension< A... > , ViewDimension< B... > > { - typedef ViewDimension< A... , B... > type ; -}; - -//---------------------------------------------------------------------------- - -template< class DstDim , class SrcDim > -struct ViewDimensionAssignable ; - -template< size_t ... DstArgs , size_t ... SrcArgs > -struct ViewDimensionAssignable< ViewDimension< DstArgs ... > - , ViewDimension< SrcArgs ... > > -{ - typedef ViewDimension< DstArgs... > dst ; - typedef ViewDimension< SrcArgs... > src ; - - enum { value = - unsigned(dst::rank) == unsigned(src::rank) && ( - //Compile time check that potential static dimensions match - ( ( 1 > dst::rank_dynamic && 1 > src::rank_dynamic ) ? (size_t(dst::ArgN0) == size_t(src::ArgN0)) : true ) && - ( ( 2 > dst::rank_dynamic && 2 > src::rank_dynamic ) ? (size_t(dst::ArgN1) == size_t(src::ArgN1)) : true ) && - ( ( 3 > dst::rank_dynamic && 3 > src::rank_dynamic ) ? (size_t(dst::ArgN2) == size_t(src::ArgN2)) : true ) && - ( ( 4 > dst::rank_dynamic && 4 > src::rank_dynamic ) ? (size_t(dst::ArgN3) == size_t(src::ArgN3)) : true ) && - ( ( 5 > dst::rank_dynamic && 5 > src::rank_dynamic ) ? (size_t(dst::ArgN4) == size_t(src::ArgN4)) : true ) && - ( ( 6 > dst::rank_dynamic && 6 > src::rank_dynamic ) ? (size_t(dst::ArgN5) == size_t(src::ArgN5)) : true ) && - ( ( 7 > dst::rank_dynamic && 7 > src::rank_dynamic ) ? (size_t(dst::ArgN6) == size_t(src::ArgN6)) : true ) && - ( ( 8 > dst::rank_dynamic && 8 > src::rank_dynamic ) ? 
(size_t(dst::ArgN7) == size_t(src::ArgN7)) : true ) - )}; - -}; - -}}} // namespace Kokkos::Experimental::Impl - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -struct ALL_t { - KOKKOS_INLINE_FUNCTION - constexpr const ALL_t & operator()() const { return *this ; } -}; - -template< class T > -struct is_integral_extent_type -{ enum { value = std::is_same<T,Kokkos::Experimental::Impl::ALL_t>::value ? 1 : 0 }; }; - -template< class iType > -struct is_integral_extent_type< std::pair<iType,iType> > -{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; }; - -template< class iType > -struct is_integral_extent_type< Kokkos::pair<iType,iType> > -{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; }; - -// Assuming '2 == initializer_list<iType>::size()' -template< class iType > -struct is_integral_extent_type< std::initializer_list<iType> > -{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; }; - -template < unsigned I , class ... Args > -struct is_integral_extent -{ - // get_type is void when sizeof...(Args) <= I - typedef typename std::remove_cv< - typename std::remove_reference< - typename Kokkos::Impl::get_type<I,Args... - >::type >::type >::type type ; - - enum { value = is_integral_extent_type<type>::value }; - - static_assert( value || - std::is_integral<type>::value || - std::is_same<type,void>::value - , "subview argument must be either integral or integral extent" ); -}; - -template< unsigned DomainRank , unsigned RangeRank > -struct SubviewExtents { -private: - - // Cannot declare zero-length arrays - enum { InternalRangeRank = RangeRank ? RangeRank : 1u }; - - size_t m_begin[ DomainRank ]; - size_t m_length[ InternalRangeRank ]; - unsigned m_index[ InternalRangeRank ]; - - template< size_t ... 
DimArgs > - KOKKOS_FORCEINLINE_FUNCTION - bool set( unsigned domain_rank - , unsigned range_rank - , const ViewDimension< DimArgs ... > & dim ) - { return true ; } - - template< class T , size_t ... DimArgs , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - bool set( unsigned domain_rank - , unsigned range_rank - , const ViewDimension< DimArgs ... > & dim - , const T & val - , Args ... args ) - { - const size_t v = static_cast<size_t>(val); - - m_begin[ domain_rank ] = v ; - - return set( domain_rank + 1 , range_rank , dim , args... ) -#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) - && ( v < dim.extent( domain_rank ) ) -#endif - ; - } - - // ALL_t - template< size_t ... DimArgs , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - bool set( unsigned domain_rank - , unsigned range_rank - , const ViewDimension< DimArgs ... > & dim - , const Kokkos::Experimental::Impl::ALL_t - , Args ... args ) - { - m_begin[ domain_rank ] = 0 ; - m_length[ range_rank ] = dim.extent( domain_rank ); - m_index[ range_rank ] = domain_rank ; - - return set( domain_rank + 1 , range_rank + 1 , dim , args... ); - } - - // std::pair range - template< class T , size_t ... DimArgs , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - bool set( unsigned domain_rank - , unsigned range_rank - , const ViewDimension< DimArgs ... > & dim - , const std::pair<T,T> & val - , Args ... args ) - { - const size_t b = static_cast<size_t>( val.first ); - const size_t e = static_cast<size_t>( val.second ); - - m_begin[ domain_rank ] = b ; - m_length[ range_rank ] = e - b ; - m_index[ range_rank ] = domain_rank ; - - return set( domain_rank + 1 , range_rank + 1 , dim , args... ) -#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) - && ( e <= b + dim.extent( domain_rank ) ) -#endif - ; - } - - // Kokkos::pair range - template< class T , size_t ... DimArgs , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - bool set( unsigned domain_rank - , unsigned range_rank - , const ViewDimension< DimArgs ... 
> & dim - , const Kokkos::pair<T,T> & val - , Args ... args ) - { - const size_t b = static_cast<size_t>( val.first ); - const size_t e = static_cast<size_t>( val.second ); - - m_begin[ domain_rank ] = b ; - m_length[ range_rank ] = e - b ; - m_index[ range_rank ] = domain_rank ; - - return set( domain_rank + 1 , range_rank + 1 , dim , args... ) -#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) - && ( e <= b + dim.extent( domain_rank ) ) -#endif - ; - } - - // { begin , end } range - template< class T , size_t ... DimArgs , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - bool set( unsigned domain_rank - , unsigned range_rank - , const ViewDimension< DimArgs ... > & dim - , const std::initializer_list< T > & val - , Args ... args ) - { - const size_t b = static_cast<size_t>( val.begin()[0] ); - const size_t e = static_cast<size_t>( val.begin()[1] ); - - m_begin[ domain_rank ] = b ; - m_length[ range_rank ] = e - b ; - m_index[ range_rank ] = domain_rank ; - - return set( domain_rank + 1 , range_rank + 1 , dim , args... ) -#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) - && ( val.size() == 2 ) - && ( e <= b + dim.extent( domain_rank ) ) -#endif - ; - } - - //------------------------------ - -#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) - - template< size_t ... DimArgs > - void error( char * - , int - , unsigned - , unsigned - , const ViewDimension< DimArgs ... > & ) const - {} - - template< class T , size_t ... DimArgs , class ... Args > - void error( char * buf , int buf_len - , unsigned domain_rank - , unsigned range_rank - , const ViewDimension< DimArgs ... > & dim - , const T & val - , Args ... args ) const - { - const int n = std::min( buf_len , - snprintf( buf , buf_len - , " %lu < %lu %c" - , static_cast<unsigned long>(val) - , static_cast<unsigned long>( dim.extent( domain_rank ) ) - , int( sizeof...(Args) ? ',' : ')' ) ) ); - - error( buf+n, buf_len-n, domain_rank + 1 , range_rank , dim , args... ); - } - - // std::pair range - template< size_t ... 
DimArgs , class ... Args > - void error( char * buf , int buf_len - , unsigned domain_rank - , unsigned range_rank - , const ViewDimension< DimArgs ... > & dim - , const Kokkos::Experimental::Impl::ALL_t - , Args ... args ) const - { - const int n = std::min( buf_len , - snprintf( buf , buf_len - , " Kokkos::ALL %c" - , int( sizeof...(Args) ? ',' : ')' ) ) ); - - error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... ); - } - - // std::pair range - template< class T , size_t ... DimArgs , class ... Args > - void error( char * buf , int buf_len - , unsigned domain_rank - , unsigned range_rank - , const ViewDimension< DimArgs ... > & dim - , const std::pair<T,T> & val - , Args ... args ) const - { - // d <= e - b - const int n = std::min( buf_len , - snprintf( buf , buf_len - , " %lu <= %lu - %lu %c" - , static_cast<unsigned long>( dim.extent( domain_rank ) ) - , static_cast<unsigned long>( val.second ) - , static_cast<unsigned long>( val.begin ) - , int( sizeof...(Args) ? ',' : ')' ) ) ); - - error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... ); - } - - // Kokkos::pair range - template< class T , size_t ... DimArgs , class ... Args > - void error( char * buf , int buf_len - , unsigned domain_rank - , unsigned range_rank - , const ViewDimension< DimArgs ... > & dim - , const Kokkos::pair<T,T> & val - , Args ... args ) const - { - // d <= e - b - const int n = std::min( buf_len , - snprintf( buf , buf_len - , " %lu <= %lu - %lu %c" - , static_cast<unsigned long>( dim.extent( domain_rank ) ) - , static_cast<unsigned long>( val.second ) - , static_cast<unsigned long>( val.begin ) - , int( sizeof...(Args) ? ',' : ')' ) ) ); - - error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... ); - } - - // { begin , end } range - template< class T , size_t ... DimArgs , class ... Args > - void error( char * buf , int buf_len - , unsigned domain_rank - , unsigned range_rank - , const ViewDimension< DimArgs ... 
> & dim - , const std::initializer_list< T > & val - , Args ... args ) const - { - // d <= e - b - int n = 0 ; - if ( val.size() == 2 ) { - n = std::min( buf_len , - snprintf( buf , buf_len - , " %lu <= %lu - %lu %c" - , static_cast<unsigned long>( dim.extent( domain_rank ) ) - , static_cast<unsigned long>( val.begin()[0] ) - , static_cast<unsigned long>( val.begin()[1] ) - , int( sizeof...(Args) ? ',' : ')' ) ) ); - } - else { - n = std::min( buf_len , - snprintf( buf , buf_len - , " { ... }.size() == %u %c" - , unsigned(val.size()) - , int( sizeof...(Args) ? ',' : ')' ) ) ); - } - - error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... ); - } - - template< size_t ... DimArgs , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - void error( const ViewDimension< DimArgs ... > & dim , Args ... args ) const - { -#if defined( KOKKOS_ACTIVE_EXECUTION_SPACE_HOST ) - enum { LEN = 1024 }; - char buffer[ LEN ]; - - const int n = snprintf(buffer,LEN,"Kokkos::subview bounds error ("); - error( buffer+n , LEN-n , 0 , 0 , dim , args... ); - - Kokkos::Impl::throw_runtime_exception(std::string(buffer)); -#else - Kokkos::abort("Kokkos::subview bounds error"); -#endif - } - -#else - - template< size_t ... DimArgs , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - void error( const ViewDimension< DimArgs ... > & , Args ... ) const {} - -#endif - -public: - - template< size_t ... DimArgs , class ... Args > - KOKKOS_INLINE_FUNCTION - SubviewExtents( const ViewDimension< DimArgs ... > & dim , Args ... args ) - { - static_assert( DomainRank == sizeof...(DimArgs) , "" ); - static_assert( DomainRank == sizeof...(Args) , "" ); - - // Verifies that all arguments, up to 8, are integral types, - // integral extents, or don't exist. 
- static_assert( RangeRank == - unsigned( is_integral_extent<0,Args...>::value ) + - unsigned( is_integral_extent<1,Args...>::value ) + - unsigned( is_integral_extent<2,Args...>::value ) + - unsigned( is_integral_extent<3,Args...>::value ) + - unsigned( is_integral_extent<4,Args...>::value ) + - unsigned( is_integral_extent<5,Args...>::value ) + - unsigned( is_integral_extent<6,Args...>::value ) + - unsigned( is_integral_extent<7,Args...>::value ) , "" ); - - if ( RangeRank == 0 ) { m_length[0] = 0 ; m_index[0] = ~0u ; } - - if ( ! set( 0 , 0 , dim , args... ) ) error( dim , args... ); - } - - template < typename iType > - KOKKOS_FORCEINLINE_FUNCTION - constexpr size_t domain_offset( const iType i ) const - { return unsigned(i) < DomainRank ? m_begin[i] : 0 ; } - - template < typename iType > - KOKKOS_FORCEINLINE_FUNCTION - constexpr size_t range_extent( const iType i ) const - { return unsigned(i) < InternalRangeRank ? m_length[i] : 0 ; } - - template < typename iType > - KOKKOS_FORCEINLINE_FUNCTION - constexpr unsigned range_index( const iType i ) const - { return unsigned(i) < InternalRangeRank ? m_index[i] : ~0u ; } -}; - -}}} // namespace Kokkos::Experimental::Impl - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -/** \brief Given a value type and dimension generate the View data type */ -template< class T , class Dim > -struct ViewDataType ; - -template< class T > -struct ViewDataType< T , ViewDimension<> > -{ - typedef T type ; -}; - -template< class T , size_t ... Args > -struct ViewDataType< T , ViewDimension< 0 , Args... > > -{ - typedef typename ViewDataType<T*,ViewDimension<Args...> >::type type ; -}; - -template< class T , size_t N , size_t ... Args > -struct ViewDataType< T , ViewDimension< N , Args... 
> > -{ - typedef typename ViewDataType<T,ViewDimension<Args...> >::type type[N] ; -}; - -/**\brief Analysis of View data type. - * - * Data type conforms to one of the following patterns : - * {const} value_type [][#][#][#] - * {const} value_type ***[#][#][#] - * Where the sum of counts of '*' and '[#]' is at most ten. - * - * Provide typedef for the ViewDimension<...> and value_type. - */ -template< class T > -struct ViewArrayAnalysis -{ - typedef T value_type ; - typedef typename std::add_const< T >::type const_value_type ; - typedef typename std::remove_const< T >::type non_const_value_type ; - typedef ViewDimension<> static_dimension ; - typedef ViewDimension<> dynamic_dimension ; - typedef ViewDimension<> dimension ; -}; - -template< class T , size_t N > -struct ViewArrayAnalysis< T[N] > -{ -private: - typedef ViewArrayAnalysis< T > nested ; -public: - typedef typename nested::value_type value_type ; - typedef typename nested::const_value_type const_value_type ; - typedef typename nested::non_const_value_type non_const_value_type ; - - typedef typename nested::static_dimension::template prepend<N>::type - static_dimension ; - - typedef typename nested::dynamic_dimension dynamic_dimension ; - - typedef typename - ViewDimensionJoin< dynamic_dimension , static_dimension >::type - dimension ; -}; - -template< class T > -struct ViewArrayAnalysis< T[] > -{ -private: - typedef ViewArrayAnalysis< T > nested ; - typedef typename nested::dimension nested_dimension ; -public: - typedef typename nested::value_type value_type ; - typedef typename nested::const_value_type const_value_type ; - typedef typename nested::non_const_value_type non_const_value_type ; - - typedef typename nested::dynamic_dimension::template prepend<0>::type - dynamic_dimension ; - - typedef typename nested::static_dimension static_dimension ; - - typedef typename - ViewDimensionJoin< dynamic_dimension , static_dimension >::type - dimension ; -}; - -template< class T > -struct ViewArrayAnalysis< T* 
> -{ -private: - typedef ViewArrayAnalysis< T > nested ; -public: - typedef typename nested::value_type value_type ; - typedef typename nested::const_value_type const_value_type ; - typedef typename nested::non_const_value_type non_const_value_type ; - - typedef typename nested::dynamic_dimension::template prepend<0>::type - dynamic_dimension ; - - typedef typename nested::static_dimension static_dimension ; - - typedef typename - ViewDimensionJoin< dynamic_dimension , static_dimension >::type - dimension ; -}; - - -template< class DataType , class ArrayLayout , class ValueType > -struct ViewDataAnalysis -{ -private: - - typedef ViewArrayAnalysis< DataType > array_analysis ; - - // ValueType is opportunity for partial specialization. - // Must match array analysis when this default template is used. - static_assert( std::is_same< ValueType , typename array_analysis::non_const_value_type >::value , "" ); - -public: - - typedef void specialize ; // No specialization - - typedef typename array_analysis::dimension dimension ; - typedef typename array_analysis::value_type value_type ; - typedef typename array_analysis::const_value_type const_value_type ; - typedef typename array_analysis::non_const_value_type non_const_value_type ; - - // Generate analogous multidimensional array specification type. - typedef typename ViewDataType< value_type , dimension >::type type ; - typedef typename ViewDataType< const_value_type , dimension >::type const_type ; - typedef typename ViewDataType< non_const_value_type , dimension >::type non_const_type ; - - // Generate "flattened" multidimensional array specification type. 
- typedef type scalar_array_type ; - typedef const_type const_scalar_array_type ; - typedef non_const_type non_const_scalar_array_type ; -}; - -}}} // namespace Kokkos::Experimental::Impl - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -template < class Dimension , class Layout , typename Enable = void > -struct ViewOffset { - using is_mapping_plugin = std::false_type ; -}; - -//---------------------------------------------------------------------------- -// LayoutLeft AND ( 1 >= rank OR 0 == rank_dynamic ) : no padding / striding -template < class Dimension > -struct ViewOffset< Dimension , Kokkos::LayoutLeft - , typename std::enable_if<( 1 >= Dimension::rank - || - 0 == Dimension::rank_dynamic - )>::type > -{ - using is_mapping_plugin = std::true_type ; - using is_regular = std::true_type ; - - typedef size_t size_type ; - typedef Dimension dimension_type ; - typedef Kokkos::LayoutLeft array_layout ; - - dimension_type m_dim ; - - //---------------------------------------- - - // rank 1 - template< typename I0 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0 ) const { return i0 ; } - - // rank 2 - template < typename I0 , typename I1 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0 , I1 const & i1 ) const - { return i0 + m_dim.N0 * i1 ; } - - //rank 3 - template < typename I0, typename I1, typename I2 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const - { - return i0 + m_dim.N0 * ( i1 + m_dim.N1 * i2 ); - } - - //rank 4 - template < typename I0, typename I1, typename I2, typename I3 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const - { - return i0 + m_dim.N0 * ( - i1 + m_dim.N1 * ( - i2 + 
m_dim.N2 * i3 )); - } - - //rank 5 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4 ) const - { - return i0 + m_dim.N0 * ( - i1 + m_dim.N1 * ( - i2 + m_dim.N2 * ( - i3 + m_dim.N3 * i4 ))); - } - - //rank 6 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4, typename I5 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4, I5 const & i5 ) const - { - return i0 + m_dim.N0 * ( - i1 + m_dim.N1 * ( - i2 + m_dim.N2 * ( - i3 + m_dim.N3 * ( - i4 + m_dim.N4 * i5 )))); - } - - //rank 7 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4, typename I5, typename I6 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4, I5 const & i5, I6 const & i6 ) const - { - return i0 + m_dim.N0 * ( - i1 + m_dim.N1 * ( - i2 + m_dim.N2 * ( - i3 + m_dim.N3 * ( - i4 + m_dim.N4 * ( - i5 + m_dim.N5 * i6 ))))); - } - - //rank 8 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4, typename I5, typename I6, typename I7 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const - { - return i0 + m_dim.N0 * ( - i1 + m_dim.N1 * ( - i2 + m_dim.N2 * ( - i3 + m_dim.N3 * ( - i4 + m_dim.N4 * ( - i5 + m_dim.N5 * ( - i6 + m_dim.N6 * i7 )))))); - } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - constexpr array_layout layout() const - { - return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3 - , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 ); - } - - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; 
} - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; } - - /* Cardinality of the domain index space */ - KOKKOS_INLINE_FUNCTION - constexpr size_type size() const - { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } - - /* Span of the range space */ - KOKKOS_INLINE_FUNCTION - constexpr size_type span() const - { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } - - KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return true ; } - - /* Strides of dimensions */ - KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 1 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N0 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_dim.N0 * m_dim.N1 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * 
m_dim.N4 * m_dim.N5 * m_dim.N6 ; } - - // Stride with [ rank ] value is the total length - template< typename iType > - KOKKOS_INLINE_FUNCTION - void stride( iType * const s ) const - { - s[0] = 1 ; - if ( 0 < dimension_type::rank ) { s[1] = m_dim.N0 ; } - if ( 1 < dimension_type::rank ) { s[2] = s[1] * m_dim.N1 ; } - if ( 2 < dimension_type::rank ) { s[3] = s[2] * m_dim.N2 ; } - if ( 3 < dimension_type::rank ) { s[4] = s[3] * m_dim.N3 ; } - if ( 4 < dimension_type::rank ) { s[5] = s[4] * m_dim.N4 ; } - if ( 5 < dimension_type::rank ) { s[6] = s[5] * m_dim.N5 ; } - if ( 6 < dimension_type::rank ) { s[7] = s[6] * m_dim.N6 ; } - if ( 7 < dimension_type::rank ) { s[8] = s[7] * m_dim.N7 ; } - } - - //---------------------------------------- - - ViewOffset() = default ; - ViewOffset( const ViewOffset & ) = default ; - ViewOffset & operator = ( const ViewOffset & ) = default ; - - template< unsigned TrivialScalarSize > - KOKKOS_INLINE_FUNCTION - constexpr ViewOffset - ( std::integral_constant<unsigned,TrivialScalarSize> const & - , Kokkos::LayoutLeft const & arg_layout - ) - : m_dim( arg_layout.dimension[0], 0, 0, 0, 0, 0, 0, 0 ) - {} - - template< class DimRHS > - KOKKOS_INLINE_FUNCTION - constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs ) - : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 - , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 ) - { - static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" ); - // Also requires equal static dimensions ... 
- } - - template< class DimRHS > - KOKKOS_INLINE_FUNCTION - constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs ) - : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 ) - { - static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1 - , "ViewOffset LayoutLeft and LayoutRight are only compatible when rank == 1" ); - } - - template< class DimRHS > - KOKKOS_INLINE_FUNCTION - ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs ) - : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 ) - { - static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1 - , "ViewOffset LayoutLeft and LayoutStride are only compatible when rank == 1" ); - if ( rhs.m_stride.S0 != 1 ) { - Kokkos::abort("Kokkos::Experimental::ViewOffset assignment of LayoutLeft from LayoutStride requires stride == 1" ); - } - } - - //---------------------------------------- - // Subview construction - - template< class DimRHS > - KOKKOS_INLINE_FUNCTION - constexpr ViewOffset( - const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs , - const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub ) - : m_dim( sub.range_extent(0), 0, 0, 0, 0, 0, 0, 0 ) - { - static_assert( ( 0 == dimension_type::rank ) || - ( 1 == dimension_type::rank && 1 == dimension_type::rank_dynamic && 1 <= DimRHS::rank ) - , "ViewOffset subview construction requires compatible rank" ); - } -}; - -//---------------------------------------------------------------------------- -// LayoutLeft AND ( 1 < rank AND 0 < rank_dynamic ) : has padding / striding -template < class Dimension > -struct ViewOffset< Dimension , Kokkos::LayoutLeft - , typename std::enable_if<( 1 < Dimension::rank - && - 0 < Dimension::rank_dynamic - )>::type > -{ - using is_mapping_plugin = std::true_type ; - using is_regular = std::true_type ; - - typedef size_t size_type ; - typedef Dimension dimension_type ; - typedef Kokkos::LayoutLeft 
array_layout ; - - dimension_type m_dim ; - size_type m_stride ; - - //---------------------------------------- - - // rank 1 - template< typename I0 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0 ) const { return i0 ; } - - // rank 2 - template < typename I0 , typename I1 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0 , I1 const & i1 ) const - { return i0 + m_stride * i1 ; } - - //rank 3 - template < typename I0, typename I1, typename I2 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const - { - return i0 + m_stride * ( i1 + m_dim.N1 * i2 ); - } - - //rank 4 - template < typename I0, typename I1, typename I2, typename I3 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const - { - return i0 + m_stride * ( - i1 + m_dim.N1 * ( - i2 + m_dim.N2 * i3 )); - } - - //rank 5 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4 ) const - { - return i0 + m_stride * ( - i1 + m_dim.N1 * ( - i2 + m_dim.N2 * ( - i3 + m_dim.N3 * i4 ))); - } - - //rank 6 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4, typename I5 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4, I5 const & i5 ) const - { - return i0 + m_stride * ( - i1 + m_dim.N1 * ( - i2 + m_dim.N2 * ( - i3 + m_dim.N3 * ( - i4 + m_dim.N4 * i5 )))); - } - - //rank 7 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4, typename I5, typename I6 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4, I5 const & i5, I6 const & i6 ) const - { - return i0 + 
m_stride * ( - i1 + m_dim.N1 * ( - i2 + m_dim.N2 * ( - i3 + m_dim.N3 * ( - i4 + m_dim.N4 * ( - i5 + m_dim.N5 * i6 ))))); - } - - //rank 8 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4, typename I5, typename I6, typename I7 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const - { - return i0 + m_stride * ( - i1 + m_dim.N1 * ( - i2 + m_dim.N2 * ( - i3 + m_dim.N3 * ( - i4 + m_dim.N4 * ( - i5 + m_dim.N5 * ( - i6 + m_dim.N6 * i7 )))))); - } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - constexpr array_layout layout() const - { - return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3 - , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 ); - } - - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; } - - /* Cardinality of the domain index space */ - KOKKOS_INLINE_FUNCTION - constexpr size_type size() const - { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } - - /* Span of the range space */ - KOKKOS_INLINE_FUNCTION - constexpr size_type span() const - { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } - - KOKKOS_INLINE_FUNCTION constexpr bool 
span_is_contiguous() const { return m_stride == m_dim.N0 ; } - - /* Strides of dimensions */ - KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 1 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_stride ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_stride * m_dim.N1 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_stride * m_dim.N1 * m_dim.N2 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 ; } - - // Stride with [ rank ] value is the total length - template< typename iType > - KOKKOS_INLINE_FUNCTION - void stride( iType * const s ) const - { - s[0] = 1 ; - if ( 0 < dimension_type::rank ) { s[1] = m_stride ; } - if ( 1 < dimension_type::rank ) { s[2] = s[1] * m_dim.N1 ; } - if ( 2 < dimension_type::rank ) { s[3] = s[2] * m_dim.N2 ; } - if ( 3 < dimension_type::rank ) { s[4] = s[3] * m_dim.N3 ; } - if ( 4 < dimension_type::rank ) { s[5] = s[4] * m_dim.N4 ; } - if ( 5 < dimension_type::rank ) { s[6] = s[5] * m_dim.N5 ; } - if ( 6 < dimension_type::rank ) { s[7] = s[6] * m_dim.N6 ; } - if ( 7 < dimension_type::rank ) { s[8] = s[7] * m_dim.N7 ; } - } - - //---------------------------------------- - -private: - - template< unsigned TrivialScalarSize > - struct Padding { - enum { div = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT / ( TrivialScalarSize ? TrivialScalarSize : 1 ) }; - enum { mod = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT % ( TrivialScalarSize ? 
TrivialScalarSize : 1 ) }; - - // If memory alignment is a multiple of the trivial scalar size then attempt to align. - enum { align = 0 != TrivialScalarSize && 0 == mod ? div : 0 }; - enum { div_ok = div ? div : 1 }; // To valid modulo zero in constexpr - - KOKKOS_INLINE_FUNCTION - static constexpr size_t stride( size_t const N ) - { - return ( align && ( Kokkos::Impl::MEMORY_ALIGNMENT_THRESHOLD * align < N ) && ( N % div_ok ) ) - ? N + align - ( N % div_ok ) : N ; - } - }; - -public: - - ViewOffset() = default ; - ViewOffset( const ViewOffset & ) = default ; - ViewOffset & operator = ( const ViewOffset & ) = default ; - - /* Enable padding for trivial scalar types with non-zero trivial scalar size */ - template< unsigned TrivialScalarSize > - KOKKOS_INLINE_FUNCTION - constexpr ViewOffset - ( std::integral_constant<unsigned,TrivialScalarSize> const & padding_type_size - , Kokkos::LayoutLeft const & arg_layout - ) - : m_dim( arg_layout.dimension[0] , arg_layout.dimension[1] - , arg_layout.dimension[2] , arg_layout.dimension[3] - , arg_layout.dimension[4] , arg_layout.dimension[5] - , arg_layout.dimension[6] , arg_layout.dimension[7] - ) - , m_stride( Padding<TrivialScalarSize>::stride( arg_layout.dimension[0] ) ) - {} - - template< class DimRHS > - KOKKOS_INLINE_FUNCTION - constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs ) - : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 - , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 ) - , m_stride( rhs.stride_1() ) - { - static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" ); - // Also requires equal static dimensions ... - } - - //---------------------------------------- - // Subview construction - // This subview must be 2 == rank and 2 == rank_dynamic - // due to only having stride #0. - // The source dimension #0 must be non-zero for stride-one leading dimension. 
- // At most subsequent dimension can be non-zero. - - template< class DimRHS > - KOKKOS_INLINE_FUNCTION - constexpr ViewOffset - ( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs , - const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub ) - : m_dim( sub.range_extent(0) - , sub.range_extent(1) - , 0, 0, 0, 0, 0, 0 ) - , m_stride( ( 1 == sub.range_index(1) ? rhs.stride_1() : - ( 2 == sub.range_index(1) ? rhs.stride_2() : - ( 3 == sub.range_index(1) ? rhs.stride_3() : - ( 4 == sub.range_index(1) ? rhs.stride_4() : - ( 5 == sub.range_index(1) ? rhs.stride_5() : - ( 6 == sub.range_index(1) ? rhs.stride_6() : - ( 7 == sub.range_index(1) ? rhs.stride_7() : 0 )))))))) - { - static_assert( ( 2 == dimension_type::rank ) && - ( 2 == dimension_type::rank_dynamic ) && - ( 2 <= DimRHS::rank ) - , "ViewOffset subview construction requires compatible rank" ); - } -}; - -//---------------------------------------------------------------------------- -// LayoutRight AND ( 1 >= rank OR 0 == rank_dynamic ) : no padding / striding -template < class Dimension > -struct ViewOffset< Dimension , Kokkos::LayoutRight - , typename std::enable_if<( 1 >= Dimension::rank - || - 0 == Dimension::rank_dynamic - )>::type > -{ - using is_mapping_plugin = std::true_type ; - using is_regular = std::true_type ; - - typedef size_t size_type ; - typedef Dimension dimension_type ; - typedef Kokkos::LayoutRight array_layout ; - - dimension_type m_dim ; - - //---------------------------------------- - - // rank 1 - template< typename I0 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0 ) const { return i0 ; } - - // rank 2 - template < typename I0 , typename I1 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0 , I1 const & i1 ) const - { return i1 + m_dim.N1 * i0 ; } - - //rank 3 - template < typename I0, typename I1, typename I2 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) 
const - { - return i2 + m_dim.N2 * ( i1 + m_dim.N1 * ( i0 )); - } - - //rank 4 - template < typename I0, typename I1, typename I2, typename I3 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const - { - return i3 + m_dim.N3 * ( - i2 + m_dim.N2 * ( - i1 + m_dim.N1 * ( i0 ))); - } - - //rank 5 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4 ) const - { - return i4 + m_dim.N4 * ( - i3 + m_dim.N3 * ( - i2 + m_dim.N2 * ( - i1 + m_dim.N1 * ( i0 )))); - } - - //rank 6 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4, typename I5 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4, I5 const & i5 ) const - { - return i5 + m_dim.N5 * ( - i4 + m_dim.N4 * ( - i3 + m_dim.N3 * ( - i2 + m_dim.N2 * ( - i1 + m_dim.N1 * ( i0 ))))); - } - - //rank 7 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4, typename I5, typename I6 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4, I5 const & i5, I6 const & i6 ) const - { - return i6 + m_dim.N6 * ( - i5 + m_dim.N5 * ( - i4 + m_dim.N4 * ( - i3 + m_dim.N3 * ( - i2 + m_dim.N2 * ( - i1 + m_dim.N1 * ( i0 )))))); - } - - //rank 8 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4, typename I5, typename I6, typename I7 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const - { - return i7 + m_dim.N7 * ( - i6 + m_dim.N6 * ( - i5 + m_dim.N5 * ( - i4 + m_dim.N4 * ( - i3 + m_dim.N3 * ( - i2 + m_dim.N2 * ( - i1 + m_dim.N1 
* ( i0 ))))))); - } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - constexpr array_layout layout() const - { - return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3 - , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 ); - } - - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; } - - /* Cardinality of the domain index space */ - KOKKOS_INLINE_FUNCTION - constexpr size_type size() const - { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } - - /* Span of the range space */ - KOKKOS_INLINE_FUNCTION - constexpr size_type span() const - { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } - - KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return true ; } - - /* Strides of dimensions */ - KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 1 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N7 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N7 * m_dim.N6 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 ; } - KOKKOS_INLINE_FUNCTION constexpr 
size_type stride_2() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 * m_dim.N1 ; } - - // Stride with [ rank ] value is the total length - template< typename iType > - KOKKOS_INLINE_FUNCTION - void stride( iType * const s ) const - { - size_type n = 1 ; - if ( 7 < dimension_type::rank ) { s[7] = n ; n *= m_dim.N7 ; } - if ( 6 < dimension_type::rank ) { s[6] = n ; n *= m_dim.N6 ; } - if ( 5 < dimension_type::rank ) { s[5] = n ; n *= m_dim.N5 ; } - if ( 4 < dimension_type::rank ) { s[4] = n ; n *= m_dim.N4 ; } - if ( 3 < dimension_type::rank ) { s[3] = n ; n *= m_dim.N3 ; } - if ( 2 < dimension_type::rank ) { s[2] = n ; n *= m_dim.N2 ; } - if ( 1 < dimension_type::rank ) { s[1] = n ; n *= m_dim.N1 ; } - if ( 0 < dimension_type::rank ) { s[0] = n ; } - s[dimension_type::rank] = n * m_dim.N0 ; - } - - //---------------------------------------- - - ViewOffset() = default ; - ViewOffset( const ViewOffset & ) = default ; - ViewOffset & operator = ( const ViewOffset & ) = default ; - - template< unsigned TrivialScalarSize > - KOKKOS_INLINE_FUNCTION - constexpr ViewOffset - ( std::integral_constant<unsigned,TrivialScalarSize> const & - , Kokkos::LayoutRight const & arg_layout - ) - : m_dim( arg_layout.dimension[0], 0, 0, 0, 0, 0, 0, 0 ) - {} - - template< class DimRHS > - KOKKOS_INLINE_FUNCTION - constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs ) - : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 - , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 ) - { - static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" ); - // Also requires equal static dimensions ... 
- } - - template< class DimRHS > - KOKKOS_INLINE_FUNCTION - constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs ) - : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 ) - { - static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1 - , "ViewOffset LayoutRight and LayoutLeft are only compatible when rank == 1" ); - } - - template< class DimRHS > - KOKKOS_INLINE_FUNCTION - ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs ) - : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 ) - { - static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1 - , "ViewOffset LayoutLeft/Right and LayoutStride are only compatible when rank == 1" ); - if ( rhs.m_stride.S0 != 1 ) { - Kokkos::abort("Kokkos::Experimental::ViewOffset assignment of LayoutLeft/Right from LayoutStride requires stride == 1" ); - } - } - - //---------------------------------------- - // Subview construction - - template< class DimRHS > - KOKKOS_INLINE_FUNCTION - constexpr ViewOffset - ( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs - , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub - ) - : m_dim( sub.range_extent(0) , 0, 0, 0, 0, 0, 0, 0 ) - { - static_assert( ( 0 == dimension_type::rank_dynamic ) || - ( 1 == dimension_type::rank && 1 == dimension_type::rank_dynamic && 1 <= DimRHS::rank ) - , "ViewOffset subview construction requires compatible rank" ); - } -}; - -//---------------------------------------------------------------------------- -// LayoutRight AND ( 1 < rank AND 0 < rank_dynamic ) : has padding / striding -template < class Dimension > -struct ViewOffset< Dimension , Kokkos::LayoutRight - , typename std::enable_if<( 1 < Dimension::rank - && - 0 < Dimension::rank_dynamic - )>::type > -{ - using is_mapping_plugin = std::true_type ; - using is_regular = std::true_type ; - - typedef size_t size_type ; - typedef Dimension dimension_type ; - typedef 
Kokkos::LayoutRight array_layout ; - - dimension_type m_dim ; - size_type m_stride ; - - //---------------------------------------- - - // rank 1 - template< typename I0 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0 ) const { return i0 ; } - - // rank 2 - template < typename I0 , typename I1 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0 , I1 const & i1 ) const - { return i1 + i0 * m_stride ; } - - //rank 3 - template < typename I0, typename I1, typename I2 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const - { return i2 + m_dim.N2 * ( i1 ) + i0 * m_stride ; } - - //rank 4 - template < typename I0, typename I1, typename I2, typename I3 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const - { - return i3 + m_dim.N3 * ( - i2 + m_dim.N2 * ( i1 )) + - i0 * m_stride ; - } - - //rank 5 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4 ) const - { - return i4 + m_dim.N4 * ( - i3 + m_dim.N3 * ( - i2 + m_dim.N2 * ( i1 ))) + - i0 * m_stride ; - } - - //rank 6 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4, typename I5 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4, I5 const & i5 ) const - { - return i5 + m_dim.N5 * ( - i4 + m_dim.N4 * ( - i3 + m_dim.N3 * ( - i2 + m_dim.N2 * ( i1 )))) + - i0 * m_stride ; - } - - //rank 7 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4, typename I5, typename I6 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4, I5 const & i5, I6 const & i6 ) const - 
{ - return i6 + m_dim.N6 * ( - i5 + m_dim.N5 * ( - i4 + m_dim.N4 * ( - i3 + m_dim.N3 * ( - i2 + m_dim.N2 * ( i1 ))))) + - i0 * m_stride ; - } - - //rank 8 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4, typename I5, typename I6, typename I7 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const - { - return i7 + m_dim.N7 * ( - i6 + m_dim.N6 * ( - i5 + m_dim.N5 * ( - i4 + m_dim.N4 * ( - i3 + m_dim.N3 * ( - i2 + m_dim.N2 * ( i1 )))))) + - i0 * m_stride ; - } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - constexpr array_layout layout() const - { - return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3 - , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 ); - } - - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; } - - /* Cardinality of the domain index space */ - KOKKOS_INLINE_FUNCTION - constexpr size_type size() const - { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } - - /* Span of the range space */ - KOKKOS_INLINE_FUNCTION - constexpr size_type span() const - { return m_dim.N0 * m_stride ; } - - KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const - { return m_stride == 
m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 * m_dim.N1 ; } - - /* Strides of dimensions */ - KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 1 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N7 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N7 * m_dim.N6 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_stride ; } - - // Stride with [ rank ] value is the total length - template< typename iType > - KOKKOS_INLINE_FUNCTION - void stride( iType * const s ) const - { - size_type n = 1 ; - if ( 7 < dimension_type::rank ) { s[7] = n ; n *= m_dim.N7 ; } - if ( 6 < dimension_type::rank ) { s[6] = n ; n *= m_dim.N6 ; } - if ( 5 < dimension_type::rank ) { s[5] = n ; n *= m_dim.N5 ; } - if ( 4 < dimension_type::rank ) { s[4] = n ; n *= m_dim.N4 ; } - if ( 3 < dimension_type::rank ) { s[3] = n ; n *= m_dim.N3 ; } - if ( 2 < dimension_type::rank ) { s[2] = n ; n *= m_dim.N2 ; } - if ( 1 < dimension_type::rank ) { s[1] = n ; } - if ( 0 < dimension_type::rank ) { s[0] = m_stride ; } - s[dimension_type::rank] = m_stride * m_dim.N0 ; - } - - //---------------------------------------- - -private: - - template< unsigned TrivialScalarSize > - struct Padding { - enum { div = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT / ( TrivialScalarSize ? TrivialScalarSize : 1 ) }; - enum { mod = TrivialScalarSize == 0 ? 
0 : Kokkos::Impl::MEMORY_ALIGNMENT % ( TrivialScalarSize ? TrivialScalarSize : 1 ) }; - - // If memory alignment is a multiple of the trivial scalar size then attempt to align. - enum { align = 0 != TrivialScalarSize && 0 == mod ? div : 0 }; - enum { div_ok = div ? div : 1 }; // To valid modulo zero in constexpr - - KOKKOS_INLINE_FUNCTION - static constexpr size_t stride( size_t const N ) - { - return ( align && ( Kokkos::Impl::MEMORY_ALIGNMENT_THRESHOLD * align < N ) && ( N % div_ok ) ) - ? N + align - ( N % div_ok ) : N ; - } - }; - -public: - - ViewOffset() = default ; - ViewOffset( const ViewOffset & ) = default ; - ViewOffset & operator = ( const ViewOffset & ) = default ; - - /* Enable padding for trivial scalar types with non-zero trivial scalar size. */ - template< unsigned TrivialScalarSize > - KOKKOS_INLINE_FUNCTION - constexpr ViewOffset - ( std::integral_constant<unsigned,TrivialScalarSize> const & padding_type_size - , Kokkos::LayoutRight const & arg_layout - ) - : m_dim( arg_layout.dimension[0] , arg_layout.dimension[1] - , arg_layout.dimension[2] , arg_layout.dimension[3] - , arg_layout.dimension[4] , arg_layout.dimension[5] - , arg_layout.dimension[6] , arg_layout.dimension[7] - ) - , m_stride( Padding<TrivialScalarSize>:: - stride( /* 2 <= rank */ - m_dim.N1 * ( dimension_type::rank == 2 ? 1 : - m_dim.N2 * ( dimension_type::rank == 3 ? 1 : - m_dim.N3 * ( dimension_type::rank == 4 ? 1 : - m_dim.N4 * ( dimension_type::rank == 5 ? 1 : - m_dim.N5 * ( dimension_type::rank == 6 ? 1 : - m_dim.N6 * ( dimension_type::rank == 7 ? 
1 : m_dim.N7 )))))) )) - {} - - template< class DimRHS > - KOKKOS_INLINE_FUNCTION - constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs ) - : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 - , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 ) - , m_stride( rhs.stride_0() ) - { - static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" ); - // Also requires equal static dimensions ... - } - - //---------------------------------------- - // Subview construction - // Last dimension must be non-zero - - template< class DimRHS > - KOKKOS_INLINE_FUNCTION - constexpr ViewOffset - ( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs - , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub - ) - : m_dim( sub.range_extent(0) - , sub.range_extent(1) - , 0, 0, 0, 0, 0, 0 ) - , m_stride( 0 == sub.range_index(0) ? rhs.stride_0() : ( - 1 == sub.range_index(0) ? rhs.stride_1() : ( - 2 == sub.range_index(0) ? rhs.stride_2() : ( - 3 == sub.range_index(0) ? rhs.stride_3() : ( - 4 == sub.range_index(0) ? rhs.stride_4() : ( - 5 == sub.range_index(0) ? rhs.stride_5() : ( - 6 == sub.range_index(0) ? rhs.stride_6() : 0 ))))))) - { - // This subview must be 2 == rank and 2 == rank_dynamic - // due to only having stride #0. - // The source dimension #0 must be non-zero for stride-one leading dimension. - // At most subsequent dimension can be non-zero. 
- - static_assert( ( 2 == dimension_type::rank ) && - ( 2 <= DimRHS::rank ) - , "ViewOffset subview construction requires compatible rank" ); - } -}; - -//---------------------------------------------------------------------------- -/* Strided array layout only makes sense for 0 < rank */ -/* rank = 0 included for DynRankView case */ - -template< unsigned Rank > -struct ViewStride ; - -template<> -struct ViewStride<0> { - enum { S0 = 0 , S1 = 0 , S2 = 0 , S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 }; - - ViewStride() = default ; - ViewStride( const ViewStride & ) = default ; - ViewStride & operator = ( const ViewStride & ) = default ; - - KOKKOS_INLINE_FUNCTION - constexpr ViewStride( size_t , size_t , size_t , size_t - , size_t , size_t , size_t , size_t ) - {} -}; - -template<> -struct ViewStride<1> { - size_t S0 ; - enum { S1 = 0 , S2 = 0 , S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 }; - - ViewStride() = default ; - ViewStride( const ViewStride & ) = default ; - ViewStride & operator = ( const ViewStride & ) = default ; - - KOKKOS_INLINE_FUNCTION - constexpr ViewStride( size_t aS0 , size_t , size_t , size_t - , size_t , size_t , size_t , size_t ) - : S0( aS0 ) - {} -}; - -template<> -struct ViewStride<2> { - size_t S0 , S1 ; - enum { S2 = 0 , S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 }; - - ViewStride() = default ; - ViewStride( const ViewStride & ) = default ; - ViewStride & operator = ( const ViewStride & ) = default ; - - KOKKOS_INLINE_FUNCTION - constexpr ViewStride( size_t aS0 , size_t aS1 , size_t , size_t - , size_t , size_t , size_t , size_t ) - : S0( aS0 ) , S1( aS1 ) - {} -}; - -template<> -struct ViewStride<3> { - size_t S0 , S1 , S2 ; - enum { S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 }; - - ViewStride() = default ; - ViewStride( const ViewStride & ) = default ; - ViewStride & operator = ( const ViewStride & ) = default ; - - KOKKOS_INLINE_FUNCTION - constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t - , size_t , size_t , size_t , 
size_t ) - : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) - {} -}; - -template<> -struct ViewStride<4> { - size_t S0 , S1 , S2 , S3 ; - enum { S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 }; - - ViewStride() = default ; - ViewStride( const ViewStride & ) = default ; - ViewStride & operator = ( const ViewStride & ) = default ; - - KOKKOS_INLINE_FUNCTION - constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 - , size_t , size_t , size_t , size_t ) - : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 ) - {} -}; - -template<> -struct ViewStride<5> { - size_t S0 , S1 , S2 , S3 , S4 ; - enum { S5 = 0 , S6 = 0 , S7 = 0 }; - - ViewStride() = default ; - ViewStride( const ViewStride & ) = default ; - ViewStride & operator = ( const ViewStride & ) = default ; - - KOKKOS_INLINE_FUNCTION - constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 - , size_t aS4 , size_t , size_t , size_t ) - : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 ) - , S4( aS4 ) - {} -}; - -template<> -struct ViewStride<6> { - size_t S0 , S1 , S2 , S3 , S4 , S5 ; - enum { S6 = 0 , S7 = 0 }; - - ViewStride() = default ; - ViewStride( const ViewStride & ) = default ; - ViewStride & operator = ( const ViewStride & ) = default ; - - KOKKOS_INLINE_FUNCTION - constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 - , size_t aS4 , size_t aS5 , size_t , size_t ) - : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 ) - , S4( aS4 ) , S5( aS5 ) - {} -}; - -template<> -struct ViewStride<7> { - size_t S0 , S1 , S2 , S3 , S4 , S5 , S6 ; - enum { S7 = 0 }; - - ViewStride() = default ; - ViewStride( const ViewStride & ) = default ; - ViewStride & operator = ( const ViewStride & ) = default ; - - KOKKOS_INLINE_FUNCTION - constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 - , size_t aS4 , size_t aS5 , size_t aS6 , size_t ) - : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 ) - , S4( aS4 ) , S5( aS5 ) , S6( aS6 ) - {} -}; - -template<> -struct ViewStride<8> { - size_t S0 , S1 , S2 , S3 
, S4 , S5 , S6 , S7 ; - - ViewStride() = default ; - ViewStride( const ViewStride & ) = default ; - ViewStride & operator = ( const ViewStride & ) = default ; - - KOKKOS_INLINE_FUNCTION - constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 - , size_t aS4 , size_t aS5 , size_t aS6 , size_t aS7 ) - : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 ) - , S4( aS4 ) , S5( aS5 ) , S6( aS6 ) , S7( aS7 ) - {} -}; - -template < class Dimension > -struct ViewOffset< Dimension , Kokkos::LayoutStride - , void > -{ -private: - typedef ViewStride< Dimension::rank > stride_type ; -public: - - using is_mapping_plugin = std::true_type ; - using is_regular = std::true_type ; - - typedef size_t size_type ; - typedef Dimension dimension_type ; - typedef Kokkos::LayoutStride array_layout ; - - dimension_type m_dim ; - stride_type m_stride ; - - //---------------------------------------- - - // rank 1 - template< typename I0 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0 ) const - { - return i0 * m_stride.S0 ; - } - - // rank 2 - template < typename I0 , typename I1 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0 , I1 const & i1 ) const - { - return i0 * m_stride.S0 + - i1 * m_stride.S1 ; - } - - //rank 3 - template < typename I0, typename I1, typename I2 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const - { - return i0 * m_stride.S0 + - i1 * m_stride.S1 + - i2 * m_stride.S2 ; - } - - //rank 4 - template < typename I0, typename I1, typename I2, typename I3 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const - { - return i0 * m_stride.S0 + - i1 * m_stride.S1 + - i2 * m_stride.S2 + - i3 * m_stride.S3 ; - } - - //rank 5 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & 
i1, I2 const & i2, I3 const & i3 - , I4 const & i4 ) const - { - return i0 * m_stride.S0 + - i1 * m_stride.S1 + - i2 * m_stride.S2 + - i3 * m_stride.S3 + - i4 * m_stride.S4 ; - } - - //rank 6 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4, typename I5 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4, I5 const & i5 ) const - { - return i0 * m_stride.S0 + - i1 * m_stride.S1 + - i2 * m_stride.S2 + - i3 * m_stride.S3 + - i4 * m_stride.S4 + - i5 * m_stride.S5 ; - } - - //rank 7 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4, typename I5, typename I6 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4, I5 const & i5, I6 const & i6 ) const - { - return i0 * m_stride.S0 + - i1 * m_stride.S1 + - i2 * m_stride.S2 + - i3 * m_stride.S3 + - i4 * m_stride.S4 + - i5 * m_stride.S5 + - i6 * m_stride.S6 ; - } - - //rank 8 - template < typename I0, typename I1, typename I2, typename I3 - , typename I4, typename I5, typename I6, typename I7 > - KOKKOS_INLINE_FUNCTION constexpr - size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 - , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const - { - return i0 * m_stride.S0 + - i1 * m_stride.S1 + - i2 * m_stride.S2 + - i3 * m_stride.S3 + - i4 * m_stride.S4 + - i5 * m_stride.S5 + - i6 * m_stride.S6 + - i7 * m_stride.S7 ; - } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - constexpr array_layout layout() const - { - return array_layout( m_dim.N0 , m_stride.S0 - , m_dim.N1 , m_stride.S1 - , m_dim.N2 , m_stride.S2 - , m_dim.N3 , m_stride.S3 - , m_dim.N4 , m_stride.S4 - , m_dim.N5 , m_stride.S5 - , m_dim.N6 , m_stride.S6 - , m_dim.N7 , m_stride.S7 - ); - } - - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; 
} - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; } - - /* Cardinality of the domain index space */ - KOKKOS_INLINE_FUNCTION - constexpr size_type size() const - { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } - -private: - - KOKKOS_INLINE_FUNCTION - static constexpr size_type Max( size_type lhs , size_type rhs ) - { return lhs < rhs ? rhs : lhs ; } - -public: - - /* Span of the range space, largest stride * dimension */ - KOKKOS_INLINE_FUNCTION - constexpr size_type span() const - { - return Max( m_dim.N0 * m_stride.S0 , - Max( m_dim.N1 * m_stride.S1 , - Max( m_dim.N2 * m_stride.S2 , - Max( m_dim.N3 * m_stride.S3 , - Max( m_dim.N4 * m_stride.S4 , - Max( m_dim.N5 * m_stride.S5 , - Max( m_dim.N6 * m_stride.S6 , - m_dim.N7 * m_stride.S7 ))))))); - } - - KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return span() == size(); } - - /* Strides of dimensions */ - KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_stride.S0 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_stride.S1 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_stride.S2 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_stride.S3 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_stride.S4 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { 
return m_stride.S5 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_stride.S6 ; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_stride.S7 ; } - - // Stride with [ rank ] value is the total length - template< typename iType > - KOKKOS_INLINE_FUNCTION - void stride( iType * const s ) const - { - if ( 0 < dimension_type::rank ) { s[0] = m_stride.S0 ; } - if ( 1 < dimension_type::rank ) { s[1] = m_stride.S1 ; } - if ( 2 < dimension_type::rank ) { s[2] = m_stride.S2 ; } - if ( 3 < dimension_type::rank ) { s[3] = m_stride.S3 ; } - if ( 4 < dimension_type::rank ) { s[4] = m_stride.S4 ; } - if ( 5 < dimension_type::rank ) { s[5] = m_stride.S5 ; } - if ( 6 < dimension_type::rank ) { s[6] = m_stride.S6 ; } - if ( 7 < dimension_type::rank ) { s[7] = m_stride.S7 ; } - s[dimension_type::rank] = span(); - } - - //---------------------------------------- - - ViewOffset() = default ; - ViewOffset( const ViewOffset & ) = default ; - ViewOffset & operator = ( const ViewOffset & ) = default ; - - KOKKOS_INLINE_FUNCTION - constexpr ViewOffset( std::integral_constant<unsigned,0> const & - , Kokkos::LayoutStride const & rhs ) - : m_dim( rhs.dimension[0] , rhs.dimension[1] , rhs.dimension[2] , rhs.dimension[3] - , rhs.dimension[4] , rhs.dimension[5] , rhs.dimension[6] , rhs.dimension[7] ) - , m_stride( rhs.stride[0] , rhs.stride[1] , rhs.stride[2] , rhs.stride[3] - , rhs.stride[4] , rhs.stride[5] , rhs.stride[6] , rhs.stride[7] ) - {} - - template< class DimRHS , class LayoutRHS > - KOKKOS_INLINE_FUNCTION - constexpr ViewOffset( const ViewOffset< DimRHS , LayoutRHS , void > & rhs ) - : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 - , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 ) - , m_stride( rhs.stride_0() , rhs.stride_1() , rhs.stride_2() , rhs.stride_3() - , rhs.stride_4() , rhs.stride_5() , rhs.stride_6() , rhs.stride_7() ) - { - static_assert( int(DimRHS::rank) == int(dimension_type::rank) , 
"ViewOffset assignment requires equal rank" ); - // Also requires equal static dimensions ... - } - - //---------------------------------------- - // Subview construction - -private: - - template< class DimRHS , class LayoutRHS > - KOKKOS_INLINE_FUNCTION static - constexpr size_t stride - ( unsigned r , const ViewOffset< DimRHS , LayoutRHS , void > & rhs ) - { - return r > 7 ? 0 : ( - r == 0 ? rhs.stride_0() : ( - r == 1 ? rhs.stride_1() : ( - r == 2 ? rhs.stride_2() : ( - r == 3 ? rhs.stride_3() : ( - r == 4 ? rhs.stride_4() : ( - r == 5 ? rhs.stride_5() : ( - r == 6 ? rhs.stride_6() : rhs.stride_7() ))))))); - } - -public: - - template< class DimRHS , class LayoutRHS > - KOKKOS_INLINE_FUNCTION - constexpr ViewOffset - ( const ViewOffset< DimRHS , LayoutRHS , void > & rhs - , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub - ) - // range_extent(r) returns 0 when dimension_type::rank <= r - : m_dim( sub.range_extent(0) - , sub.range_extent(1) - , sub.range_extent(2) - , sub.range_extent(3) - , sub.range_extent(4) - , sub.range_extent(5) - , sub.range_extent(6) - , sub.range_extent(7) - ) - // range_index(r) returns ~0u when dimension_type::rank <= r - , m_stride( stride( sub.range_index(0), rhs ) - , stride( sub.range_index(1), rhs ) - , stride( sub.range_index(2), rhs ) - , stride( sub.range_index(3), rhs ) - , stride( sub.range_index(4), rhs ) - , stride( sub.range_index(5), rhs ) - , stride( sub.range_index(6), rhs ) - , stride( sub.range_index(7), rhs ) - ) - {} -}; - -}}} // namespace Kokkos::Experimental::Impl - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -/** \brief ViewDataHandle provides the type of the 'data handle' which the view - * uses to access data with the [] operator. 
It also provides - * an allocate function and a function to extract a raw ptr from the - * data handle. ViewDataHandle also defines an enum ReferenceAble which - * specifies whether references/pointers to elements can be taken and a - * 'return_type' which is what the view operators will give back. - * Specialisation of this object allows three things depending - * on ViewTraits and compiler options: - * (i) Use special allocator (e.g. huge pages/small pages and pinned memory) - * (ii) Use special data handle type (e.g. add Cuda Texture Object) - * (iii) Use special access intrinsics (e.g. texture fetch and non-caching loads) - */ -template< class Traits , class Enable = void > -struct ViewDataHandle { - - typedef typename Traits::value_type value_type ; - typedef typename Traits::value_type * handle_type ; - typedef typename Traits::value_type & return_type ; - typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ; - - KOKKOS_INLINE_FUNCTION - static handle_type assign( value_type * arg_data_ptr - , track_type const & /*arg_tracker*/ ) - { - return handle_type( arg_data_ptr ); - } -}; - -template< class Traits > -struct ViewDataHandle< Traits , - typename std::enable_if<( std::is_same< typename Traits::non_const_value_type - , typename Traits::value_type >::value - && - std::is_same< typename Traits::specialize , void >::value - && - Traits::memory_traits::Atomic - )>::type > -{ - typedef typename Traits::value_type value_type ; - typedef typename Kokkos::Impl::AtomicViewDataHandle< Traits > handle_type ; - typedef typename Kokkos::Impl::AtomicDataElement< Traits > return_type ; - typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ; - - KOKKOS_INLINE_FUNCTION - static handle_type assign( value_type * arg_data_ptr - , track_type const & /*arg_tracker*/ ) - { - return handle_type( arg_data_ptr ); - } -}; - -}}} // namespace Kokkos::Experimental::Impl - 
-//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -//---------------------------------------------------------------------------- - -/* - * The construction, assignment to default, and destruction - * are merged into a single functor. - * Primarily to work around an unresolved CUDA back-end bug - * that would lose the destruction cuda device function when - * called from the shared memory tracking destruction. - * Secondarily to have two fewer partial specializations. - */ -template< class ExecSpace - , class ValueType - , bool IsScalar = std::is_scalar< ValueType >::value - > -struct ViewValueFunctor ; - -template< class ExecSpace , class ValueType > -struct ViewValueFunctor< ExecSpace , ValueType , false /* is_scalar */ > -{ - typedef Kokkos::RangePolicy< ExecSpace > PolicyType ; - - ExecSpace space ; - ValueType * ptr ; - size_t n ; - bool destroy ; - - KOKKOS_INLINE_FUNCTION - void operator()( const size_t i ) const - { - if ( destroy ) { (ptr+i)->~ValueType(); } - else { new (ptr+i) ValueType(); } - } - - ViewValueFunctor() = default ; - ViewValueFunctor( const ViewValueFunctor & ) = default ; - ViewValueFunctor & operator = ( const ViewValueFunctor & ) = default ; - - ViewValueFunctor( ExecSpace const & arg_space - , ValueType * const arg_ptr - , size_t const arg_n ) - : space( arg_space ) - , ptr( arg_ptr ) - , n( arg_n ) - , destroy( false ) - {} - - void execute( bool arg ) - { - destroy = arg ; - if ( ! 
space.in_parallel() ) { - const Kokkos::Impl::ParallelFor< ViewValueFunctor , PolicyType > - closure( *this , PolicyType( 0 , n ) ); - closure.execute(); - space.fence(); - } - else { - for ( size_t i = 0 ; i < n ; ++i ) operator()(i); - } - } - - void construct_shared_allocation() - { execute( false ); } - - void destroy_shared_allocation() - { execute( true ); } -}; - - -template< class ExecSpace , class ValueType > -struct ViewValueFunctor< ExecSpace , ValueType , true /* is_scalar */ > -{ - typedef Kokkos::RangePolicy< ExecSpace > PolicyType ; - - ExecSpace space ; - ValueType * ptr ; - size_t n ; - - KOKKOS_INLINE_FUNCTION - void operator()( const size_t i ) const - { ptr[i] = ValueType(); } - - ViewValueFunctor() = default ; - ViewValueFunctor( const ViewValueFunctor & ) = default ; - ViewValueFunctor & operator = ( const ViewValueFunctor & ) = default ; - - ViewValueFunctor( ExecSpace const & arg_space - , ValueType * const arg_ptr - , size_t const arg_n ) - : space( arg_space ) - , ptr( arg_ptr ) - , n( arg_n ) - {} - - void construct_shared_allocation() - { - if ( ! space.in_parallel() ) { - const Kokkos::Impl::ParallelFor< ViewValueFunctor , PolicyType > - closure( *this , PolicyType( 0 , n ) ); - closure.execute(); - space.fence(); - } - else { - for ( size_t i = 0 ; i < n ; ++i ) operator()(i); - } - } - - void destroy_shared_allocation() {} -}; - -//---------------------------------------------------------------------------- -/** \brief View mapping for non-specialized data type and standard layout */ -template< class Traits > -class ViewMapping< Traits , - typename std::enable_if<( - std::is_same< typename Traits::specialize , void >::value - && - ViewOffset< typename Traits::dimension - , typename Traits::array_layout - , void >::is_mapping_plugin::value - )>::type > -{ -private: - - template< class , class ... > friend class ViewMapping ; - template< class , class ... 
> friend class Kokkos::Experimental::View ; - - typedef ViewOffset< typename Traits::dimension - , typename Traits::array_layout - , void - > offset_type ; - - typedef typename ViewDataHandle< Traits >::handle_type handle_type ; - - handle_type m_handle ; - offset_type m_offset ; - - KOKKOS_INLINE_FUNCTION - ViewMapping( const handle_type & arg_handle , const offset_type & arg_offset ) - : m_handle( arg_handle ) - , m_offset( arg_offset ) - {} - -public: - - //---------------------------------------- - // Domain dimensions - - enum { Rank = Traits::dimension::rank }; - - template< typename iType > - KOKKOS_INLINE_FUNCTION constexpr size_t extent( const iType & r ) const - { return m_offset.m_dim.extent(r); } - - KOKKOS_INLINE_FUNCTION constexpr - typename Traits::array_layout layout() const - { return m_offset.layout(); } - - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_offset.dimension_0(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_offset.dimension_1(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_offset.dimension_2(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_offset.dimension_3(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_offset.dimension_4(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_offset.dimension_5(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_offset.dimension_6(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_offset.dimension_7(); } - - // Is a regular layout with uniform striding for each index. 
- using is_regular = typename offset_type::is_regular ; - - KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_offset.stride_0(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_offset.stride_1(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_offset.stride_2(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_offset.stride_3(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_offset.stride_4(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_offset.stride_5(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_offset.stride_6(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_offset.stride_7(); } - - template< typename iType > - KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_offset.stride(s); } - - //---------------------------------------- - // Range span - - /** \brief Span of the mapped range */ - KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_offset.span(); } - - /** \brief Is the mapped range span contiguous */ - KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_offset.span_is_contiguous(); } - - typedef typename ViewDataHandle< Traits >::return_type reference_type ; - typedef typename Traits::value_type * pointer_type ; - - /** \brief If data references are lvalue_reference than can query pointer to memory */ - KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const - { - return std::is_lvalue_reference< reference_type >::value - ? (pointer_type) m_handle - : (pointer_type) 0 ; - } - - //---------------------------------------- - // The View class performs all rank and bounds checking before - // calling these element reference methods. 
- - KOKKOS_FORCEINLINE_FUNCTION - reference_type reference() const { return m_handle[0]; } - - template< typename I0 > - KOKKOS_FORCEINLINE_FUNCTION - typename - std::enable_if< std::is_integral<I0>::value && - ! std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value - , reference_type >::type - reference( const I0 & i0 ) const { return m_handle[i0]; } - - template< typename I0 > - KOKKOS_FORCEINLINE_FUNCTION - typename - std::enable_if< std::is_integral<I0>::value && - std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value - , reference_type >::type - reference( const I0 & i0 ) const { return m_handle[ m_offset(i0) ]; } - - template< typename I0 , typename I1 > - KOKKOS_FORCEINLINE_FUNCTION - reference_type reference( const I0 & i0 , const I1 & i1 ) const - { return m_handle[ m_offset(i0,i1) ]; } - - template< typename I0 , typename I1 , typename I2 > - KOKKOS_FORCEINLINE_FUNCTION - reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const - { return m_handle[ m_offset(i0,i1,i2) ]; } - - template< typename I0 , typename I1 , typename I2 , typename I3 > - KOKKOS_FORCEINLINE_FUNCTION - reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const - { return m_handle[ m_offset(i0,i1,i2,i3) ]; } - - template< typename I0 , typename I1 , typename I2 , typename I3 - , typename I4 > - KOKKOS_FORCEINLINE_FUNCTION - reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 - , const I4 & i4 ) const - { return m_handle[ m_offset(i0,i1,i2,i3,i4) ]; } - - template< typename I0 , typename I1 , typename I2 , typename I3 - , typename I4 , typename I5 > - KOKKOS_FORCEINLINE_FUNCTION - reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 - , const I4 & i4 , const I5 & i5 ) const - { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5) ]; } - - template< typename I0 , typename I1 , typename I2 , typename I3 - , typename I4 , 
typename I5 , typename I6 > - KOKKOS_FORCEINLINE_FUNCTION - reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 - , const I4 & i4 , const I5 & i5 , const I6 & i6 ) const - { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6) ]; } - - template< typename I0 , typename I1 , typename I2 , typename I3 - , typename I4 , typename I5 , typename I6 , typename I7 > - KOKKOS_FORCEINLINE_FUNCTION - reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 - , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const - { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; } - - //---------------------------------------- - -private: - - enum { MemorySpanMask = 8 - 1 /* Force alignment on 8 byte boundary */ }; - enum { MemorySpanSize = sizeof(typename Traits::value_type) }; - -public: - - /** \brief Span, in bytes, of the referenced memory */ - KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const - { - return ( m_offset.span() * sizeof(typename Traits::value_type) + MemorySpanMask ) & ~size_t(MemorySpanMask); - } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION ~ViewMapping() {} - KOKKOS_INLINE_FUNCTION ViewMapping() : m_handle(), m_offset() {} - KOKKOS_INLINE_FUNCTION ViewMapping( const ViewMapping & rhs ) - : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {} - KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( const ViewMapping & rhs ) - { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; } - - KOKKOS_INLINE_FUNCTION ViewMapping( ViewMapping && rhs ) - : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {} - KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( ViewMapping && rhs ) - { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; } - - //---------------------------------------- - - /**\brief Span, in bytes, of the required memory */ - KOKKOS_INLINE_FUNCTION - static constexpr size_t memory_span( typename Traits::array_layout const & 
arg_layout ) - { - typedef std::integral_constant< unsigned , 0 > padding ; - return ( offset_type( padding(), arg_layout ).span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask); - } - - /**\brief Wrap a span of memory */ - template< class ... P > - KOKKOS_INLINE_FUNCTION - ViewMapping( ViewCtorProp< P ... > const & arg_prop - , typename Traits::array_layout const & arg_layout - ) - : m_handle( ( (ViewCtorProp<void,pointer_type> const &) arg_prop ).value ) - , m_offset( std::integral_constant< unsigned , 0 >() , arg_layout ) - {} - - //---------------------------------------- - /* Allocate and construct mapped array. - * Allocate via shared allocation record and - * return that record for allocation tracking. - */ - template< class ... P > - SharedAllocationRecord<> * - allocate_shared( ViewCtorProp< P... > const & arg_prop - , typename Traits::array_layout const & arg_layout ) - { - typedef ViewCtorProp< P... > alloc_prop ; - - typedef typename alloc_prop::execution_space execution_space ; - typedef typename Traits::memory_space memory_space ; - typedef typename Traits::value_type value_type ; - typedef ViewValueFunctor< execution_space , value_type > functor_type ; - typedef SharedAllocationRecord< memory_space , functor_type > record_type ; - - // Query the mapping for byte-size of allocation. - // If padding is allowed then pass in sizeof value type - // for padding computation. - typedef std::integral_constant - < unsigned - , alloc_prop::allow_padding ? 
sizeof(value_type) : 0 - > padding ; - - m_offset = offset_type( padding(), arg_layout ); - - const size_t alloc_size = - ( m_offset.span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask); - - // Create shared memory tracking record with allocate memory from the memory space - record_type * const record = - record_type::allocate( ( (ViewCtorProp<void,memory_space> const &) arg_prop ).value - , ( (ViewCtorProp<void,std::string> const &) arg_prop ).value - , alloc_size ); - - // Only set the the pointer and initialize if the allocation is non-zero. - // May be zero if one of the dimensions is zero. - if ( alloc_size ) { - - m_handle = handle_type( reinterpret_cast< pointer_type >( record->data() ) ); - - if ( alloc_prop::initialize ) { - // Assume destruction is only required when construction is requested. - // The ViewValueFunctor has both value construction and destruction operators. - record->m_destroy = functor_type( ( (ViewCtorProp<void,execution_space> const &) arg_prop).value - , (value_type *) m_handle - , m_offset.span() - ); - - // Construct values - record->m_destroy.construct_shared_allocation(); - } - } - - return record ; - } -}; - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- -/** \brief Assign compatible default mappings */ - -template< class DstTraits , class SrcTraits > -class ViewMapping< DstTraits , SrcTraits , - typename std::enable_if<( - std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value - && - std::is_same< typename DstTraits::specialize , void >::value - && - std::is_same< typename SrcTraits::specialize , void >::value - && - ( - std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value - || - ( - ( - std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value || - std::is_same< typename DstTraits::array_layout , 
Kokkos::LayoutRight >::value || - std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value - ) - && - ( - std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value || - std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value || - std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value - ) - ) - ) - )>::type > -{ -private: - - enum { is_assignable_value_type = - std::is_same< typename DstTraits::value_type - , typename SrcTraits::value_type >::value || - std::is_same< typename DstTraits::value_type - , typename SrcTraits::const_value_type >::value }; - - enum { is_assignable_dimension = - ViewDimensionAssignable< typename DstTraits::dimension - , typename SrcTraits::dimension >::value }; - - enum { is_assignable_layout = - std::is_same< typename DstTraits::array_layout - , typename SrcTraits::array_layout >::value || - std::is_same< typename DstTraits::array_layout - , Kokkos::LayoutStride >::value || - ( DstTraits::dimension::rank == 0 ) || - ( DstTraits::dimension::rank == 1 && - DstTraits::dimension::rank_dynamic == 1 ) - }; - -public: - - enum { is_assignable = is_assignable_value_type && - is_assignable_dimension && - is_assignable_layout }; - - typedef Kokkos::Experimental::Impl::SharedAllocationTracker TrackType ; - typedef ViewMapping< DstTraits , void > DstType ; - typedef ViewMapping< SrcTraits , void > SrcType ; - - KOKKOS_INLINE_FUNCTION - static void assign( DstType & dst , const SrcType & src , const TrackType & src_track ) - { - static_assert( is_assignable_value_type - , "View assignment must have same value type or const = non-const" ); - - static_assert( is_assignable_dimension - , "View assignment must have compatible dimensions" ); - - static_assert( is_assignable_layout - , "View assignment must have compatible layout or have rank <= 1" ); - - typedef typename DstType::offset_type dst_offset_type ; - - if ( size_t(DstTraits::dimension::rank_dynamic) < 
size_t(SrcTraits::dimension::rank_dynamic) ) { - typedef typename DstTraits::dimension dst_dim; - bool assignable = - ( ( 1 > DstTraits::dimension::rank_dynamic && 1 <= SrcTraits::dimension::rank_dynamic ) ? - dst_dim::ArgN0 == src.dimension_0() : true ) && - ( ( 2 > DstTraits::dimension::rank_dynamic && 2 <= SrcTraits::dimension::rank_dynamic ) ? - dst_dim::ArgN1 == src.dimension_1() : true ) && - ( ( 3 > DstTraits::dimension::rank_dynamic && 3 <= SrcTraits::dimension::rank_dynamic ) ? - dst_dim::ArgN2 == src.dimension_2() : true ) && - ( ( 4 > DstTraits::dimension::rank_dynamic && 4 <= SrcTraits::dimension::rank_dynamic ) ? - dst_dim::ArgN3 == src.dimension_3() : true ) && - ( ( 5 > DstTraits::dimension::rank_dynamic && 5 <= SrcTraits::dimension::rank_dynamic ) ? - dst_dim::ArgN4 == src.dimension_4() : true ) && - ( ( 6 > DstTraits::dimension::rank_dynamic && 6 <= SrcTraits::dimension::rank_dynamic ) ? - dst_dim::ArgN5 == src.dimension_5() : true ) && - ( ( 7 > DstTraits::dimension::rank_dynamic && 7 <= SrcTraits::dimension::rank_dynamic ) ? - dst_dim::ArgN6 == src.dimension_6() : true ) && - ( ( 8 > DstTraits::dimension::rank_dynamic && 8 <= SrcTraits::dimension::rank_dynamic ) ? - dst_dim::ArgN7 == src.dimension_7() : true ) - ; - if(!assignable) - Kokkos::abort("View Assignment: trying to assign runtime dimension to non matching compile time dimension."); - } - dst.m_offset = dst_offset_type( src.m_offset ); - dst.m_handle = Kokkos::Experimental::Impl::ViewDataHandle< DstTraits >::assign( src.m_handle , src_track ); - } -}; - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- -// Subview mapping. -// Deduce destination view type from source view traits and subview arguments - -template< class SrcTraits , class ... 
Args > -struct ViewMapping - < typename std::enable_if<( - std::is_same< typename SrcTraits::specialize , void >::value - && - ( - std::is_same< typename SrcTraits::array_layout - , Kokkos::LayoutLeft >::value || - std::is_same< typename SrcTraits::array_layout - , Kokkos::LayoutRight >::value || - std::is_same< typename SrcTraits::array_layout - , Kokkos::LayoutStride >::value - ) - )>::type - , SrcTraits - , Args ... > -{ -private: - - static_assert( SrcTraits::rank == sizeof...(Args) , - "Subview mapping requires one argument for each dimension of source View" ); - - enum - { RZ = false - , R0 = bool(is_integral_extent<0,Args...>::value) - , R1 = bool(is_integral_extent<1,Args...>::value) - , R2 = bool(is_integral_extent<2,Args...>::value) - , R3 = bool(is_integral_extent<3,Args...>::value) - , R4 = bool(is_integral_extent<4,Args...>::value) - , R5 = bool(is_integral_extent<5,Args...>::value) - , R6 = bool(is_integral_extent<6,Args...>::value) - , R7 = bool(is_integral_extent<7,Args...>::value) - }; - - enum { rank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3) - + unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) }; - - // Whether right-most rank is a range. - enum { R0_rev = ( 0 == SrcTraits::rank ? RZ : ( - 1 == SrcTraits::rank ? R0 : ( - 2 == SrcTraits::rank ? R1 : ( - 3 == SrcTraits::rank ? R2 : ( - 4 == SrcTraits::rank ? R3 : ( - 5 == SrcTraits::rank ? R4 : ( - 6 == SrcTraits::rank ? R5 : ( - 7 == SrcTraits::rank ? R6 : R7 )))))))) }; - - // Subview's layout - typedef typename std::conditional< - ( /* Same array layout IF */ - ( rank == 0 ) /* output rank zero */ - || - // OutputRank 1 or 2, InputLayout Left, Interval 0 - // because single stride one or second index has a stride. - ( rank <= 2 && R0 && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ) //replace with input rank - || - // OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1] - // because single stride one or second index has a stride. 
- ( rank <= 2 && R0_rev && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ) //replace input rank - ), typename SrcTraits::array_layout , Kokkos::LayoutStride - >::type array_layout ; - - typedef typename SrcTraits::value_type value_type ; - - typedef typename std::conditional< rank == 0 , value_type , - typename std::conditional< rank == 1 , value_type * , - typename std::conditional< rank == 2 , value_type ** , - typename std::conditional< rank == 3 , value_type *** , - typename std::conditional< rank == 4 , value_type **** , - typename std::conditional< rank == 5 , value_type ***** , - typename std::conditional< rank == 6 , value_type ****** , - typename std::conditional< rank == 7 , value_type ******* , - value_type ******** - >::type >::type >::type >::type >::type >::type >::type >::type - data_type ; - -public: - - typedef Kokkos::Experimental::ViewTraits - < data_type - , array_layout - , typename SrcTraits::device_type - , typename SrcTraits::memory_traits > traits_type ; - - typedef Kokkos::Experimental::View - < data_type - , array_layout - , typename SrcTraits::device_type - , typename SrcTraits::memory_traits > type ; - - template< class MemoryTraits > - struct apply { - - static_assert( Kokkos::Impl::is_memory_traits< MemoryTraits >::value , "" ); - - typedef Kokkos::Experimental::ViewTraits - < data_type - , array_layout - , typename SrcTraits::device_type - , MemoryTraits > traits_type ; - - typedef Kokkos::Experimental::View - < data_type - , array_layout - , typename SrcTraits::device_type - , MemoryTraits > type ; - }; - - // The presumed type is 'ViewMapping< traits_type , void >' - // However, a compatible ViewMapping is acceptable. - template< class DstTraits > - KOKKOS_INLINE_FUNCTION - static void assign( ViewMapping< DstTraits , void > & dst - , ViewMapping< SrcTraits , void > const & src - , Args ... 
args ) - { - static_assert( - ViewMapping< DstTraits , traits_type , void >::is_assignable , - "Subview destination type must be compatible with subview derived type" ); - - typedef ViewMapping< DstTraits , void > DstType ; - - typedef typename DstType::offset_type dst_offset_type ; - typedef typename DstType::handle_type dst_handle_type ; - - const SubviewExtents< SrcTraits::rank , rank > - extents( src.m_offset.m_dim , args... ); - - dst.m_offset = dst_offset_type( src.m_offset , extents ); - dst.m_handle = dst_handle_type( src.m_handle + - src.m_offset( extents.domain_offset(0) - , extents.domain_offset(1) - , extents.domain_offset(2) - , extents.domain_offset(3) - , extents.domain_offset(4) - , extents.domain_offset(5) - , extents.domain_offset(6) - , extents.domain_offset(7) - ) ); - } -}; - - - -//---------------------------------------------------------------------------- - -}}} // namespace Kokkos::Experimental::Impl - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -template< unsigned , class MapType > -KOKKOS_INLINE_FUNCTION -bool view_verify_operator_bounds( const MapType & ) -{ return true ; } - -template< unsigned R , class MapType , class iType , class ... Args > -KOKKOS_INLINE_FUNCTION -bool view_verify_operator_bounds - ( const MapType & map - , const iType & i - , Args ... args - ) -{ - return ( size_t(i) < map.extent(R) ) - && view_verify_operator_bounds<R+1>( map , args ... ); -} - -template< unsigned , class MapType > -inline -void view_error_operator_bounds( char * , int , const MapType & ) -{} - -template< unsigned R , class MapType , class iType , class ... Args > -inline -void view_error_operator_bounds - ( char * buf - , int len - , const MapType & map - , const iType & i - , Args ... 
args - ) -{ - const int n = - snprintf(buf,len," %ld < %ld %c" - , static_cast<unsigned long>(i) - , static_cast<unsigned long>( map.extent(R) ) - , ( sizeof...(Args) ? ',' : ')' ) - ); - view_error_operator_bounds<R+1>(buf+n,len-n,map,args...); -} - -template< class MapType , class ... Args > -KOKKOS_INLINE_FUNCTION -void view_verify_operator_bounds - ( const MapType & map , Args ... args ) -{ - if ( ! view_verify_operator_bounds<0>( map , args ... ) ) { -#if defined( KOKKOS_ACTIVE_EXECUTION_SPACE_HOST ) - enum { LEN = 1024 }; - char buffer[ LEN ]; - int n = snprintf(buf,LEN,"View bounds error(" ); - view_error_operator_bounds<0>( buffer + n , LEN - n , map , args ... ); - Kokkos::Impl::throw_runtime_exception(std::string(buffer)); -#else - Kokkos::abort("View bounds error"); -#endif - } -} - - -class Error_view_scalar_reference_to_non_scalar_view ; - -} /* namespace Impl */ -} /* namespace Experimental */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP */ +// Deprecated file for backward compatibility +#include <impl/Kokkos_ViewMapping.hpp> diff --git a/lib/kokkos/core/src/impl/Kokkos_AnalyzeShape.hpp b/lib/kokkos/core/src/impl/Kokkos_AnalyzeShape.hpp deleted file mode 100644 index 2de9df008ee5b42b5d38727ead56bae768869c43..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/impl/Kokkos_AnalyzeShape.hpp +++ /dev/null @@ -1,260 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_ANALYZESHAPE_HPP -#define KOKKOS_ANALYZESHAPE_HPP - -#include <impl/Kokkos_Shape.hpp> - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- - -/** \brief Analyze the array shape defined by a Kokkos::View data type. - * - * It is presumed that the data type can be mapped down to a multidimensional - * array of an intrinsic scalar numerical type (double, float, int, ... ). - * The 'value_type' of an array may be an embedded aggregate type such - * as a fixed length array 'Array<T,N>'. - * In this case the 'array_intrinsic_type' represents the - * underlying array of intrinsic scalar numerical type. - * - * The embedded aggregate type must have an AnalyzeShape specialization - * to map it down to a shape and intrinsic scalar numerical type. 
- */ -template< class T > -struct AnalyzeShape : public Shape< sizeof(T) , 0 > -{ - typedef void specialize ; - - typedef Shape< sizeof(T), 0 > shape ; - - typedef T array_intrinsic_type ; - typedef T value_type ; - typedef T type ; - - typedef const T const_array_intrinsic_type ; - typedef const T const_value_type ; - typedef const T const_type ; - - typedef T non_const_array_intrinsic_type ; - typedef T non_const_value_type ; - typedef T non_const_type ; -}; - -template<> -struct AnalyzeShape<void> : public Shape< 0 , 0 > -{ - typedef void specialize ; - - typedef Shape< 0 , 0 > shape ; - - typedef void array_intrinsic_type ; - typedef void value_type ; - typedef void type ; - typedef const void const_array_intrinsic_type ; - typedef const void const_value_type ; - typedef const void const_type ; - typedef void non_const_array_intrinsic_type ; - typedef void non_const_value_type ; - typedef void non_const_type ; -}; - -template< class T > -struct AnalyzeShape< const T > : public AnalyzeShape<T>::shape -{ -private: - typedef AnalyzeShape<T> nested ; -public: - - typedef typename nested::specialize specialize ; - - typedef typename nested::shape shape ; - - typedef typename nested::const_array_intrinsic_type array_intrinsic_type ; - typedef typename nested::const_value_type value_type ; - typedef typename nested::const_type type ; - - typedef typename nested::const_array_intrinsic_type const_array_intrinsic_type ; - typedef typename nested::const_value_type const_value_type ; - typedef typename nested::const_type const_type ; - - typedef typename nested::non_const_array_intrinsic_type non_const_array_intrinsic_type ; - typedef typename nested::non_const_value_type non_const_value_type ; - typedef typename nested::non_const_type non_const_type ; -}; - -template< class T > -struct AnalyzeShape< T * > - : public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type -{ -private: - typedef AnalyzeShape<T> nested ; -public: - - typedef typename nested::specialize 
specialize ; - - typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ; - - typedef typename nested::array_intrinsic_type * array_intrinsic_type ; - typedef typename nested::value_type value_type ; - typedef typename nested::type * type ; - - typedef typename nested::const_array_intrinsic_type * const_array_intrinsic_type ; - typedef typename nested::const_value_type const_value_type ; - typedef typename nested::const_type * const_type ; - - typedef typename nested::non_const_array_intrinsic_type * non_const_array_intrinsic_type ; - typedef typename nested::non_const_value_type non_const_value_type ; - typedef typename nested::non_const_type * non_const_type ; -}; - -template< class T > -struct AnalyzeShape< T[] > - : public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type -{ -private: - typedef AnalyzeShape<T> nested ; -public: - - typedef typename nested::specialize specialize ; - - typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ; - - typedef typename nested::array_intrinsic_type array_intrinsic_type [] ; - typedef typename nested::value_type value_type ; - typedef typename nested::type type [] ; - - typedef typename nested::const_array_intrinsic_type const_array_intrinsic_type [] ; - typedef typename nested::const_value_type const_value_type ; - typedef typename nested::const_type const_type [] ; - - typedef typename nested::non_const_array_intrinsic_type non_const_array_intrinsic_type [] ; - typedef typename nested::non_const_value_type non_const_value_type ; - typedef typename nested::non_const_type non_const_type [] ; -}; - -template< class T > -struct AnalyzeShape< const T[] > - : public ShapeInsert< typename AnalyzeShape< const T >::shape , 0 >::type -{ -private: - typedef AnalyzeShape< const T > nested ; -public: - - typedef typename nested::specialize specialize ; - - typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ; - - typedef typename nested::array_intrinsic_type 
array_intrinsic_type [] ; - typedef typename nested::value_type value_type ; - typedef typename nested::type type [] ; - - typedef typename nested::const_array_intrinsic_type const_array_intrinsic_type [] ; - typedef typename nested::const_value_type const_value_type ; - typedef typename nested::const_type const_type [] ; - - typedef typename nested::non_const_array_intrinsic_type non_const_array_intrinsic_type [] ; - typedef typename nested::non_const_value_type non_const_value_type ; - typedef typename nested::non_const_type non_const_type [] ; -}; - -template< class T , unsigned N > -struct AnalyzeShape< T[N] > - : public ShapeInsert< typename AnalyzeShape<T>::shape , N >::type -{ -private: - typedef AnalyzeShape<T> nested ; -public: - - typedef typename nested::specialize specialize ; - - typedef typename ShapeInsert< typename nested::shape , N >::type shape ; - - typedef typename nested::array_intrinsic_type array_intrinsic_type [N] ; - typedef typename nested::value_type value_type ; - typedef typename nested::type type [N] ; - - typedef typename nested::const_array_intrinsic_type const_array_intrinsic_type [N] ; - typedef typename nested::const_value_type const_value_type ; - typedef typename nested::const_type const_type [N] ; - - typedef typename nested::non_const_array_intrinsic_type non_const_array_intrinsic_type [N] ; - typedef typename nested::non_const_value_type non_const_value_type ; - typedef typename nested::non_const_type non_const_type [N] ; -}; - -template< class T , unsigned N > -struct AnalyzeShape< const T[N] > - : public ShapeInsert< typename AnalyzeShape< const T >::shape , N >::type -{ -private: - typedef AnalyzeShape< const T > nested ; -public: - - typedef typename nested::specialize specialize ; - - typedef typename ShapeInsert< typename nested::shape , N >::type shape ; - - typedef typename nested::array_intrinsic_type array_intrinsic_type [N] ; - typedef typename nested::value_type value_type ; - typedef typename nested::type type 
[N] ; - - typedef typename nested::const_array_intrinsic_type const_array_intrinsic_type [N] ; - typedef typename nested::const_value_type const_value_type ; - typedef typename nested::const_type const_type [N] ; - - typedef typename nested::non_const_array_intrinsic_type non_const_array_intrinsic_type [N] ; - typedef typename nested::non_const_value_type non_const_value_type ; - typedef typename nested::non_const_type non_const_type [N] ; -}; - -} // namespace Impl -} // namespace Kokkos - -#endif /* #ifndef KOKKOS_ANALYZESHAPE_HPP */ - diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp index fd7ea845e7633d7415b0b9cd147f1da51ef93632..beafeaa5b50b82fab6dda7db598dc39e4a969f72 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp @@ -50,8 +50,9 @@ namespace Kokkos { // Cuda native CAS supports int, unsigned int, and unsigned long long int (non-standard type). // Must cast-away 'volatile' for the CAS call. 
-#if defined( KOKKOS_ATOMICS_USE_CUDA ) +#if defined( KOKKOS_HAVE_CUDA ) +#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND) __inline__ __device__ int atomic_compare_exchange( volatile int * const dest, const int compare, const int val) { return atomicCAS((int*)dest,compare,val); } @@ -89,38 +90,44 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare , template < typename T > __inline__ __device__ T atomic_compare_exchange( volatile T * const dest , const T & compare , - typename ::Kokkos::Impl::enable_if< + typename Kokkos::Impl::enable_if< ( sizeof(T) != 4 ) && ( sizeof(T) != 8 ) , const T >::type& val ) { T return_val; // This is a way to (hopefully) avoid dead lock in a warp - int done = 1; - while ( done>0 ) { - done++; - if( Impl::lock_address_cuda_space( (void*) dest ) ) { - return_val = *dest; - if( return_val == compare ) - *dest = val; - Impl::unlock_address_cuda_space( (void*) dest ); - done = 0; + int done = 0; + unsigned int active = __ballot(1); + unsigned int done_active = 0; + while (active!=done_active) { + if(!done) { + if( Impl::lock_address_cuda_space( (void*) dest ) ) { + return_val = *dest; + if( return_val == compare ) + *dest = val; + Impl::unlock_address_cuda_space( (void*) dest ); + done = 1; + } } + done_active = __ballot(done); } return return_val; } +#endif +#endif //---------------------------------------------------------------------------- // GCC native CAS supports int, long, unsigned int, unsigned long. // Intel native CAS support int and long with the same interface as GCC. 
+#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND) +#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) -#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) - -KOKKOS_INLINE_FUNCTION +inline int atomic_compare_exchange( volatile int * const dest, const int compare, const int val) { return __sync_val_compare_and_swap(dest,compare,val); } -KOKKOS_INLINE_FUNCTION +inline long atomic_compare_exchange( volatile long * const dest, const long compare, const long val ) { return __sync_val_compare_and_swap(dest,compare,val); } @@ -128,11 +135,11 @@ long atomic_compare_exchange( volatile long * const dest, const long compare, co // GCC supports unsigned -KOKKOS_INLINE_FUNCTION +inline unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val ) { return __sync_val_compare_and_swap(dest,compare,val); } -KOKKOS_INLINE_FUNCTION +inline unsigned long atomic_compare_exchange( volatile unsigned long * const dest , const unsigned long compare , const unsigned long val ) @@ -141,7 +148,7 @@ unsigned long atomic_compare_exchange( volatile unsigned long * const dest , #endif template < typename T > -KOKKOS_INLINE_FUNCTION +inline T atomic_compare_exchange( volatile T * const dest, const T & compare, typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val ) { @@ -163,7 +170,7 @@ T atomic_compare_exchange( volatile T * const dest, const T & compare, } template < typename T > -KOKKOS_INLINE_FUNCTION +inline T atomic_compare_exchange( volatile T * const dest, const T & compare, typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && sizeof(T) == sizeof(long) , const T & >::type val ) @@ -187,7 +194,7 @@ T atomic_compare_exchange( volatile T * const dest, const T & compare, #if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 ) template < typename T > -KOKKOS_INLINE_FUNCTION +inline T atomic_compare_exchange( 
volatile T * const dest, const T & compare, typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && sizeof(T) != sizeof(long) && @@ -207,7 +214,7 @@ T atomic_compare_exchange( volatile T * const dest, const T & compare, template < typename T > inline T atomic_compare_exchange( volatile T * const dest , const T compare , - typename ::Kokkos::Impl::enable_if< + typename Kokkos::Impl::enable_if< ( sizeof(T) != 4 ) && ( sizeof(T) != 8 ) #if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 ) @@ -254,6 +261,7 @@ T atomic_compare_exchange( volatile T * const dest, const T compare, const T val return retval; } +#endif #endif template <typename T> @@ -262,7 +270,6 @@ bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, con { return compare == atomic_compare_exchange(dest, compare, val); } - //---------------------------------------------------------------------------- } // namespace Kokkos diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp index 1438a37e454e556832549e2137202d971b4a09ce..7fc0e6984bbd2aacdc69dff2f1c6bfeed4493b1a 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp @@ -44,6 +44,8 @@ #if defined( KOKKOS_ATOMIC_HPP) && ! 
defined( KOKKOS_ATOMIC_DECREMENT ) #define KOKKOS_ATOMIC_DECREMENT +#include "impl/Kokkos_Atomic_Fetch_Sub.hpp" + namespace Kokkos { // Atomic increment @@ -58,7 +60,7 @@ void atomic_decrement<char>(volatile char* a) { : "memory" ); #else - Kokkos::atomic_fetch_add(a,-1); + Kokkos::atomic_fetch_sub(a, 1); #endif } @@ -73,7 +75,7 @@ void atomic_decrement<short>(volatile short* a) { : "memory" ); #else - Kokkos::atomic_fetch_add(a,-1); + Kokkos::atomic_fetch_sub(a, 1); #endif } @@ -88,7 +90,7 @@ void atomic_decrement<int>(volatile int* a) { : "memory" ); #else - Kokkos::atomic_fetch_add(a,-1); + Kokkos::atomic_fetch_sub(a, 1); #endif } @@ -103,14 +105,14 @@ void atomic_decrement<long long int>(volatile long long int* a) { : "memory" ); #else - Kokkos::atomic_fetch_add(a,-1); + Kokkos::atomic_fetch_sub(a, 1); #endif } template<typename T> KOKKOS_INLINE_FUNCTION void atomic_decrement(volatile T* a) { - Kokkos::atomic_fetch_add(a,-1); + Kokkos::atomic_fetch_sub(a, 1); } } // End of namespace Kokkos diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp index e8cac4ba3b82ba097016a3ba80b03b010a7df8c3..ae53b81779a21f285e6d5fe528b7f32a3baca212 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp @@ -48,7 +48,8 @@ namespace Kokkos { //---------------------------------------------------------------------------- -#if defined( KOKKOS_ATOMICS_USE_CUDA ) +#if defined( KOKKOS_HAVE_CUDA ) +#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND) __inline__ __device__ int atomic_exchange( volatile int * const dest , const int val ) @@ -99,22 +100,26 @@ T atomic_exchange( template < typename T > __inline__ __device__ T atomic_exchange( volatile T * const dest , - typename ::Kokkos::Impl::enable_if< + typename Kokkos::Impl::enable_if< ( sizeof(T) != 4 ) && ( sizeof(T) != 8 ) , const T >::type& val ) { T return_val; // This is a way to 
(hopefully) avoid dead lock in a warp - int done = 1; - while ( done > 0 ) { - done++; - if( Impl::lock_address_cuda_space( (void*) dest ) ) { - return_val = *dest; - *dest = val; - Impl::unlock_address_cuda_space( (void*) dest ); - done = 0; + int done = 0; + unsigned int active = __ballot(1); + unsigned int done_active = 0; + while (active!=done_active) { + if(!done) { + if( Impl::lock_address_cuda_space( (void*) dest ) ) { + return_val = *dest; + *dest = val; + Impl::unlock_address_cuda_space( (void*) dest ); + done = 1; + } } + done_active = __ballot(done); } return return_val; } @@ -152,12 +157,16 @@ void atomic_assign( (void) atomic_exchange(dest,val); } +#endif +#endif + //---------------------------------------------------------------------------- -#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) +#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND) +#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) template< typename T > -KOKKOS_INLINE_FUNCTION +inline T atomic_exchange( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long) , const T & >::type val ) @@ -172,7 +181,7 @@ T atomic_exchange( volatile T * const dest , union U { T val_T ; type val_type ; - KOKKOS_INLINE_FUNCTION U() {}; + inline U() {}; } old ; #else union { T val_T ; type val_type ; } old ; @@ -190,7 +199,7 @@ T atomic_exchange( volatile T * const dest , #if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 ) template< typename T > -KOKKOS_INLINE_FUNCTION +inline T atomic_exchange( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t) , const T & >::type val ) @@ -198,7 +207,7 @@ T atomic_exchange( volatile T * const dest , union U { Impl::cas128_t i ; T t ; - KOKKOS_INLINE_FUNCTION U() {}; + inline U() {}; } assume , oldval , newval ; oldval.t = *dest ; @@ -218,7 +227,7 @@ T atomic_exchange( volatile T * const dest , 
template < typename T > inline T atomic_exchange( volatile T * const dest , - typename ::Kokkos::Impl::enable_if< + typename Kokkos::Impl::enable_if< ( sizeof(T) != 4 ) && ( sizeof(T) != 8 ) #if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 ) @@ -247,7 +256,7 @@ T atomic_exchange( volatile T * const dest , } template< typename T > -KOKKOS_INLINE_FUNCTION +inline void atomic_assign( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long) , const T & >::type val ) @@ -262,7 +271,7 @@ void atomic_assign( volatile T * const dest , union U { T val_T ; type val_type ; - KOKKOS_INLINE_FUNCTION U() {}; + inline U() {}; } old ; #else union { T val_T ; type val_type ; } old ; @@ -278,7 +287,7 @@ void atomic_assign( volatile T * const dest , #if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 ) template< typename T > -KOKKOS_INLINE_FUNCTION +inline void atomic_assign( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t) , const T & >::type val ) @@ -286,7 +295,7 @@ void atomic_assign( volatile T * const dest , union U { Impl::cas128_t i ; T t ; - KOKKOS_INLINE_FUNCTION U() {}; + inline U() {}; } assume , oldval , newval ; oldval.t = *dest ; @@ -301,7 +310,7 @@ void atomic_assign( volatile T * const dest , template < typename T > inline void atomic_assign( volatile T * const dest , - typename ::Kokkos::Impl::enable_if< + typename Kokkos::Impl::enable_if< ( sizeof(T) != 4 ) && ( sizeof(T) != 8 ) #if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 ) @@ -325,7 +334,7 @@ void atomic_assign( volatile T * const dest , #elif defined( KOKKOS_ATOMICS_USE_OMP31 ) template < typename T > -KOKKOS_INLINE_FUNCTION +inline T atomic_exchange( volatile T * const dest , const T val ) { T retval; @@ -339,7 +348,7 @@ T atomic_exchange( volatile T * const dest , const T val ) } template < typename T > -KOKKOS_INLINE_FUNCTION +inline void atomic_assign( 
volatile T * const dest , const T val ) { //#pragma omp atomic @@ -350,7 +359,7 @@ void atomic_assign( volatile T * const dest , const T val ) } #endif - +#endif } // namespace Kokkos #endif diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp index 62dfcdd2f88934f8d48b51e0637e9487d92c9a7e..08d2867ab434531a501a6e3b29dca4e2fb63edef 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp @@ -48,7 +48,8 @@ namespace Kokkos { //---------------------------------------------------------------------------- -#if defined( KOKKOS_ATOMICS_USE_CUDA ) +#if defined( KOKKOS_HAVE_CUDA ) +#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND) // Support for int, unsigned int, unsigned long long int, and float @@ -69,6 +70,12 @@ __inline__ __device__ float atomic_fetch_add( volatile float * const dest , const float val ) { return atomicAdd((float*)dest,val); } +#if ( 600 <= __CUDA_ARCH__ ) +__inline__ __device__ +double atomic_fetch_add( volatile double * const dest , const double val ) +{ return atomicAdd((double*)dest,val); } +#endif + template < typename T > __inline__ __device__ T atomic_fetch_add( volatile T * const dest , @@ -133,31 +140,38 @@ T atomic_fetch_add( volatile T * const dest , template < typename T > __inline__ __device__ T atomic_fetch_add( volatile T * const dest , - typename ::Kokkos::Impl::enable_if< + typename Kokkos::Impl::enable_if< ( sizeof(T) != 4 ) && ( sizeof(T) != 8 ) , const T >::type& val ) { T return_val; // This is a way to (hopefully) avoid dead lock in a warp - int done = 1; - while ( done>0 ) { - done++; - if( Impl::lock_address_cuda_space( (void*) dest ) ) { - return_val = *dest; - *dest = return_val + val; - Impl::unlock_address_cuda_space( (void*) dest ); - done = 0; + int done = 0; + unsigned int active = __ballot(1); + unsigned int done_active = 0; + while (active!=done_active) { + if(!done) { 
+ bool locked = Impl::lock_address_cuda_space( (void*) dest ); + if( locked ) { + return_val = *dest; + *dest = return_val + val; + Impl::unlock_address_cuda_space( (void*) dest ); + done = 1; + } } + done_active = __ballot(done); } return return_val; } +#endif +#endif //---------------------------------------------------------------------------- - -#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) +#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND) +#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) #if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 ) -KOKKOS_INLINE_FUNCTION +inline int atomic_fetch_add( volatile int * dest , const int val ) { int original = val; @@ -172,29 +186,29 @@ int atomic_fetch_add( volatile int * dest , const int val ) return original; } #else -KOKKOS_INLINE_FUNCTION +inline int atomic_fetch_add( volatile int * const dest , const int val ) { return __sync_fetch_and_add(dest, val); } #endif -KOKKOS_INLINE_FUNCTION +inline long int atomic_fetch_add( volatile long int * const dest , const long int val ) { return __sync_fetch_and_add(dest,val); } #if defined( KOKKOS_ATOMICS_USE_GCC ) -KOKKOS_INLINE_FUNCTION +inline unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val ) { return __sync_fetch_and_add(dest,val); } -KOKKOS_INLINE_FUNCTION +inline unsigned long int atomic_fetch_add( volatile unsigned long int * const dest , const unsigned long int val ) { return __sync_fetch_and_add(dest,val); } #endif template < typename T > -KOKKOS_INLINE_FUNCTION +inline T atomic_fetch_add( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val ) { @@ -202,7 +216,7 @@ T atomic_fetch_add( volatile T * const dest , union U { int i ; T t ; - KOKKOS_INLINE_FUNCTION U() {}; + inline U() {}; } assume , oldval , newval ; #else union U { @@ -223,7 +237,7 @@ T atomic_fetch_add( volatile T * const dest 
, } template < typename T > -KOKKOS_INLINE_FUNCTION +inline T atomic_fetch_add( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && sizeof(T) == sizeof(long) , const T >::type val ) @@ -232,7 +246,7 @@ T atomic_fetch_add( volatile T * const dest , union U { long i ; T t ; - KOKKOS_INLINE_FUNCTION U() {}; + inline U() {}; } assume , oldval , newval ; #else union U { @@ -254,7 +268,7 @@ T atomic_fetch_add( volatile T * const dest , #if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 ) template < typename T > -KOKKOS_INLINE_FUNCTION +inline T atomic_fetch_add( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && sizeof(T) != sizeof(long) && @@ -263,7 +277,7 @@ T atomic_fetch_add( volatile T * const dest , union U { Impl::cas128_t i ; T t ; - KOKKOS_INLINE_FUNCTION U() {}; + inline U() {}; } assume , oldval , newval ; oldval.t = *dest ; @@ -283,7 +297,7 @@ T atomic_fetch_add( volatile T * const dest , template < typename T > inline T atomic_fetch_add( volatile T * const dest , - typename ::Kokkos::Impl::enable_if< + typename Kokkos::Impl::enable_if< ( sizeof(T) != 4 ) && ( sizeof(T) != 8 ) #if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 ) @@ -325,7 +339,7 @@ T atomic_fetch_add( volatile T * const dest , const T val ) } #endif - +#endif //---------------------------------------------------------------------------- // Simpler version of atomic_fetch_add without the fetch diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp index 9b7ebae4ac6df12bae659e50aa7da34429ac3187..121a5d51928517981f711c369ae3125ac48e2ade 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp @@ -48,7 +48,8 @@ namespace Kokkos { //---------------------------------------------------------------------------- -#if defined( KOKKOS_ATOMICS_USE_CUDA ) +#if defined( 
KOKKOS_HAVE_CUDA ) +#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND) // Support for int, unsigned int, unsigned long long int, and float @@ -66,26 +67,27 @@ unsigned long long int atomic_fetch_and( volatile unsigned long long int * const const unsigned long long int val ) { return atomicAnd((unsigned long long int*)dest,val); } #endif - +#endif +#endif //---------------------------------------------------------------------------- +#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND) +#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) -#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) - -KOKKOS_INLINE_FUNCTION +inline int atomic_fetch_and( volatile int * const dest , const int val ) { return __sync_fetch_and_and(dest,val); } -KOKKOS_INLINE_FUNCTION +inline long int atomic_fetch_and( volatile long int * const dest , const long int val ) { return __sync_fetch_and_and(dest,val); } #if defined( KOKKOS_ATOMICS_USE_GCC ) -KOKKOS_INLINE_FUNCTION +inline unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val ) { return __sync_fetch_and_and(dest,val); } -KOKKOS_INLINE_FUNCTION +inline unsigned long int atomic_fetch_and( volatile unsigned long int * const dest , const unsigned long int val ) { return __sync_fetch_and_and(dest,val); } @@ -108,7 +110,7 @@ T atomic_fetch_and( volatile T * const dest , const T val ) } #endif - +#endif //---------------------------------------------------------------------------- // Simpler version of atomic_fetch_and without the fetch diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp index f15e61a3aea2ac2e7120d88a7151390cc2bf0b73..2c89f56705a51f86f686d5ce6b8cad2c52898bdf 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp @@ -48,7 +48,8 @@ namespace Kokkos { 
//---------------------------------------------------------------------------- -#if defined( KOKKOS_ATOMICS_USE_CUDA ) +#if defined( KOKKOS_HAVE_CUDA ) +#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND) // Support for int, unsigned int, unsigned long long int, and float @@ -66,26 +67,27 @@ unsigned long long int atomic_fetch_or( volatile unsigned long long int * const const unsigned long long int val ) { return atomicOr((unsigned long long int*)dest,val); } #endif - +#endif +#endif //---------------------------------------------------------------------------- +#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND) +#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) -#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) - -KOKKOS_INLINE_FUNCTION +inline int atomic_fetch_or( volatile int * const dest , const int val ) { return __sync_fetch_and_or(dest,val); } -KOKKOS_INLINE_FUNCTION +inline long int atomic_fetch_or( volatile long int * const dest , const long int val ) { return __sync_fetch_and_or(dest,val); } #if defined( KOKKOS_ATOMICS_USE_GCC ) -KOKKOS_INLINE_FUNCTION +inline unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val ) { return __sync_fetch_and_or(dest,val); } -KOKKOS_INLINE_FUNCTION +inline unsigned long int atomic_fetch_or( volatile unsigned long int * const dest , const unsigned long int val ) { return __sync_fetch_and_or(dest,val); } @@ -108,7 +110,7 @@ T atomic_fetch_or( volatile T * const dest , const T val ) } #endif - +#endif //---------------------------------------------------------------------------- // Simpler version of atomic_fetch_or without the fetch diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp index a3a57aa81c7f303cf74fe5d8d7c6a50dc36eeb2d..b51d2fe7828704e88923382407bb5a4521030bf3 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp +++ 
b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp @@ -48,7 +48,8 @@ namespace Kokkos { //---------------------------------------------------------------------------- -#if defined( KOKKOS_ATOMICS_USE_CUDA ) +#if defined( KOKKOS_HAVE_CUDA ) +#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND) // Support for int, unsigned int, unsigned long long int, and float @@ -103,7 +104,7 @@ T atomic_fetch_sub( volatile T * const dest , template < typename T > __inline__ __device__ T atomic_fetch_sub( volatile T * const dest , - typename ::Kokkos::Impl::enable_if< + typename Kokkos::Impl::enable_if< ( sizeof(T) != 4 ) && ( sizeof(T) != 8 ) , const T >::type& val ) @@ -111,44 +112,49 @@ T atomic_fetch_sub( volatile T * const dest , T return_val; // This is a way to (hopefully) avoid dead lock in a warp int done = 0; - while ( done>0 ) { - done++; - if( Impl::lock_address_cuda_space( (void*) dest ) ) { - return_val = *dest; - *dest = return_val - val; - Impl::unlock_address_cuda_space( (void*) dest ); - done = 0; + unsigned int active = __ballot(1); + unsigned int done_active = 0; + while (active!=done_active) { + if(!done) { + if( Impl::lock_address_cuda_space( (void*) dest ) ) { + return_val = *dest; + *dest = return_val - val; + Impl::unlock_address_cuda_space( (void*) dest ); + done = 1; + } } + done_active = __ballot(done); } return return_val; } - +#endif +#endif //---------------------------------------------------------------------------- +#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND) +#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) -#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) - -KOKKOS_INLINE_FUNCTION +inline int atomic_fetch_sub( volatile int * const dest , const int val ) { return __sync_fetch_and_sub(dest,val); } -KOKKOS_INLINE_FUNCTION +inline long int atomic_fetch_sub( volatile long int * const dest , const long int val ) { return __sync_fetch_and_sub(dest,val); } #if 
defined( KOKKOS_ATOMICS_USE_GCC ) -KOKKOS_INLINE_FUNCTION +inline unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val ) { return __sync_fetch_and_sub(dest,val); } -KOKKOS_INLINE_FUNCTION +inline unsigned long int atomic_fetch_sub( volatile unsigned long int * const dest , const unsigned long int val ) { return __sync_fetch_and_sub(dest,val); } #endif template < typename T > -KOKKOS_INLINE_FUNCTION +inline T atomic_fetch_sub( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val ) { @@ -166,7 +172,7 @@ T atomic_fetch_sub( volatile T * const dest , } template < typename T > -KOKKOS_INLINE_FUNCTION +inline T atomic_fetch_sub( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && sizeof(T) == sizeof(long) , const T >::type val ) @@ -190,7 +196,7 @@ T atomic_fetch_sub( volatile T * const dest , template < typename T > inline T atomic_fetch_sub( volatile T * const dest , - typename ::Kokkos::Impl::enable_if< + typename Kokkos::Impl::enable_if< ( sizeof(T) != 4 ) && ( sizeof(T) != 8 ) , const T >::type& val ) @@ -219,7 +225,7 @@ T atomic_fetch_sub( volatile T * const dest , const T val ) } #endif - +#endif // Simpler version of atomic_fetch_sub without the fetch template <typename T> KOKKOS_INLINE_FUNCTION diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp index 343e9bf4c48fa499199930ebbf9a1fb893e475da..527e1bb4e334e7a9e83a0021061f07fd5900db18 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp @@ -147,7 +147,7 @@ struct RShiftOper { template < class Oper, typename T > KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( const Oper& op, volatile T * const dest , - typename ::Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && + typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && sizeof(T) == sizeof(unsigned long 
long int) , const T >::type val ) { union { unsigned long long int i ; T t ; } oldval , assume , newval ; @@ -157,7 +157,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest , do { assume.i = oldval.i ; newval.t = Oper::apply(assume.t, val) ; - oldval.i = ::Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i ); + oldval.i = Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i ); } while ( assume.i != oldval.i ); return oldval.t ; @@ -166,7 +166,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest , template < class Oper, typename T > KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( const Oper& op, volatile T * const dest , - typename ::Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && + typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && sizeof(T) == sizeof(unsigned long long int) , const T >::type val ) { union { unsigned long long int i ; T t ; } oldval , assume , newval ; @@ -176,7 +176,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest , do { assume.i = oldval.i ; newval.t = Oper::apply(assume.t, val) ; - oldval.i = ::Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i ); + oldval.i = Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i ); } while ( assume.i != oldval.i ); return newval.t ; @@ -185,7 +185,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest , template < class Oper, typename T > KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( const Oper& op, volatile T * const dest , - typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val ) + typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val ) { union { int i ; T t ; } oldval , assume , newval ; @@ -194,7 +194,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest , do { assume.i = oldval.i ; newval.t = Oper::apply(assume.t, val) ; - oldval.i = 
::Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i ); + oldval.i = Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i ); } while ( assume.i != oldval.i ); return oldval.t ; @@ -203,7 +203,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest , template < class Oper, typename T > KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( const Oper& op, volatile T * const dest , - typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(int), const T >::type val ) + typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int), const T >::type val ) { union { int i ; T t ; } oldval , assume , newval ; @@ -212,7 +212,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest , do { assume.i = oldval.i ; newval.t = Oper::apply(assume.t, val) ; - oldval.i = ::Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i ); + oldval.i = Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i ); } while ( assume.i != oldval.i ); return newval.t ; @@ -221,7 +221,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest , template < class Oper, typename T > KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( const Oper& op, volatile T * const dest , - typename ::Kokkos::Impl::enable_if< + typename Kokkos::Impl::enable_if< ( sizeof(T) != 4 ) && ( sizeof(T) != 8 ) #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) @@ -238,15 +238,20 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest , return return_val; #else // This is a way to (hopefully) avoid dead lock in a warp - int done = 1; - while ( done>0 ) { - done++; - if( Impl::lock_address_cuda_space( (void*) dest ) ) { - T return_val = *dest; - *dest = Oper::apply(return_val, val);; - Impl::unlock_address_cuda_space( (void*) dest ); - done=0; + T return_val; + int done = 0; + unsigned int active = __ballot(1); + unsigned int done_active = 0; + while (active!=done_active) { + if(!done) { + if( Impl::lock_address_cuda_space( (void*) dest 
) ) { + return_val = *dest; + *dest = Oper::apply(return_val, val);; + Impl::unlock_address_cuda_space( (void*) dest ); + done=1; + } } + done_active = __ballot(done); } return return_val; #endif @@ -255,7 +260,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest , template < class Oper, typename T > KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( const Oper& op, volatile T * const dest , - typename ::Kokkos::Impl::enable_if< + typename Kokkos::Impl::enable_if< ( sizeof(T) != 4 ) && ( sizeof(T) != 8 ) #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) @@ -271,16 +276,21 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest , Impl::unlock_address_host_space( (void*) dest ); return return_val; #else + T return_val; // This is a way to (hopefully) avoid dead lock in a warp - int done = 1; - while ( done>0 ) { - done++; - if( Impl::lock_address_cuda_space( (void*) dest ) ) { - T return_val = Oper::apply(*dest, val); - *dest = return_val; - Impl::unlock_address_cuda_space( (void*) dest ); - done=0; + int done = 0; + unsigned int active = __ballot(1); + unsigned int done_active = 0; + while (active!=done_active) { + if(!done) { + if( Impl::lock_address_cuda_space( (void*) dest ) ) { + return_val = Oper::apply(*dest, val); + *dest = return_val; + Impl::unlock_address_cuda_space( (void*) dest ); + done=1; + } } + done_active = __ballot(done); } return return_val; #endif diff --git a/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp b/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp index b9d23bd815433a0a91c282dd6e787b7d16f8b0e3..8ee094675cb861f9daf2c8b054b6dbf7517b401d 100644 --- a/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp @@ -90,10 +90,10 @@ GetSystemInfo(&info); int mpi_ranks_per_node() { char *str; int ppn = 1; - if ((str = getenv("SLURM_TASKS_PER_NODE"))) { - ppn = atoi(str); - if(ppn<=0) ppn = 1; - } + //if ((str = getenv("SLURM_TASKS_PER_NODE"))) { + // 
ppn = atoi(str); + // if(ppn<=0) ppn = 1; + //} if ((str = getenv("MV2_COMM_WORLD_LOCAL_SIZE"))) { ppn = atoi(str); if(ppn<=0) ppn = 1; @@ -108,9 +108,9 @@ int mpi_ranks_per_node() { int mpi_local_rank_on_node() { char *str; int local_rank=0; - if ((str = getenv("SLURM_LOCALID"))) { - local_rank = atoi(str); - } + //if ((str = getenv("SLURM_LOCALID"))) { + // local_rank = atoi(str); + //} if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) { local_rank = atoi(str); } diff --git a/lib/kokkos/core/src/impl/Kokkos_Core.cpp b/lib/kokkos/core/src/impl/Kokkos_Core.cpp index 567a2141405719e3331b2327ca40097c24af775a..de1085986848232f0510548bfa4375d63855f652 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Core.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Core.cpp @@ -84,8 +84,8 @@ setenv("MEMKIND_HBW_NODES", "1", 0); #endif // defined( KOKKOS_HAVE_CUDA ) #if defined( KOKKOS_HAVE_OPENMP ) - if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value || - Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) { + if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) { if(num_threads>0) { if(use_numa>0) { Kokkos::OpenMP::initialize(num_threads,use_numa); @@ -104,8 +104,8 @@ setenv("MEMKIND_HBW_NODES", "1", 0); #endif #if defined( KOKKOS_HAVE_PTHREAD ) - if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value || - Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) { + if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) { if(num_threads>0) { if(use_numa>0) { Kokkos::Threads::initialize(num_threads,use_numa); @@ -129,14 +129,14 @@ setenv("MEMKIND_HBW_NODES", "1", 0); // struct, you may remove this line of code. 
(void) args; - if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value || - Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) { + if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) { Kokkos::Serial::initialize(); } #endif #if defined( KOKKOS_HAVE_CUDA ) - if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) { + if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) { if (use_gpu > -1) { Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( use_gpu ) ); } @@ -155,16 +155,20 @@ setenv("MEMKIND_HBW_NODES", "1", 0); void finalize_internal( const bool all_spaces = false ) { +#if (KOKKOS_ENABLE_PROFILING) + Kokkos::Profiling::finalize(); +#endif + #if defined( KOKKOS_HAVE_CUDA ) - if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || all_spaces ) { + if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || all_spaces ) { if(Kokkos::Cuda::is_initialized()) Kokkos::Cuda::finalize(); } #endif #if defined( KOKKOS_HAVE_OPENMP ) - if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value || - Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value || + if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value || all_spaces ) { if(Kokkos::OpenMP::is_initialized()) Kokkos::OpenMP::finalize(); @@ -172,8 +176,8 @@ void finalize_internal( const bool all_spaces = false ) #endif #if defined( KOKKOS_HAVE_PTHREAD ) - if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value || - Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value || + if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Threads , 
Kokkos::HostSpace::execution_space >::value || all_spaces ) { if(Kokkos::Threads::is_initialized()) Kokkos::Threads::finalize(); @@ -181,46 +185,41 @@ void finalize_internal( const bool all_spaces = false ) #endif #if defined( KOKKOS_HAVE_SERIAL ) - if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value || - Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value || + if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value || all_spaces ) { if(Kokkos::Serial::is_initialized()) Kokkos::Serial::finalize(); } #endif - -#if (KOKKOS_ENABLE_PROFILING) - Kokkos::Profiling::finalize(); -#endif - } void fence_internal() { #if defined( KOKKOS_HAVE_CUDA ) - if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) { + if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) { Kokkos::Cuda::fence(); } #endif #if defined( KOKKOS_HAVE_OPENMP ) - if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value || - Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) { + if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) { Kokkos::OpenMP::fence(); } #endif #if defined( KOKKOS_HAVE_PTHREAD ) - if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value || - Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) { + if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) { Kokkos::Threads::fence(); } #endif #if defined( KOKKOS_HAVE_SERIAL ) - if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value || - Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) { + if( std::is_same< Kokkos::Serial , 
Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) { Kokkos::Serial::fence(); } #endif @@ -350,11 +349,11 @@ void initialize(int& narg, char* arg[]) if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found) { char *str; - if ((str = getenv("SLURM_LOCALID"))) { - int local_rank = atoi(str); - device = local_rank % ndevices; - if (device >= skip_device) device++; - } + //if ((str = getenv("SLURM_LOCALID"))) { + // int local_rank = atoi(str); + // device = local_rank % ndevices; + // if (device >= skip_device) device++; + //} if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) { int local_rank = atoi(str); device = local_rank % ndevices; diff --git a/lib/kokkos/core/src/impl/Kokkos_Error.hpp b/lib/kokkos/core/src/impl/Kokkos_Error.hpp index 5f88d662069bcb6313c803073385736e23a93456..5fab5eb9a7bd07c9868214607931d03e6ff770b0 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Error.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Error.hpp @@ -46,7 +46,7 @@ #include <string> #include <iosfwd> -#include <KokkosCore_config.h> +#include <Kokkos_Macros.hpp> #ifdef KOKKOS_HAVE_CUDA #include <Cuda/Kokkos_Cuda_abort.hpp> #endif @@ -68,12 +68,18 @@ std::string human_memory_size(size_t arg_bytes); //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + namespace Kokkos { -inline -void abort( const char * const message ) { Kokkos::Impl::host_abort(message); } +KOKKOS_INLINE_FUNCTION +void abort( const char * const message ) { +#ifdef __CUDA_ARCH__ + Kokkos::Impl::cuda_abort(message); +#else + Kokkos::Impl::host_abort(message); +#endif +} + } -#endif /* defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) */ //---------------------------------------------------------------------------- 
//---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp b/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp index 78b6794491a77b78c1025b10fbe3d214fdc71fdb..66c3157c3aba4f6ae4b187d859790986c2458316 100644 --- a/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp @@ -129,14 +129,14 @@ struct FunctorValueTraits< FunctorType , ArgTag , true /* == exists FunctorType: // Number of values if single value template< class F > KOKKOS_FORCEINLINE_FUNCTION static - typename Impl::enable_if< Impl::is_same<F,FunctorType>::value && StaticValueSize , unsigned >::type + typename Impl::enable_if< std::is_same<F,FunctorType>::value && StaticValueSize , unsigned >::type value_count( const F & ) { return 1 ; } // Number of values if an array, protect via templating because 'f.value_count' // will only exist when the functor declares the value_type to be an array. template< class F > KOKKOS_FORCEINLINE_FUNCTION static - typename Impl::enable_if< Impl::is_same<F,FunctorType>::value && ! StaticValueSize , unsigned >::type + typename Impl::enable_if< std::is_same<F,FunctorType>::value && ! StaticValueSize , unsigned >::type value_count( const F & f ) { return f.value_count ; } // Total size of the value @@ -157,7 +157,7 @@ private: struct REJECTTAG {}; // Reject tagged operator() when using non-tagged execution policy. 
typedef typename - Impl::if_c< Impl::is_same< ArgTag , void >::value , VOIDTAG , ArgTag >::type tag_type ; + Impl::if_c< std::is_same< ArgTag , void >::value , VOIDTAG , ArgTag >::type tag_type ; //---------------------------------------- // parallel_for operator without a tag: @@ -339,8 +339,8 @@ private: typedef decltype( deduce_reduce_type( tag_type() , & FunctorType::operator() ) ) ValueType ; - enum { IS_VOID = Impl::is_same<VOIDTAG ,ValueType>::value }; - enum { IS_REJECT = Impl::is_same<REJECTTAG,ValueType>::value }; + enum { IS_VOID = std::is_same<VOIDTAG ,ValueType>::value }; + enum { IS_REJECT = std::is_same<REJECTTAG,ValueType>::value }; public: diff --git a/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp index 11cc120212b25804df0afb9f660ff8b165e0f217..95340261122ff51361bb45da62e1f236c1aef78d 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp @@ -62,6 +62,10 @@ #include <memkind.h> #endif +#if (KOKKOS_ENABLE_PROFILING) +#include <impl/Kokkos_Profiling_Interface.hpp> +#endif + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- #ifdef KOKKOS_HAVE_HBWSPACE @@ -219,6 +223,10 @@ void HBWSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_s } } +constexpr const char* HBWSpace::name() { + return m_name; +} + } // namespace Experimental } // namespace Kokkos @@ -226,7 +234,6 @@ void HBWSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_s //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace Impl { SharedAllocationRecord< void , void > @@ -242,6 +249,14 @@ deallocate( SharedAllocationRecord< void , void > * arg_rec ) SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >:: ~SharedAllocationRecord() { + #if (KOKKOS_ENABLE_PROFILING) 
+ if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::deallocateData( + Kokkos::Profiling::SpaceHandle(Kokkos::Experimental::HBWSpace::name()),RecordBase::m_alloc_ptr->m_label, + data(),size()); + } + #endif + m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr , SharedAllocationRecord< void , void >::m_alloc_size ); @@ -263,6 +278,12 @@ SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space ) , m_space( arg_space ) { + #if (KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size); + } + #endif + // Fill in the Header information RecordBase::m_alloc_ptr->m_record = static_cast< SharedAllocationRecord< void , void > * >( this ); @@ -306,7 +327,7 @@ reallocate_tracked( void * const arg_alloc_ptr SharedAllocationRecord * const r_old = get_record( arg_alloc_ptr ); SharedAllocationRecord * const r_new = allocate( r_old->m_space , r_old->get_label() , arg_alloc_size ); - Kokkos::Impl::DeepCopy<HBWSpace,HBWSpace>( r_new->data() , r_old->data() + Kokkos::Impl::DeepCopy<Kokkos::Experimental::HBWSpace,Kokkos::Experimental::HBWSpace>( r_new->data() , r_old->data() , std::min( r_old->size() , r_new->size() ) ); RecordBase::increment( r_new ); @@ -325,7 +346,7 @@ SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record( voi RecordHost * const record = head ? static_cast< RecordHost * >( head->m_record ) : (RecordHost *) 0 ; if ( ! 
alloc_ptr || record->m_alloc_ptr != head ) { - Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record ERROR" ) ); + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record ERROR" ) ); } return record ; @@ -339,7 +360,6 @@ print_records( std::ostream & s , const Kokkos::Experimental::HBWSpace & space , } } // namespace Impl -} // namespace Experimental } // namespace Kokkos /*--------------------------------------------------------------------------*/ diff --git a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp index b52f4591ef0b8c0b71445f6e33b4d913822e5446..bfd13572b95208c9c8397728a460f79ae42465dd 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp @@ -43,7 +43,9 @@ #include <algorithm> #include <Kokkos_Macros.hpp> - +#if (KOKKOS_ENABLE_PROFILING) +#include <impl/Kokkos_Profiling_Interface.hpp> +#endif /*--------------------------------------------------------------------------*/ #if defined( __INTEL_COMPILER ) && ! 
defined ( KOKKOS_HAVE_CUDA ) @@ -333,13 +335,15 @@ void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_ } } +constexpr const char* HostSpace::name() { + return m_name; +} } // namespace Kokkos //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace Impl { SharedAllocationRecord< void , void > @@ -355,6 +359,14 @@ deallocate( SharedAllocationRecord< void , void > * arg_rec ) SharedAllocationRecord< Kokkos::HostSpace , void >:: ~SharedAllocationRecord() { + #if (KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::deallocateData( + Kokkos::Profiling::SpaceHandle(Kokkos::HostSpace::name()),RecordBase::m_alloc_ptr->m_label, + data(),size()); + } + #endif + m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr , SharedAllocationRecord< void , void >::m_alloc_size ); @@ -376,6 +388,11 @@ SharedAllocationRecord( const Kokkos::HostSpace & arg_space ) , m_space( arg_space ) { +#if (KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size); + } +#endif // Fill in the Header information RecordBase::m_alloc_ptr->m_record = static_cast< SharedAllocationRecord< void , void > * >( this ); @@ -438,7 +455,7 @@ SharedAllocationRecord< Kokkos::HostSpace , void >::get_record( void * alloc_ptr RecordHost * const record = head ? static_cast< RecordHost * >( head->m_record ) : (RecordHost *) 0 ; if ( ! 
alloc_ptr || record->m_alloc_ptr != head ) { - Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void >::get_record ERROR" ) ); + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::HostSpace , void >::get_record ERROR" ) ); } return record ; @@ -452,55 +469,6 @@ print_records( std::ostream & s , const Kokkos::HostSpace & space , bool detail } } // namespace Impl -} // namespace Experimental -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -template< class > -struct ViewOperatorBoundsErrorAbort ; - -template<> -struct ViewOperatorBoundsErrorAbort< Kokkos::HostSpace > { - static void apply( const size_t rank - , const size_t n0 , const size_t n1 - , const size_t n2 , const size_t n3 - , const size_t n4 , const size_t n5 - , const size_t n6 , const size_t n7 - , const size_t i0 , const size_t i1 - , const size_t i2 , const size_t i3 - , const size_t i4 , const size_t i5 - , const size_t i6 , const size_t i7 ); -}; - -void ViewOperatorBoundsErrorAbort< Kokkos::HostSpace >:: -apply( const size_t rank - , const size_t n0 , const size_t n1 - , const size_t n2 , const size_t n3 - , const size_t n4 , const size_t n5 - , const size_t n6 , const size_t n7 - , const size_t i0 , const size_t i1 - , const size_t i2 , const size_t i3 - , const size_t i4 , const size_t i5 - , const size_t i6 , const size_t i7 ) -{ - char buffer[512]; - - snprintf( buffer , sizeof(buffer) - , "View operator bounds error : rank(%lu) dim(%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu) index(%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu)" - , rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 - , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 ); - - Kokkos::Impl::throw_runtime_exception( buffer ); -} - -} // namespace Impl -} 
// namespace Experimental } // namespace Kokkos /*--------------------------------------------------------------------------*/ diff --git a/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp b/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp index eb3da7501ebeeda048e0e8c78e81f20fb60060fa..5155c66df9b77fa667996cf191d862e6b05d8b52 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp @@ -50,7 +50,7 @@ namespace Kokkos { KOKKOS_FORCEINLINE_FUNCTION void memory_fence() { -#if defined( KOKKOS_ATOMICS_USE_CUDA ) +#if defined( __CUDA_ARCH__ ) __threadfence(); #elif defined( KOKKOS_ATOMICS_USE_GCC ) || \ ( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ATOMICS_USE_INTEL ) ) diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp index 91faed170abbeb6d552b6247c74afdaa1596e038..99c5df4db31001b42f56337938f5a7ea73941157 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp @@ -48,6 +48,11 @@ namespace Kokkos { namespace Profiling { + + SpaceHandle::SpaceHandle(const char* space_name) { + strncpy(name,space_name,64); name[sizeof(name)-1] = '\0'; /* strncpy leaves name unterminated when space_name has 64 or more characters */ + } + bool profileLibraryLoaded() { return (NULL != initProfileLibrary); } @@ -94,6 +99,33 @@ namespace Kokkos { } } + + void pushRegion(const std::string& kName) { + if( NULL != pushRegionCallee ) { + Kokkos::fence(); + (*pushRegionCallee)(kName.c_str()); + } + } + + void popRegion() { + if( NULL != popRegionCallee ) { + Kokkos::fence(); + (*popRegionCallee)(); + } + } + + void allocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size) { + if(NULL != allocateDataCallee) { + (*allocateDataCallee)(space,label.c_str(),ptr,size); + } + } + + void deallocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size) { + if(NULL != deallocateDataCallee) { /* test the hook that is invoked below; it is resolved by its own dlsym and may be NULL even when the allocate hook is loaded */ +
(*deallocateDataCallee)(space,label.c_str(),ptr,size); + } + } + void initialize() { // Make sure initialize calls happens only once @@ -145,6 +177,17 @@ namespace Kokkos { initProfileLibrary = *((initFunction*) &p7); auto p8 = dlsym(firstProfileLibrary, "kokkosp_finalize_library"); finalizeProfileLibrary = *((finalizeFunction*) &p8); + + auto p9 = dlsym(firstProfileLibrary, "kokkosp_push_profile_region"); + pushRegionCallee = *((pushFunction*) &p9); + auto p10 = dlsym(firstProfileLibrary, "kokkosp_pop_profile_region"); + popRegionCallee = *((popFunction*) &p10); + + auto p11 = dlsym(firstProfileLibrary, "kokkosp_allocate_data"); + allocateDataCallee = *((allocateDataFunction*) &p11); + auto p12 = dlsym(firstProfileLibrary, "kokkosp_deallocate_data"); + deallocateDataCallee = *((deallocateDataFunction*) &p12); + } } @@ -170,14 +213,22 @@ namespace Kokkos { // Set all profile hooks to NULL to prevent // any additional calls. Once we are told to // finalize, we mean it + initProfileLibrary = NULL; + finalizeProfileLibrary = NULL; + beginForCallee = NULL; beginScanCallee = NULL; beginReduceCallee = NULL; endScanCallee = NULL; endForCallee = NULL; endReduceCallee = NULL; - initProfileLibrary = NULL; - finalizeProfileLibrary = NULL; + + pushRegionCallee = NULL; + popRegionCallee = NULL; + + allocateDataCallee = NULL; + deallocateDataCallee = NULL; + } } } diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp index 4f01256335cd82962d1744a9895374c170a5cb8b..3d6a3892524ee3234a33f14cf7727cac5512e455 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp @@ -48,6 +48,7 @@ #include <Kokkos_Core_fwd.hpp> #include <Kokkos_Macros.hpp> #include <string> +#include <cinttypes> #if (KOKKOS_ENABLE_PROFILING) #include <impl/Kokkos_Profiling_DeviceInfo.hpp> @@ -62,6 +63,11 @@ namespace Kokkos { namespace Profiling { + struct SpaceHandle {
+ SpaceHandle(const char* space_name); + char name[64]; + }; + typedef void (*initFunction)(const int, const uint64_t, const uint32_t, @@ -70,8 +76,16 @@ namespace Kokkos { typedef void (*beginFunction)(const char*, const uint32_t, uint64_t*); typedef void (*endFunction)(uint64_t); + typedef void (*pushFunction)(const char*); + typedef void (*popFunction)(); + + typedef void (*allocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t); + typedef void (*deallocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t); + + static initFunction initProfileLibrary = NULL; static finalizeFunction finalizeProfileLibrary = NULL; + static beginFunction beginForCallee = NULL; static beginFunction beginScanCallee = NULL; static beginFunction beginReduceCallee = NULL; @@ -79,6 +93,13 @@ namespace Kokkos { static endFunction endScanCallee = NULL; static endFunction endReduceCallee = NULL; + static pushFunction pushRegionCallee = NULL; + static popFunction popRegionCallee = NULL; + + static allocateDataFunction allocateDataCallee = NULL; + static deallocateDataFunction deallocateDataCallee = NULL; + + bool profileLibraryLoaded(); void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID); @@ -88,6 +109,12 @@ namespace Kokkos { void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID); void endParallelReduce(const uint64_t kernelID); + void pushRegion(const std::string& kName); + void popRegion(); + + void allocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size); + void deallocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size); + void initialize(); void finalize(); @@ -105,8 +132,14 @@ namespace Kokkos { endScanCallee = NULL; endForCallee = NULL; endReduceCallee = NULL; + + allocateDataCallee = NULL; + deallocateDataCallee = NULL; + initProfileLibrary = NULL; 
finalizeProfileLibrary = NULL; + pushRegionCallee = NULL; + popRegionCallee = NULL; } } diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp index e8bdbde6c60f182f588617dda2a9c2f32530694c..eb881545d2270d9cfa4b3e06c8a3a262e3c4fd7a 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp @@ -43,8 +43,9 @@ #include <Kokkos_Core.hpp> -#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY ) +#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG ) +#include <impl/Kokkos_Serial_Task.hpp> #include <impl/Kokkos_TaskQueue_impl.hpp> //---------------------------------------------------------------------------- @@ -143,5 +144,5 @@ void TaskQueueSpecialization< Kokkos::Serial > :: }} /* namespace Kokkos::Impl */ -#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG ) */ diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp index 48a110c5f1583cd4943a011f3d33bd25e3cd00f2..473b7aadb2e29984b880ff1868b11b6461ed9df9 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,7 +44,9 @@ #ifndef KOKKOS_IMPL_SERIAL_TASK_HPP #define KOKKOS_IMPL_SERIAL_TASK_HPP -#if defined( KOKKOS_ENABLE_TASKPOLICY ) +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include <impl/Kokkos_TaskQueue.hpp> //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -128,47 +130,63 @@ struct TeamThreadRangeBoundariesStruct<iType, TaskExec< Kokkos::Serial > > {} }; +//---------------------------------------------------------------------------- + +template<typename iType> +struct ThreadVectorRangeBoundariesStruct<iType, TaskExec< Kokkos::Serial > > +{ + typedef iType index_type; + const iType start ; + const iType end ; + enum {increment = 1}; + TaskExec< Kokkos::Serial > & thread; + + KOKKOS_INLINE_FUNCTION + ThreadVectorRangeBoundariesStruct + ( TaskExec< Kokkos::Serial > & arg_thread, const iType& arg_count) + : start( 0 ) + , end(arg_count) + , thread(arg_thread) + {} +}; + }} /* namespace Kokkos::Impl */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -/* -template<typename iType> -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > > -TeamThreadRange( const Impl::TaskExec< Kokkos::Serial > & thread - , const iType & count ) -{ - return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >(thread,count); -} -*/ -//TODO const issue omp -template<typename iType> + +// OMP version needs non-const TaskExec +template< typename iType > KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > > -TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread - , const iType & count ) 
+Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > > +TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread, const iType & count ) { - return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >(thread,count); + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > >( thread, count ); } -/* -template<typename iType> + +// OMP version needs non-const TaskExec +template< typename iType1, typename iType2 > KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > > -TeamThreadRange( const Impl:: TaskExec< Kokkos::Serial > & thread, const iType & start , const iType & end ) +Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, + Impl::TaskExec< Kokkos::Serial > > +TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread, const iType1 & start, const iType2 & end ) { - return Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >(thread,start,end); + typedef typename std::common_type< iType1, iType2 >::type iType; + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > >( + thread, iType(start), iType(end) ); } -*/ -//TODO const issue omp + +// OMP version needs non-const TaskExec template<typename iType> KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > > -TeamThreadRange( Impl:: TaskExec< Kokkos::Serial > & thread, const iType & start , const iType & end ) +Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > > +ThreadVectorRange + ( Impl::TaskExec< Kokkos::Serial > & thread + , const iType & count ) { - return Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >(thread,start,end); + return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >(thread,count); } /** \brief Inter-thread parallel_for. 
Executes lambda(iType i) for each i=0..N-1. @@ -177,7 +195,7 @@ TeamThreadRange( Impl:: TaskExec< Kokkos::Serial > & thread, const iType & start * This functionality requires C++11 support.*/ template<typename iType, class Lambda> KOKKOS_INLINE_FUNCTION -void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >& loop_boundaries, const Lambda& lambda) { +void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries, const Lambda& lambda) { for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) lambda(i); } @@ -213,7 +231,7 @@ void parallel_reduce initialized_result = result; } -// placeholder for future function + template< typename iType, class Lambda, typename ValueType > KOKKOS_INLINE_FUNCTION void parallel_reduce @@ -221,8 +239,17 @@ void parallel_reduce const Lambda & lambda, ValueType& initialized_result) { + initialized_result = ValueType(); +#ifdef KOKKOS_HAVE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + initialized_result+=tmp; + } } -// placeholder for future function + template< typename iType, class Lambda, typename ValueType, class JoinType > KOKKOS_INLINE_FUNCTION void parallel_reduce @@ -231,6 +258,16 @@ void parallel_reduce const JoinType & join, ValueType& initialized_result) { + ValueType result = initialized_result; +#ifdef KOKKOS_HAVE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + ValueType tmp = ValueType(); + lambda(i,tmp); + join(result,tmp); + } + initialized_result = result; } template< typename ValueType, typename iType, class Lambda > @@ -266,6 +303,6 @@ void parallel_scan //---------------------------------------------------------------------------- 
//---------------------------------------------------------------------------- -#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ #endif /* #ifndef KOKKOS_IMPL_SERIAL_TASK_HPP */ diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.cpp deleted file mode 100644 index 1577df07cd74f2634f9f98cc94d3825062ad3ff6..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.cpp +++ /dev/null @@ -1,348 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -// Experimental unified task-data parallel manycore LDRD - -#include <impl/Kokkos_Serial_TaskPolicy.hpp> - -#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY ) - -#include <stdlib.h> -#include <stdexcept> -#include <iostream> -#include <sstream> -#include <string> - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { - -TaskPolicy< Kokkos::Serial >::member_type & -TaskPolicy< Kokkos::Serial >::member_single() -{ - static member_type s(0,1,0); - return s ; -} - -} // namespace Experimental -} // namespace Kokkos - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -typedef TaskMember< Kokkos::Serial , void , void > Task ; - -//---------------------------------------------------------------------------- - -namespace { - -inline -unsigned padded_sizeof_derived( unsigned sizeof_derived ) -{ - return sizeof_derived + - ( sizeof_derived % sizeof(Task*) ? 
sizeof(Task*) - sizeof_derived % sizeof(Task*) : 0 ); -} - -} // namespace - -void Task::deallocate( void * ptr ) -{ - free( ptr ); -} - -void * Task::allocate( const unsigned arg_sizeof_derived - , const unsigned arg_dependence_capacity ) -{ - return malloc( padded_sizeof_derived( arg_sizeof_derived ) + arg_dependence_capacity * sizeof(Task*) ); -} - -Task::~TaskMember() -{ - -} - -Task::TaskMember( const Task::function_verify_type arg_verify - , const Task::function_dealloc_type arg_dealloc - , const Task::function_apply_type arg_apply - , const unsigned arg_sizeof_derived - , const unsigned arg_dependence_capacity - ) - : m_dealloc( arg_dealloc ) - , m_verify( arg_verify ) - , m_apply( arg_apply ) - , m_dep( (Task **)( ((unsigned char *) this) + padded_sizeof_derived( arg_sizeof_derived ) ) ) - , m_wait( 0 ) - , m_next( 0 ) - , m_dep_capacity( arg_dependence_capacity ) - , m_dep_size( 0 ) - , m_ref_count( 0 ) - , m_state( TASK_STATE_CONSTRUCTING ) -{ - for ( unsigned i = 0 ; i < arg_dependence_capacity ; ++i ) m_dep[i] = 0 ; -} - -Task::TaskMember( const Task::function_dealloc_type arg_dealloc - , const Task::function_apply_type arg_apply - , const unsigned arg_sizeof_derived - , const unsigned arg_dependence_capacity - ) - : m_dealloc( arg_dealloc ) - , m_verify( & Task::verify_type<void> ) - , m_apply( arg_apply ) - , m_dep( (Task **)( ((unsigned char *) this) + padded_sizeof_derived( arg_sizeof_derived ) ) ) - , m_wait( 0 ) - , m_next( 0 ) - , m_dep_capacity( arg_dependence_capacity ) - , m_dep_size( 0 ) - , m_ref_count( 0 ) - , m_state( TASK_STATE_CONSTRUCTING ) -{ - for ( unsigned i = 0 ; i < arg_dependence_capacity ; ++i ) m_dep[i] = 0 ; -} - -//---------------------------------------------------------------------------- - -void Task::throw_error_add_dependence() const -{ - std::cerr << "TaskMember< Serial >::add_dependence ERROR" - << " state(" << m_state << ")" - << " dep_size(" << m_dep_size << ")" - << std::endl ; - throw 
std::runtime_error("TaskMember< Serial >::add_dependence ERROR"); -} - -void Task::throw_error_verify_type() -{ - throw std::runtime_error("TaskMember< Serial >::verify_type ERROR"); -} - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - -void Task::assign( Task ** const lhs , Task * rhs , const bool no_throw ) -{ - static const char msg_error_header[] = "Kokkos::Experimental::Impl::TaskManager<Kokkos::Serial>::assign ERROR" ; - static const char msg_error_count[] = ": negative reference count" ; - static const char msg_error_complete[] = ": destroy task that is not complete" ; - static const char msg_error_dependences[] = ": destroy task that has dependences" ; - static const char msg_error_exception[] = ": caught internal exception" ; - - const char * msg_error = 0 ; - - try { - - if ( *lhs ) { - - const int count = --((**lhs).m_ref_count); - - if ( 0 == count ) { - - // Reference count at zero, delete it - - // Should only be deallocating a completed task - if ( (**lhs).m_state == Kokkos::Experimental::TASK_STATE_COMPLETE ) { - - // A completed task should not have dependences... - for ( int i = 0 ; i < (**lhs).m_dep_size && 0 == msg_error ; ++i ) { - if ( (**lhs).m_dep[i] ) msg_error = msg_error_dependences ; - } - } - else { - msg_error = msg_error_complete ; - } - - if ( 0 == msg_error ) { - // Get deletion function and apply it - const Task::function_dealloc_type d = (**lhs).m_dealloc ; - - (*d)( *lhs ); - } - } - else if ( count <= 0 ) { - msg_error = msg_error_count ; - } - } - - if ( 0 == msg_error && rhs ) { ++( rhs->m_ref_count ); } - - *lhs = rhs ; - } - catch( ... 
) { - if ( 0 == msg_error ) msg_error = msg_error_exception ; - } - - if ( 0 != msg_error ) { - if ( no_throw ) { - std::cerr << msg_error_header << msg_error << std::endl ; - std::cerr.flush(); - } - else { - std::string msg(msg_error_header); - msg.append(msg_error); - throw std::runtime_error( msg ); - } - } -} -#endif - -namespace { - -Task * s_ready = 0 ; -Task * s_denied = reinterpret_cast<Task*>( ~((uintptr_t)0) ); - -} - -void Task::schedule() -{ - // Execute ready tasks in case the task being scheduled - // is dependent upon a waiting and ready task. - - Task::execute_ready_tasks(); - - // spawning : Constructing -> Waiting - // respawning : Executing -> Waiting - // updating : Waiting -> Waiting - - // Must not be in a dependence linked list: 0 == t->m_next - - const bool ok_state = TASK_STATE_COMPLETE != m_state ; - const bool ok_list = 0 == m_next ; - - if ( ok_state && ok_list ) { - - if ( TASK_STATE_CONSTRUCTING == m_state ) { - // Initial scheduling increment, - // matched by decrement when task is complete. - ++m_ref_count ; - } - - // Will be waiting for execution upon return from this function - - m_state = Kokkos::Experimental::TASK_STATE_WAITING ; - - // Insert this task into another dependence that is not complete - - int i = 0 ; - for ( ; i < m_dep_size ; ++i ) { - Task * const y = m_dep[i] ; - if ( y && s_denied != ( m_next = y->m_wait ) ) { - y->m_wait = this ; // CAS( & y->m_wait , m_next , this ); - break ; - } - } - if ( i == m_dep_size ) { - // All dependences are complete, insert into the ready list - m_next = s_ready ; - s_ready = this ; // CAS( & s_ready , m_next = s_ready , this ); - } - } - else { - throw std::runtime_error(std::string("Kokkos::Experimental::Impl::Task spawn or respawn state error")); - } -} - -void Task::execute_ready_tasks() -{ - while ( s_ready ) { - - // Remove this task from the ready list - - // Task * task ; - // while ( ! 
CAS( & s_ready , task = s_ready , s_ready->m_next ) ); - - Task * task = s_ready ; - - s_ready = task->m_next ; - - task->m_next = 0 ; - - // precondition: task->m_state = TASK_STATE_WAITING - // precondition: task->m_dep[i]->m_state == TASK_STATE_COMPLETE for all i - // precondition: does not exist T such that T->m_wait = task - // precondition: does not exist T such that T->m_next = task - - task->m_state = Kokkos::Experimental::TASK_STATE_EXECUTING ; - - (*task->m_apply)( task ); - - if ( task->m_state == Kokkos::Experimental::TASK_STATE_EXECUTING ) { - // task did not respawn itself - task->m_state = Kokkos::Experimental::TASK_STATE_COMPLETE ; - - // release dependences: - for ( int i = 0 ; i < task->m_dep_size ; ++i ) { - assign( task->m_dep + i , 0 ); - } - - // Stop other tasks from adding themselves to 'task->m_wait' ; - - Task * x ; - // CAS( & task->m_wait , x = task->m_wait , s_denied ); - x = task->m_wait ; task->m_wait = s_denied ; - - // update tasks waiting on this task - while ( x ) { - Task * const next = x->m_next ; - - x->m_next = 0 ; - - x->schedule(); // could happen concurrently - - x = next ; - } - - // Decrement to match the initial scheduling increment - assign( & task , 0 ); - } - } -} - -} // namespace Impl -} // namespace Experimental -} // namespace Kokkos - -#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */ - diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.hpp b/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.hpp deleted file mode 100644 index a333f948ae18e3e3622d06551dd935aff0d77707..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.hpp +++ /dev/null @@ -1,677 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. 
Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -// Experimental unified task-data parallel manycore LDRD - -#ifndef KOKKOS_EXPERIMENTAL_SERIAL_TASKPOLICY_HPP -#define KOKKOS_EXPERIMENTAL_SERIAL_TASKPOLICY_HPP - -#include <Kokkos_Macros.hpp> - -#if defined( KOKKOS_HAVE_SERIAL ) - -#include <string> -#include <typeinfo> -#include <stdexcept> - -#include <Kokkos_Serial.hpp> -#include <Kokkos_TaskPolicy.hpp> -#include <Kokkos_View.hpp> - -#if defined( KOKKOS_ENABLE_TASKPOLICY ) - -#include <impl/Kokkos_FunctorAdapter.hpp> - -//---------------------------------------------------------------------------- -/* Inheritance structure to allow static_cast from the task root type - * and a task's FunctorType. - * - * task_root_type == TaskMember< Space , void , void > - * - * TaskMember< PolicyType , ResultType , FunctorType > - * : TaskMember< PolicyType::Space , ResultType , FunctorType > - * { ... }; - * - * TaskMember< Space , ResultType , FunctorType > - * : TaskMember< Space , ResultType , void > - * , FunctorType - * { ... }; - * - * when ResultType != void - * - * TaskMember< Space , ResultType , void > - * : TaskMember< Space , void , void > - * { ... 
}; - * - */ -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -/** \brief Base class for all tasks in the Serial execution space */ -template<> -class TaskMember< Kokkos::Serial , void , void > -{ -public: - - typedef void (* function_apply_type) ( TaskMember * ); - typedef void (* function_dealloc_type)( TaskMember * ); - typedef TaskMember * (* function_verify_type) ( TaskMember * ); - -private: - - const function_dealloc_type m_dealloc ; ///< Deallocation - const function_verify_type m_verify ; ///< Result type verification - const function_apply_type m_apply ; ///< Apply function - TaskMember ** const m_dep ; ///< Dependences - TaskMember * m_wait ; ///< Linked list of tasks waiting on this task - TaskMember * m_next ; ///< Linked list of tasks waiting on a different task - const int m_dep_capacity ; ///< Capacity of dependences - int m_dep_size ; ///< Actual count of dependences - int m_ref_count ; ///< Reference count - int m_state ; ///< State of the task - - // size = 6 Pointers + 4 ints - - TaskMember() /* = delete */ ; - TaskMember( const TaskMember & ) /* = delete */ ; - TaskMember & operator = ( const TaskMember & ) /* = delete */ ; - - static void * allocate( const unsigned arg_sizeof_derived , const unsigned arg_dependence_capacity ); - static void deallocate( void * ); - - void throw_error_add_dependence() const ; - static void throw_error_verify_type(); - - template < class DerivedTaskType > - static - void deallocate( TaskMember * t ) - { - DerivedTaskType * ptr = static_cast< DerivedTaskType * >(t); - ptr->~DerivedTaskType(); - deallocate( (void *) ptr ); - } - -protected : - - ~TaskMember(); - - // Used by TaskMember< Serial , ResultType , void > - TaskMember( const function_verify_type arg_verify - , const function_dealloc_type arg_dealloc - , const function_apply_type arg_apply - , const unsigned arg_sizeof_derived - , const unsigned 
arg_dependence_capacity - ); - - // Used for TaskMember< Serial , void , void > - TaskMember( const function_dealloc_type arg_dealloc - , const function_apply_type arg_apply - , const unsigned arg_sizeof_derived - , const unsigned arg_dependence_capacity - ); - -public: - - template< typename ResultType > - KOKKOS_FUNCTION static - TaskMember * verify_type( TaskMember * t ) - { - enum { check_type = ! Kokkos::Impl::is_same< ResultType , void >::value }; - - if ( check_type && t != 0 ) { - - // Verify that t->m_verify is this function - const function_verify_type self = & TaskMember::template verify_type< ResultType > ; - - if ( t->m_verify != self ) { - t = 0 ; -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - throw_error_verify_type(); -#endif - } - } - return t ; - } - - //---------------------------------------- - /* Inheritence Requirements on task types: - * typedef FunctorType::value_type value_type ; - * class DerivedTaskType - * : public TaskMember< Serial , value_type , FunctorType > - * { ... }; - * class TaskMember< Serial , value_type , FunctorType > - * : public TaskMember< Serial , value_type , void > - * , public Functor - * { ... 
}; - * If value_type != void - * class TaskMember< Serial , value_type , void > - * : public TaskMember< Serial , void , void > - * - * Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ] - * - */ - - /** \brief Allocate and construct a single-thread task */ - template< class DerivedTaskType > - static - TaskMember * create( const typename DerivedTaskType::functor_type & arg_functor - , const unsigned arg_dependence_capacity - ) - { - typedef typename DerivedTaskType::functor_type functor_type ; - typedef typename functor_type::value_type value_type ; - - DerivedTaskType * const task = - new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) ) - DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType > - , & TaskMember::template apply_single< functor_type , value_type > - , sizeof(DerivedTaskType) - , arg_dependence_capacity - , arg_functor ); - - return static_cast< TaskMember * >( task ); - } - - /** \brief Allocate and construct a data parallel task */ - template< class DerivedTaskType > - static - TaskMember * create( const typename DerivedTaskType::policy_type & arg_policy - , const typename DerivedTaskType::functor_type & arg_functor - , const unsigned arg_dependence_capacity - ) - { - DerivedTaskType * const task = - new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) ) - DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType > - , sizeof(DerivedTaskType) - , arg_dependence_capacity - , arg_policy - , arg_functor - ); - - return static_cast< TaskMember * >( task ); - } - - /** \brief Allocate and construct a thread-team task */ - template< class DerivedTaskType > - static - TaskMember * create_team( const typename DerivedTaskType::functor_type & arg_functor - , const unsigned arg_dependence_capacity - ) - { - typedef typename DerivedTaskType::functor_type functor_type ; - typedef typename functor_type::value_type value_type ; - - DerivedTaskType * const task = - new( allocate( 
sizeof(DerivedTaskType) , arg_dependence_capacity ) ) - DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType > - , & TaskMember::template apply_team< functor_type , value_type > - , sizeof(DerivedTaskType) - , arg_dependence_capacity - , arg_functor ); - - return static_cast< TaskMember * >( task ); - } - - void schedule(); - static void execute_ready_tasks(); - - //---------------------------------------- - - typedef FutureValueTypeIsVoidError get_result_type ; - - KOKKOS_INLINE_FUNCTION - get_result_type get() const { return get_result_type() ; } - - KOKKOS_INLINE_FUNCTION - Kokkos::Experimental::TaskState get_state() const { return Kokkos::Experimental::TaskState( m_state ); } - - //---------------------------------------- - -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - static - void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false ); -#else - KOKKOS_INLINE_FUNCTION static - void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false ) {} -#endif - - KOKKOS_INLINE_FUNCTION - TaskMember * get_dependence( int i ) const - { return ( Kokkos::Experimental::TASK_STATE_EXECUTING == m_state && 0 <= i && i < m_dep_size ) ? 
m_dep[i] : (TaskMember*) 0 ; } - - KOKKOS_INLINE_FUNCTION - int get_dependence() const - { return m_dep_size ; } - - KOKKOS_INLINE_FUNCTION - void clear_dependence() - { - for ( int i = 0 ; i < m_dep_size ; ++i ) assign( m_dep + i , 0 ); - m_dep_size = 0 ; - } - - KOKKOS_INLINE_FUNCTION - void add_dependence( TaskMember * before ) - { - if ( ( Kokkos::Experimental::TASK_STATE_CONSTRUCTING == m_state || - Kokkos::Experimental::TASK_STATE_EXECUTING == m_state ) && - m_dep_size < m_dep_capacity ) { - assign( m_dep + m_dep_size , before ); - ++m_dep_size ; - } - else { - throw_error_add_dependence(); - } - } - - //---------------------------------------- - - template< class FunctorType , class ResultType > - KOKKOS_INLINE_FUNCTION static - void apply_single( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t ) - { - typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ; - - // TaskMember< Kokkos::Serial , ResultType , FunctorType > - // : public TaskMember< Kokkos::Serial , ResultType , void > - // , public FunctorType - // { ... }; - - derived_type & m = * static_cast< derived_type * >( t ); - - Kokkos::Impl::FunctorApply< FunctorType , void , ResultType & >::apply( (FunctorType &) m , & m.m_result ); - } - - template< class FunctorType , class ResultType > - KOKKOS_INLINE_FUNCTION static - void apply_single( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t ) - { - typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ; - - // TaskMember< Kokkos::Serial , ResultType , FunctorType > - // : public TaskMember< Kokkos::Serial , ResultType , void > - // , public FunctorType - // { ... 
}; - - derived_type & m = * static_cast< derived_type * >( t ); - - Kokkos::Impl::FunctorApply< FunctorType , void , void >::apply( (FunctorType &) m ); - } - - //---------------------------------------- - - template< class FunctorType , class ResultType > - static - void apply_team( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t ) - { - typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ; - typedef Kokkos::Impl::SerialTeamMember member_type ; - - // TaskMember< Kokkos::Serial , ResultType , FunctorType > - // : public TaskMember< Kokkos::Serial , ResultType , void > - // , public FunctorType - // { ... }; - - derived_type & m = * static_cast< derived_type * >( t ); - - m.FunctorType::apply( member_type(0,1,0) , m.m_result ); - } - - template< class FunctorType , class ResultType > - static - void apply_team( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t ) - { - typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ; - typedef Kokkos::Impl::SerialTeamMember member_type ; - - // TaskMember< Kokkos::Serial , ResultType , FunctorType > - // : public TaskMember< Kokkos::Serial , ResultType , void > - // , public FunctorType - // { ... }; - - derived_type & m = * static_cast< derived_type * >( t ); - - m.FunctorType::apply( member_type(0,1,0) ); - } -}; - -//---------------------------------------------------------------------------- -/** \brief Base class for tasks with a result value in the Serial execution space. - * - * The FunctorType must be void because this class is accessed by the - * Future class for the task and result value. - * - * Must be derived from TaskMember<S,void,void> 'root class' so the Future class - * can correctly static_cast from the 'root class' to this class. 
- */ -template < class ResultType > -class TaskMember< Kokkos::Serial , ResultType , void > - : public TaskMember< Kokkos::Serial , void , void > -{ -public: - - ResultType m_result ; - - typedef const ResultType & get_result_type ; - - KOKKOS_INLINE_FUNCTION - get_result_type get() const { return m_result ; } - -protected: - - typedef TaskMember< Kokkos::Serial , void , void > task_root_type ; - typedef task_root_type::function_dealloc_type function_dealloc_type ; - typedef task_root_type::function_apply_type function_apply_type ; - - inline - TaskMember( const function_dealloc_type arg_dealloc - , const function_apply_type arg_apply - , const unsigned arg_sizeof_derived - , const unsigned arg_dependence_capacity - ) - : task_root_type( & task_root_type::template verify_type< ResultType > - , arg_dealloc - , arg_apply - , arg_sizeof_derived - , arg_dependence_capacity ) - , m_result() - {} -}; - -template< class ResultType , class FunctorType > -class TaskMember< Kokkos::Serial , ResultType , FunctorType > - : public TaskMember< Kokkos::Serial , ResultType , void > - , public FunctorType -{ -public: - - typedef FunctorType functor_type ; - - typedef TaskMember< Kokkos::Serial , void , void > task_root_type ; - typedef TaskMember< Kokkos::Serial , ResultType , void > task_base_type ; - typedef task_root_type::function_dealloc_type function_dealloc_type ; - typedef task_root_type::function_apply_type function_apply_type ; - - inline - TaskMember( const function_dealloc_type arg_dealloc - , const function_apply_type arg_apply - , const unsigned arg_sizeof_derived - , const unsigned arg_dependence_capacity - , const functor_type & arg_functor - ) - : task_base_type( arg_dealloc , arg_apply , arg_sizeof_derived , arg_dependence_capacity ) - , functor_type( arg_functor ) - {} -}; - -} /* namespace Impl */ -} /* namespace Experimental */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- 
-//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { - -template<> -class TaskPolicy< Kokkos::Serial > -{ -public: - - typedef Kokkos::Serial execution_space ; - typedef Kokkos::Impl::SerialTeamMember member_type ; - -private: - - typedef Impl::TaskMember< execution_space , void , void > task_root_type ; - - template< class FunctorType > - static inline - const task_root_type * get_task_root( const FunctorType * f ) - { - typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ; - return static_cast< const task_root_type * >( static_cast< const task_type * >(f) ); - } - - template< class FunctorType > - static inline - task_root_type * get_task_root( FunctorType * f ) - { - typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ; - return static_cast< task_root_type * >( static_cast< task_type * >(f) ); - } - - unsigned m_default_dependence_capacity ; - -public: - - // Stubbed out for now. 
- KOKKOS_INLINE_FUNCTION - int allocated_task_count() const { return 0 ; } - - TaskPolicy - ( const unsigned /* arg_task_max_count */ - , const unsigned /* arg_task_max_size */ - , const unsigned arg_task_default_dependence_capacity = 4 - , const unsigned /* arg_task_team_size */ = 0 - ) - : m_default_dependence_capacity( arg_task_default_dependence_capacity ) - {} - - KOKKOS_FUNCTION TaskPolicy() = default ; - KOKKOS_FUNCTION TaskPolicy( TaskPolicy && rhs ) = default ; - KOKKOS_FUNCTION TaskPolicy( const TaskPolicy & rhs ) = default ; - KOKKOS_FUNCTION TaskPolicy & operator = ( TaskPolicy && rhs ) = default ; - KOKKOS_FUNCTION TaskPolicy & operator = ( const TaskPolicy & rhs ) = default ; - - //---------------------------------------- - - template< class ValueType > - KOKKOS_INLINE_FUNCTION - const Future< ValueType , execution_space > & - spawn( const Future< ValueType , execution_space > & f - , const bool priority = false ) const - { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - f.m_task->schedule(); -#endif - return f ; - } - - //---------------------------------------- - // Create single-thread task - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - Future< typename FunctorType::value_type , execution_space > - task_create( const FunctorType & functor - , const unsigned dependence_capacity = ~0u ) const - { - typedef typename FunctorType::value_type value_type ; - typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ; - return Future< value_type , execution_space >( -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - task_root_type::create< task_type >( - functor , ( ~0u == dependence_capacity ? 
m_default_dependence_capacity : dependence_capacity ) ) -#endif - ); - } - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - Future< typename FunctorType::value_type , execution_space > - proc_create( const FunctorType & functor - , const unsigned dependence_capacity = ~0u ) const - { return task_create( functor , dependence_capacity ); } - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - Future< typename FunctorType::value_type , execution_space > - task_create_team( const FunctorType & functor - , const unsigned dependence_capacity = ~0u ) const - { - typedef typename FunctorType::value_type value_type ; - typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ; - return Future< value_type , execution_space >( -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - task_root_type::create_team< task_type >( - functor , ( ~0u == dependence_capacity ? m_default_dependence_capacity : dependence_capacity ) ) -#endif - ); - } - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - Future< typename FunctorType::value_type , execution_space > - proc_create_team( const FunctorType & functor - , const unsigned dependence_capacity = ~0u ) const - { return task_create_team( functor , dependence_capacity ); } - - //---------------------------------------- - // Add dependence - template< class A1 , class A2 , class A3 , class A4 > - KOKKOS_INLINE_FUNCTION - void add_dependence( const Future<A1,A2> & after - , const Future<A3,A4> & before - , typename Kokkos::Impl::enable_if - < Kokkos::Impl::is_same< typename Future<A1,A2>::execution_space , execution_space >::value - && - Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value - >::type * = 0 - ) const - { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - after.m_task->add_dependence( before.m_task ); -#endif - } - - //---------------------------------------- - // Functions for an executing task functor to query dependences, - // 
set new dependences, and respawn itself. - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - Future< void , execution_space > - get_dependence( const FunctorType * task_functor , int i ) const - { - return Future<void,execution_space>( -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - get_task_root(task_functor)->get_dependence(i) -#endif - ); - } - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - int get_dependence( const FunctorType * task_functor ) const -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return get_task_root(task_functor)->get_dependence(); } -#else - { return 0 ; } -#endif - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - void clear_dependence( FunctorType * task_functor ) const -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { get_task_root(task_functor)->clear_dependence(); } -#else - {} -#endif - - template< class FunctorType , class A3 , class A4 > - KOKKOS_INLINE_FUNCTION - void add_dependence( FunctorType * task_functor - , const Future<A3,A4> & before - , typename Kokkos::Impl::enable_if - < Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value - >::type * = 0 - ) const -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { get_task_root(task_functor)->add_dependence( before.m_task ); } -#else - {} -#endif - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - void respawn( FunctorType * task_functor - , const bool priority = false ) const - { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - get_task_root(task_functor)->schedule(); -#endif - } - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - void respawn_needing_memory( FunctorType * task_functor ) const - { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - get_task_root(task_functor)->schedule(); -#endif - } - - //---------------------------------------- - - static member_type & member_single(); -}; - -inline -void wait( TaskPolicy< 
Kokkos::Serial > & ) -{ Impl::TaskMember< Kokkos::Serial , void , void >::execute_ready_tasks(); } - -} /* namespace Experimental */ -} // namespace Kokkos - -//---------------------------------------------------------------------------- - -#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ -#endif /* defined( KOKKOS_HAVE_SERIAL ) */ -#endif /* #define KOKKOS_EXPERIMENTAL_SERIAL_TASK_HPP */ - diff --git a/lib/kokkos/core/src/impl/Kokkos_Shape.cpp b/lib/kokkos/core/src/impl/Kokkos_Shape.cpp deleted file mode 100644 index da12db1f381e790e46604f8a15280d2a07f5152a..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/impl/Kokkos_Shape.cpp +++ /dev/null @@ -1,178 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - - -#include <sstream> -#include <impl/Kokkos_Error.hpp> -#include <impl/Kokkos_Shape.hpp> - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -void assert_counts_are_equal_throw( - const size_t x_count , - const size_t y_count ) -{ - std::ostringstream msg ; - - msg << "Kokkos::Impl::assert_counts_are_equal_throw( " - << x_count << " != " << y_count << " )" ; - - throw_runtime_exception( msg.str() ); -} - -void assert_shapes_are_equal_throw( - const unsigned x_scalar_size , - const unsigned x_rank , - const size_t x_N0 , const unsigned x_N1 , - const unsigned x_N2 , const unsigned x_N3 , - const unsigned x_N4 , const unsigned x_N5 , - const unsigned x_N6 , const unsigned x_N7 , - - const unsigned y_scalar_size , - const unsigned y_rank , - const size_t y_N0 , const unsigned y_N1 , - const unsigned y_N2 , const unsigned y_N3 , - const unsigned y_N4 , const unsigned y_N5 , - const unsigned y_N6 , const unsigned y_N7 ) -{ - std::ostringstream msg ; - - msg << "Kokkos::Impl::assert_shape_are_equal_throw( {" - << " scalar_size(" << x_scalar_size - << ") rank(" << x_rank - << ") dimension(" ; - if ( 0 < x_rank ) { msg << " " 
<< x_N0 ; } - if ( 1 < x_rank ) { msg << " " << x_N1 ; } - if ( 2 < x_rank ) { msg << " " << x_N2 ; } - if ( 3 < x_rank ) { msg << " " << x_N3 ; } - if ( 4 < x_rank ) { msg << " " << x_N4 ; } - if ( 5 < x_rank ) { msg << " " << x_N5 ; } - if ( 6 < x_rank ) { msg << " " << x_N6 ; } - if ( 7 < x_rank ) { msg << " " << x_N7 ; } - msg << " ) } != { " - << " scalar_size(" << y_scalar_size - << ") rank(" << y_rank - << ") dimension(" ; - if ( 0 < y_rank ) { msg << " " << y_N0 ; } - if ( 1 < y_rank ) { msg << " " << y_N1 ; } - if ( 2 < y_rank ) { msg << " " << y_N2 ; } - if ( 3 < y_rank ) { msg << " " << y_N3 ; } - if ( 4 < y_rank ) { msg << " " << y_N4 ; } - if ( 5 < y_rank ) { msg << " " << y_N5 ; } - if ( 6 < y_rank ) { msg << " " << y_N6 ; } - if ( 7 < y_rank ) { msg << " " << y_N7 ; } - msg << " ) } )" ; - - throw_runtime_exception( msg.str() ); -} - -void AssertShapeBoundsAbort< Kokkos::HostSpace >::apply( - const size_t rank , - const size_t n0 , const size_t n1 , - const size_t n2 , const size_t n3 , - const size_t n4 , const size_t n5 , - const size_t n6 , const size_t n7 , - - const size_t arg_rank , - const size_t i0 , const size_t i1 , - const size_t i2 , const size_t i3 , - const size_t i4 , const size_t i5 , - const size_t i6 , const size_t i7 ) -{ - std::ostringstream msg ; - msg << "Kokkos::Impl::AssertShapeBoundsAbort( shape = {" ; - if ( 0 < rank ) { msg << " " << n0 ; } - if ( 1 < rank ) { msg << " " << n1 ; } - if ( 2 < rank ) { msg << " " << n2 ; } - if ( 3 < rank ) { msg << " " << n3 ; } - if ( 4 < rank ) { msg << " " << n4 ; } - if ( 5 < rank ) { msg << " " << n5 ; } - if ( 6 < rank ) { msg << " " << n6 ; } - if ( 7 < rank ) { msg << " " << n7 ; } - msg << " } index = {" ; - if ( 0 < arg_rank ) { msg << " " << i0 ; } - if ( 1 < arg_rank ) { msg << " " << i1 ; } - if ( 2 < arg_rank ) { msg << " " << i2 ; } - if ( 3 < arg_rank ) { msg << " " << i3 ; } - if ( 4 < arg_rank ) { msg << " " << i4 ; } - if ( 5 < arg_rank ) { msg << " " << i5 ; } - if ( 6 < 
arg_rank ) { msg << " " << i6 ; } - if ( 7 < arg_rank ) { msg << " " << i7 ; } - msg << " } )" ; - - throw_runtime_exception( msg.str() ); -} - -void assert_shape_effective_rank1_at_leastN_throw( - const size_t x_rank , const size_t x_N0 , - const size_t x_N1 , const size_t x_N2 , - const size_t x_N3 , const size_t x_N4 , - const size_t x_N5 , const size_t x_N6 , - const size_t x_N7 , - const size_t N0 ) -{ - std::ostringstream msg ; - - msg << "Kokkos::Impl::assert_shape_effective_rank1_at_leastN_throw( shape = {" ; - if ( 0 < x_rank ) { msg << " " << x_N0 ; } - if ( 1 < x_rank ) { msg << " " << x_N1 ; } - if ( 2 < x_rank ) { msg << " " << x_N2 ; } - if ( 3 < x_rank ) { msg << " " << x_N3 ; } - if ( 4 < x_rank ) { msg << " " << x_N4 ; } - if ( 5 < x_rank ) { msg << " " << x_N5 ; } - if ( 6 < x_rank ) { msg << " " << x_N6 ; } - if ( 7 < x_rank ) { msg << " " << x_N7 ; } - msg << " } N = " << N0 << " )" ; - - throw_runtime_exception( msg.str() ); -} - - - -} -} - diff --git a/lib/kokkos/core/src/impl/Kokkos_Shape.hpp b/lib/kokkos/core/src/impl/Kokkos_Shape.hpp deleted file mode 100644 index 9749e0a1ff73107b97435862f737d96439fcb9d3..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/impl/Kokkos_Shape.hpp +++ /dev/null @@ -1,917 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_SHAPE_HPP -#define KOKKOS_SHAPE_HPP - -#include <typeinfo> -#include <utility> -#include <Kokkos_Core_fwd.hpp> -#include <impl/Kokkos_Traits.hpp> -#include <impl/Kokkos_StaticAssert.hpp> - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- -/** \brief The shape of a Kokkos with dynamic and static dimensions. 
- * Dynamic dimensions are member values and static dimensions are - * 'static const' values. - * - * The upper bound on the array rank is eight. - */ -template< unsigned ScalarSize , - unsigned Rank , - unsigned s0 = 1 , - unsigned s1 = 1 , - unsigned s2 = 1 , - unsigned s3 = 1 , - unsigned s4 = 1 , - unsigned s5 = 1 , - unsigned s6 = 1 , - unsigned s7 = 1 > -struct Shape ; - -//---------------------------------------------------------------------------- -/** \brief Shape equality if the value type, layout, and dimensions - * are equal. - */ -template< unsigned xSize , unsigned xRank , - unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 , - unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 , - - unsigned ySize , unsigned yRank , - unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 , - unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 > -KOKKOS_INLINE_FUNCTION -bool operator == ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x , - const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y ) -{ - enum { same_size = xSize == ySize }; - enum { same_rank = xRank == yRank }; - - return same_size && same_rank && - size_t( x.N0 ) == size_t( y.N0 ) && - unsigned( x.N1 ) == unsigned( y.N1 ) && - unsigned( x.N2 ) == unsigned( y.N2 ) && - unsigned( x.N3 ) == unsigned( y.N3 ) && - unsigned( x.N4 ) == unsigned( y.N4 ) && - unsigned( x.N5 ) == unsigned( y.N5 ) && - unsigned( x.N6 ) == unsigned( y.N6 ) && - unsigned( x.N7 ) == unsigned( y.N7 ) ; -} - -template< unsigned xSize , unsigned xRank , - unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 , - unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 , - - unsigned ySize ,unsigned yRank , - unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 , - unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 > -KOKKOS_INLINE_FUNCTION -bool operator != ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x , - const 
Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y ) -{ return ! operator == ( x , y ); } - -//---------------------------------------------------------------------------- - -void assert_counts_are_equal_throw( - const size_t x_count , - const size_t y_count ); - -inline -void assert_counts_are_equal( - const size_t x_count , - const size_t y_count ) -{ - if ( x_count != y_count ) { - assert_counts_are_equal_throw( x_count , y_count ); - } -} - -void assert_shapes_are_equal_throw( - const unsigned x_scalar_size , - const unsigned x_rank , - const size_t x_N0 , const unsigned x_N1 , - const unsigned x_N2 , const unsigned x_N3 , - const unsigned x_N4 , const unsigned x_N5 , - const unsigned x_N6 , const unsigned x_N7 , - - const unsigned y_scalar_size , - const unsigned y_rank , - const size_t y_N0 , const unsigned y_N1 , - const unsigned y_N2 , const unsigned y_N3 , - const unsigned y_N4 , const unsigned y_N5 , - const unsigned y_N6 , const unsigned y_N7 ); - -template< unsigned xSize , unsigned xRank , - unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 , - unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 , - - unsigned ySize , unsigned yRank , - unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 , - unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 > -inline -void assert_shapes_are_equal( - const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x , - const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y ) -{ - typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ; - typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ; - - if ( x != y ) { - assert_shapes_are_equal_throw( - x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7, - y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 ); - } -} - -template< unsigned xSize , unsigned xRank , - unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 , - unsigned xN4 , unsigned xN5 , 
unsigned xN6 , unsigned xN7 , - - unsigned ySize , unsigned yRank , - unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 , - unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 > -void assert_shapes_equal_dimension( - const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x , - const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y ) -{ - typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ; - typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ; - - // Omit comparison of scalar_size. - if ( unsigned( x.rank ) != unsigned( y.rank ) || - size_t( x.N0 ) != size_t( y.N0 ) || - unsigned( x.N1 ) != unsigned( y.N1 ) || - unsigned( x.N2 ) != unsigned( y.N2 ) || - unsigned( x.N3 ) != unsigned( y.N3 ) || - unsigned( x.N4 ) != unsigned( y.N4 ) || - unsigned( x.N5 ) != unsigned( y.N5 ) || - unsigned( x.N6 ) != unsigned( y.N6 ) || - unsigned( x.N7 ) != unsigned( y.N7 ) ) { - assert_shapes_are_equal_throw( - x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7, - y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 ); - } -} - -//---------------------------------------------------------------------------- - -template< class ShapeType > struct assert_shape_is_rank_zero ; -template< class ShapeType > struct assert_shape_is_rank_one ; - -template< unsigned Size > -struct assert_shape_is_rank_zero< Shape<Size,0> > - : public true_type {}; - -template< unsigned Size , unsigned s0 > -struct assert_shape_is_rank_one< Shape<Size,1,s0> > - : public true_type {}; - -//---------------------------------------------------------------------------- - -/** \brief Array bounds assertion templated on the execution space - * to allow device-specific abort code. 
- */ -template< class Space > -struct AssertShapeBoundsAbort ; - -template<> -struct AssertShapeBoundsAbort< Kokkos::HostSpace > -{ - static void apply( const size_t rank , - const size_t n0 , const size_t n1 , - const size_t n2 , const size_t n3 , - const size_t n4 , const size_t n5 , - const size_t n6 , const size_t n7 , - const size_t arg_rank , - const size_t i0 , const size_t i1 , - const size_t i2 , const size_t i3 , - const size_t i4 , const size_t i5 , - const size_t i6 , const size_t i7 ); -}; - -template< class ExecutionSpace > -struct AssertShapeBoundsAbort -{ - KOKKOS_INLINE_FUNCTION - static void apply( const size_t rank , - const size_t n0 , const size_t n1 , - const size_t n2 , const size_t n3 , - const size_t n4 , const size_t n5 , - const size_t n6 , const size_t n7 , - const size_t arg_rank , - const size_t i0 , const size_t i1 , - const size_t i2 , const size_t i3 , - const size_t i4 , const size_t i5 , - const size_t i6 , const size_t i7 ) - { - AssertShapeBoundsAbort< Kokkos::HostSpace > - ::apply( rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 , - arg_rank, i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 ); - } -}; - -template< class ShapeType > -KOKKOS_INLINE_FUNCTION -void assert_shape_bounds( const ShapeType & shape , - const size_t arg_rank , - const size_t i0 , - const size_t i1 = 0 , - const size_t i2 = 0 , - const size_t i3 = 0 , - const size_t i4 = 0 , - const size_t i5 = 0 , - const size_t i6 = 0 , - const size_t i7 = 0 ) -{ - // Must supply at least as many indices as ranks. - // Every index must be within bounds. - const bool ok = ShapeType::rank <= arg_rank && - i0 < size_t(shape.N0) && - i1 < size_t(shape.N1) && - i2 < size_t(shape.N2) && - i3 < size_t(shape.N3) && - i4 < size_t(shape.N4) && - i5 < size_t(shape.N5) && - i6 < size_t(shape.N6) && - i7 < size_t(shape.N7) ; - - if ( ! 
ok ) { - AssertShapeBoundsAbort< Kokkos::Impl::ActiveExecutionMemorySpace > - ::apply( ShapeType::rank , - shape.N0 , shape.N1 , shape.N2 , shape.N3 , - shape.N4 , shape.N5 , shape.N6 , shape.N7 , - arg_rank , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 ); - } -} - -#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) -#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) assert_shape_bounds(S,1,I0); -#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) assert_shape_bounds(S,2,I0,I1); -#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) assert_shape_bounds(S,3,I0,I1,I2); -#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) assert_shape_bounds(S,4,I0,I1,I2,I3); -#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) assert_shape_bounds(S,5,I0,I1,I2,I3,I4); -#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) assert_shape_bounds(S,6,I0,I1,I2,I3,I4,I5); -#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) assert_shape_bounds(S,7,I0,I1,I2,I3,I4,I5,I6); -#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) assert_shape_bounds(S,8,I0,I1,I2,I3,I4,I5,I6,I7); -#else -#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) /* */ -#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) /* */ -#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) /* */ -#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) /* */ -#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) /* */ -#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) /* */ -#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) /* */ -#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) /* */ -#endif - - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- -// Specialization and optimization for the Rank 0 shape. 
- -template < unsigned ScalarSize > -struct Shape< ScalarSize , 0, 1,1,1,1, 1,1,1,1 > -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 0 }; - enum { rank = 0 }; - - enum { N0 = 1 }; - enum { N1 = 1 }; - enum { N2 = 1 }; - enum { N3 = 1 }; - enum { N4 = 1 }; - enum { N5 = 1 }; - enum { N6 = 1 }; - enum { N7 = 1 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & , - unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 , - unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) - {} -}; - -//---------------------------------------------------------------------------- - -template< unsigned R > struct assign_shape_dimension ; - -#define KOKKOS_ASSIGN_SHAPE_DIMENSION( R ) \ -template<> \ -struct assign_shape_dimension< R > \ -{ \ - template< class ShapeType > \ - KOKKOS_INLINE_FUNCTION \ - assign_shape_dimension( ShapeType & shape \ - , typename Impl::enable_if<( R < ShapeType::rank_dynamic ), size_t >::type n \ - ) { shape.N ## R = n ; } \ -}; - -KOKKOS_ASSIGN_SHAPE_DIMENSION(0) -KOKKOS_ASSIGN_SHAPE_DIMENSION(1) -KOKKOS_ASSIGN_SHAPE_DIMENSION(2) -KOKKOS_ASSIGN_SHAPE_DIMENSION(3) -KOKKOS_ASSIGN_SHAPE_DIMENSION(4) -KOKKOS_ASSIGN_SHAPE_DIMENSION(5) -KOKKOS_ASSIGN_SHAPE_DIMENSION(6) -KOKKOS_ASSIGN_SHAPE_DIMENSION(7) - -#undef KOKKOS_ASSIGN_SHAPE_DIMENSION - -//---------------------------------------------------------------------------- -// All-static dimension array - -template < unsigned ScalarSize , - unsigned Rank , - unsigned s0 , - unsigned s1 , - unsigned s2 , - unsigned s3 , - unsigned s4 , - unsigned s5 , - unsigned s6 , - unsigned s7 > -struct Shape { - - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 0 }; - enum { rank = Rank }; - - enum { N0 = s0 }; - enum { N1 = s1 }; - enum { N2 = s2 }; - enum { N3 = s3 }; - enum { N4 = s4 }; - enum { N5 = s5 }; - enum { N6 = s6 }; - enum { N7 = s7 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & , - unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 , - 
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) - {} -}; - -// 1 == dynamic_rank <= rank <= 8 -template < unsigned ScalarSize , - unsigned Rank , - unsigned s1 , - unsigned s2 , - unsigned s3 , - unsigned s4 , - unsigned s5 , - unsigned s6 , - unsigned s7 > -struct Shape< ScalarSize , Rank , 0,s1,s2,s3, s4,s5,s6,s7 > -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 1 }; - enum { rank = Rank }; - - size_t N0 ; // For 1 == dynamic_rank allow N0 > 2^32 - - enum { N1 = s1 }; - enum { N2 = s2 }; - enum { N3 = s3 }; - enum { N4 = s4 }; - enum { N5 = s5 }; - enum { N6 = s6 }; - enum { N7 = s7 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & s , - size_t n0 , unsigned = 0 , unsigned = 0 , unsigned = 0 , - unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) - { s.N0 = n0 ; } -}; - -// 2 == dynamic_rank <= rank <= 8 -template < unsigned ScalarSize , unsigned Rank , - unsigned s2 , - unsigned s3 , - unsigned s4 , - unsigned s5 , - unsigned s6 , - unsigned s7 > -struct Shape< ScalarSize , Rank , 0,0,s2,s3, s4,s5,s6,s7 > -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 2 }; - enum { rank = Rank }; - - unsigned N0 ; - unsigned N1 ; - - enum { N2 = s2 }; - enum { N3 = s3 }; - enum { N4 = s4 }; - enum { N5 = s5 }; - enum { N6 = s6 }; - enum { N7 = s7 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & s , - unsigned n0 , unsigned n1 , unsigned = 0 , unsigned = 0 , - unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) - { s.N0 = n0 ; s.N1 = n1 ; } -}; - -// 3 == dynamic_rank <= rank <= 8 -template < unsigned Rank , unsigned ScalarSize , - unsigned s3 , - unsigned s4 , - unsigned s5 , - unsigned s6 , - unsigned s7 > -struct Shape< ScalarSize , Rank , 0,0,0,s3, s4,s5,s6,s7> -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 3 }; - enum { rank = Rank }; - - unsigned N0 ; - unsigned N1 ; - unsigned N2 ; - - enum { N3 = s3 }; - enum { N4 = s4 }; - enum { N5 = s5 }; - enum { N6 = s6 }; - enum { 
N7 = s7 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & s , - unsigned n0 , unsigned n1 , unsigned n2 , unsigned = 0 , - unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) - { s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; } -}; - -// 4 == dynamic_rank <= rank <= 8 -template < unsigned ScalarSize , unsigned Rank , - unsigned s4 , - unsigned s5 , - unsigned s6 , - unsigned s7 > -struct Shape< ScalarSize , Rank, 0,0,0,0, s4,s5,s6,s7 > -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 4 }; - enum { rank = Rank }; - - unsigned N0 ; - unsigned N1 ; - unsigned N2 ; - unsigned N3 ; - - enum { N4 = s4 }; - enum { N5 = s5 }; - enum { N6 = s6 }; - enum { N7 = s7 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & s , - unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 , - unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) - { s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; } -}; - -// 5 == dynamic_rank <= rank <= 8 -template < unsigned ScalarSize , unsigned Rank , - unsigned s5 , - unsigned s6 , - unsigned s7 > -struct Shape< ScalarSize , Rank , 0,0,0,0, 0,s5,s6,s7 > -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 5 }; - enum { rank = Rank }; - - unsigned N0 ; - unsigned N1 ; - unsigned N2 ; - unsigned N3 ; - unsigned N4 ; - - enum { N5 = s5 }; - enum { N6 = s6 }; - enum { N7 = s7 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & s , - unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 , - unsigned n4 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) - { s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; s.N4 = n4 ; } -}; - -// 6 == dynamic_rank <= rank <= 8 -template < unsigned ScalarSize , unsigned Rank , - unsigned s6 , - unsigned s7 > -struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,s6,s7 > -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 6 }; - enum { rank = Rank }; - - unsigned N0 ; - unsigned N1 ; - unsigned N2 ; - unsigned N3 ; - unsigned N4 ; - unsigned N5 ; - - enum { N6 = s6 }; 
- enum { N7 = s7 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & s , - unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 , - unsigned n4 , unsigned n5 = 0 , unsigned = 0 , unsigned = 0 ) - { - s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; - s.N4 = n4 ; s.N5 = n5 ; - } -}; - -// 7 == dynamic_rank <= rank <= 8 -template < unsigned ScalarSize , unsigned Rank , - unsigned s7 > -struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,0,s7 > -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 7 }; - enum { rank = Rank }; - - unsigned N0 ; - unsigned N1 ; - unsigned N2 ; - unsigned N3 ; - unsigned N4 ; - unsigned N5 ; - unsigned N6 ; - - enum { N7 = s7 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & s , - unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 , - unsigned n4 , unsigned n5 , unsigned n6 , unsigned = 0 ) - { - s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; - s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ; - } -}; - -// 8 == dynamic_rank <= rank <= 8 -template < unsigned ScalarSize > -struct Shape< ScalarSize , 8 , 0,0,0,0, 0,0,0,0 > -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 8 }; - enum { rank = 8 }; - - unsigned N0 ; - unsigned N1 ; - unsigned N2 ; - unsigned N3 ; - unsigned N4 ; - unsigned N5 ; - unsigned N6 ; - unsigned N7 ; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & s , - unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 , - unsigned n4 , unsigned n5 , unsigned n6 , unsigned n7 ) - { - s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; - s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ; s.N7 = n7 ; - } -}; - -//---------------------------------------------------------------------------- - -template< class ShapeType , unsigned N , - unsigned R = ShapeType::rank_dynamic > -struct ShapeInsert ; - -template< class ShapeType , unsigned N > -struct ShapeInsert< ShapeType , N , 0 > -{ - typedef Shape< ShapeType::scalar_size , - ShapeType::rank + 1 , - N , - ShapeType::N0 , - ShapeType::N1 , - ShapeType::N2 , - 
ShapeType::N3 , - ShapeType::N4 , - ShapeType::N5 , - ShapeType::N6 > type ; -}; - -template< class ShapeType , unsigned N > -struct ShapeInsert< ShapeType , N , 1 > -{ - typedef Shape< ShapeType::scalar_size , - ShapeType::rank + 1 , - 0 , - N , - ShapeType::N1 , - ShapeType::N2 , - ShapeType::N3 , - ShapeType::N4 , - ShapeType::N5 , - ShapeType::N6 > type ; -}; - -template< class ShapeType , unsigned N > -struct ShapeInsert< ShapeType , N , 2 > -{ - typedef Shape< ShapeType::scalar_size , - ShapeType::rank + 1 , - 0 , - 0 , - N , - ShapeType::N2 , - ShapeType::N3 , - ShapeType::N4 , - ShapeType::N5 , - ShapeType::N6 > type ; -}; - -template< class ShapeType , unsigned N > -struct ShapeInsert< ShapeType , N , 3 > -{ - typedef Shape< ShapeType::scalar_size , - ShapeType::rank + 1 , - 0 , - 0 , - 0 , - N , - ShapeType::N3 , - ShapeType::N4 , - ShapeType::N5 , - ShapeType::N6 > type ; -}; - -template< class ShapeType , unsigned N > -struct ShapeInsert< ShapeType , N , 4 > -{ - typedef Shape< ShapeType::scalar_size , - ShapeType::rank + 1 , - 0 , - 0 , - 0 , - 0 , - N , - ShapeType::N4 , - ShapeType::N5 , - ShapeType::N6 > type ; -}; - -template< class ShapeType , unsigned N > -struct ShapeInsert< ShapeType , N , 5 > -{ - typedef Shape< ShapeType::scalar_size , - ShapeType::rank + 1 , - 0 , - 0 , - 0 , - 0 , - 0 , - N , - ShapeType::N5 , - ShapeType::N6 > type ; -}; - -template< class ShapeType , unsigned N > -struct ShapeInsert< ShapeType , N , 6 > -{ - typedef Shape< ShapeType::scalar_size , - ShapeType::rank + 1 , - 0 , - 0 , - 0 , - 0 , - 0 , - 0 , - N , - ShapeType::N6 > type ; -}; - -template< class ShapeType , unsigned N > -struct ShapeInsert< ShapeType , N , 7 > -{ - typedef Shape< ShapeType::scalar_size , - ShapeType::rank + 1 , - 0 , - 0 , - 0 , - 0 , - 0 , - 0 , - 0 , - N > type ; -}; - -//---------------------------------------------------------------------------- - -template< class DstShape , class SrcShape , - unsigned DstRankDynamic = 
DstShape::rank_dynamic , - bool DstRankDynamicOK = unsigned(DstShape::rank_dynamic) >= unsigned(SrcShape::rank_dynamic) > -struct ShapeCompatible { enum { value = false }; }; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 8 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) }; -}; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 7 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && - unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; -}; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 6 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && - unsigned(DstShape::N6) == unsigned(SrcShape::N6) && - unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; -}; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 5 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && - unsigned(DstShape::N5) == unsigned(SrcShape::N5) && - unsigned(DstShape::N6) == unsigned(SrcShape::N6) && - unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; -}; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 4 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && - unsigned(DstShape::N4) == unsigned(SrcShape::N4) && - unsigned(DstShape::N5) == unsigned(SrcShape::N5) && - unsigned(DstShape::N6) == unsigned(SrcShape::N6) && - unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; -}; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 3 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && - unsigned(DstShape::N3) == unsigned(SrcShape::N3) && - unsigned(DstShape::N4) == 
unsigned(SrcShape::N4) && - unsigned(DstShape::N5) == unsigned(SrcShape::N5) && - unsigned(DstShape::N6) == unsigned(SrcShape::N6) && - unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; -}; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 2 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && - unsigned(DstShape::N2) == unsigned(SrcShape::N2) && - unsigned(DstShape::N3) == unsigned(SrcShape::N3) && - unsigned(DstShape::N4) == unsigned(SrcShape::N4) && - unsigned(DstShape::N5) == unsigned(SrcShape::N5) && - unsigned(DstShape::N6) == unsigned(SrcShape::N6) && - unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; -}; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 1 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && - unsigned(DstShape::N1) == unsigned(SrcShape::N1) && - unsigned(DstShape::N2) == unsigned(SrcShape::N2) && - unsigned(DstShape::N3) == unsigned(SrcShape::N3) && - unsigned(DstShape::N4) == unsigned(SrcShape::N4) && - unsigned(DstShape::N5) == unsigned(SrcShape::N5) && - unsigned(DstShape::N6) == unsigned(SrcShape::N6) && - unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; -}; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 0 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && - unsigned(DstShape::N0) == unsigned(SrcShape::N0) && - unsigned(DstShape::N1) == unsigned(SrcShape::N1) && - unsigned(DstShape::N2) == unsigned(SrcShape::N2) && - unsigned(DstShape::N3) == unsigned(SrcShape::N3) && - unsigned(DstShape::N4) == unsigned(SrcShape::N4) && - unsigned(DstShape::N5) == unsigned(SrcShape::N5) && - unsigned(DstShape::N6) == unsigned(SrcShape::N6) && - unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - 
-//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< unsigned ScalarSize , unsigned Rank , - unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 , - unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 , - typename iType > -KOKKOS_INLINE_FUNCTION -size_t dimension( - const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape , - const iType & r ) -{ - return 0 == r ? shape.N0 : ( - 1 == r ? shape.N1 : ( - 2 == r ? shape.N2 : ( - 3 == r ? shape.N3 : ( - 4 == r ? shape.N4 : ( - 5 == r ? shape.N5 : ( - 6 == r ? shape.N6 : ( - 7 == r ? shape.N7 : 1 ))))))); -} - -template< unsigned ScalarSize , unsigned Rank , - unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 , - unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 > -KOKKOS_INLINE_FUNCTION -size_t cardinality_count( - const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape ) -{ - return size_t(shape.N0) * shape.N1 * shape.N2 * shape.N3 * - shape.N4 * shape.N5 * shape.N6 * shape.N7 ; -} - -//---------------------------------------------------------------------------- - -} /* namespace Impl */ -} /* namespace Kokkos */ - -#endif /* #ifndef KOKKOS_CORESHAPE_HPP */ - diff --git a/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.cpp b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp similarity index 85% rename from lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.cpp rename to lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp index 96b37043455e51d726e1d22e4f3e450986acae01..1ae51742e0a2cbe437abf17c7dedb8658c3e3e94 100644 --- a/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp @@ -44,7 +44,6 @@ #include <Kokkos_Core.hpp> namespace Kokkos { -namespace Experimental { namespace Impl { int SharedAllocationRecord< void , void >::s_tracking_enabled = 1 ; @@ -62,7 +61,7 @@ void SharedAllocationRecord< void , 
void >::tracking_release_and_enable() // now release and enable tracking. if ( ! Kokkos::atomic_compare_exchange_strong( & s_tracking_enabled, 0, 1 ) ){ - Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord<>::tracking_release_and_enable FAILED, this host process thread did not hold the lock" ); + Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord<>::tracking_release_and_enable FAILED, this host process thread did not hold the lock" ); } } @@ -98,10 +97,10 @@ if ( ! ok ) { const char * format_string; if (sizeof(uintptr_t) == sizeof(unsigned long)) { - format_string = "Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12lx){ m_count(%d) m_root(0x%.12lx) m_next(0x%.12lx) m_prev(0x%.12lx) m_next->m_prev(0x%.12lx) m_prev->m_next(0x%.12lx) }\n"; + format_string = "Kokkos::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12lx){ m_count(%d) m_root(0x%.12lx) m_next(0x%.12lx) m_prev(0x%.12lx) m_next->m_prev(0x%.12lx) m_prev->m_next(0x%.12lx) }\n"; } else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { - format_string = "Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12llx){ m_count(%d) m_root(0x%.12llx) m_next(0x%.12llx) m_prev(0x%.12llx) m_next->m_prev(0x%.12llx) m_prev->m_next(0x%.12llx) }\n"; + format_string = "Kokkos::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12llx){ m_count(%d) m_root(0x%.12llx) m_next(0x%.12llx) m_prev(0x%.12llx) m_next->m_prev(0x%.12llx) m_prev->m_next(0x%.12llx) }\n"; } fprintf(stderr @@ -119,7 +118,7 @@ if ( ! 
ok ) { } if ( zero != Kokkos::atomic_exchange( & root->m_next , root_next ) ) { - Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane unlocking"); + Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed is_sane unlocking"); } } @@ -145,7 +144,7 @@ SharedAllocationRecord<void,void>::find( SharedAllocationRecord<void,void> * con if ( r == arg_root ) { r = 0 ; } if ( zero != Kokkos::atomic_exchange( & arg_root->m_next , root_next ) ) { - Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed locking/unlocking"); + Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed locking/unlocking"); } return r ; @@ -190,11 +189,11 @@ SharedAllocationRecord( SharedAllocationRecord<void,void> * arg_root Kokkos::memory_fence(); if ( zero != Kokkos::atomic_exchange( & m_root->m_next , this ) ) { - Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed locking/unlocking"); + Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed locking/unlocking"); } } else { - Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord given NULL allocation"); + Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord given NULL allocation"); } } @@ -205,7 +204,7 @@ increment( SharedAllocationRecord< void , void > * arg_record ) const int old_count = Kokkos::atomic_fetch_add( & arg_record->m_count , 1 ); if ( old_count < 0 ) { // Error - Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed increment"); + Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed increment"); } } @@ -219,7 +218,7 @@ decrement( SharedAllocationRecord< void , void > * arg_record ) #if 0 if ( old_count <= 1 ) { - fprintf(stderr,"Kokkos::Experimental::Impl::SharedAllocationRecord '%s' at 
0x%lx delete count = %d\n", arg_record->m_alloc_ptr->m_label , (unsigned long) arg_record , old_count ); + fprintf(stderr,"Kokkos::Impl::SharedAllocationRecord '%s' at 0x%lx delete count = %d\n", arg_record->m_alloc_ptr->m_label , (unsigned long) arg_record , old_count ); fflush(stderr); } #endif @@ -251,7 +250,7 @@ decrement( SharedAllocationRecord< void , void > * arg_record ) // Unlock the list: if ( zero != Kokkos::atomic_exchange( & arg_record->m_root->m_next , root_next ) ) { - Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed decrement unlocking"); + Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed decrement unlocking"); } arg_record->m_next = 0 ; @@ -262,9 +261,9 @@ decrement( SharedAllocationRecord< void , void > * arg_record ) arg_record = 0 ; } else if ( old_count < 1 ) { // Error - fprintf(stderr,"Kokkos::Experimental::Impl::SharedAllocationRecord '%s' failed decrement count = %d\n", arg_record->m_alloc_ptr->m_label , old_count ); + fprintf(stderr,"Kokkos::Impl::SharedAllocationRecord '%s' failed decrement count = %d\n", arg_record->m_alloc_ptr->m_label , old_count ); fflush(stderr); - Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed decrement count"); + Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed decrement count"); } return arg_record ; @@ -340,7 +339,6 @@ print_host_accessible_records( std::ostream & s } } /* namespace Impl */ -} /* namespace Experimental */ } /* namespace Kokkos */ diff --git a/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.hpp b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp similarity index 96% rename from lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.hpp rename to lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp index 1498eafb008ffa5d26a84094df9ba3f48126551e..a9c2d6f22a7638c33431575723d549194eec79af 100644 --- a/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.hpp 
+++ b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp @@ -48,7 +48,6 @@ #include <string> namespace Kokkos { -namespace Experimental { namespace Impl { template< class MemorySpace = void , class DestroyFunctor = void > @@ -109,6 +108,7 @@ protected: ); public: + inline std::string get_label() const { return std::string("Unmanaged"); } static int tracking_enabled() { return s_tracking_enabled ; } @@ -209,7 +209,7 @@ private: , const size_t arg_alloc ) /* Allocate user memory as [ SharedAllocationHeader , user_memory ] */ - : SharedAllocationRecord< MemorySpace , void >( arg_space , arg_label , arg_alloc , & Kokkos::Experimental::Impl::deallocate< MemorySpace , DestroyFunctor > ) + : SharedAllocationRecord< MemorySpace , void >( arg_space , arg_label , arg_alloc , & Kokkos::Impl::deallocate< MemorySpace , DestroyFunctor > ) , m_destroy() {} @@ -238,6 +238,9 @@ public: } }; +template< class MemorySpace > +class SharedAllocationRecord<MemorySpace,void> : public SharedAllocationRecord< void , void > {}; + union SharedAllocationTracker { private: @@ -297,9 +300,9 @@ public: template< class MemorySpace > std::string get_label() const { - return ( m_record_bits & DO_NOT_DEREF_FLAG ) + return ( m_record_bits == DO_NOT_DEREF_FLAG ) ? 
std::string() - : static_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record )->get_label() + : reinterpret_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record_bits & ~DO_NOT_DEREF_FLAG )->get_label() ; } @@ -394,7 +397,6 @@ public: } /* namespace Impl */ -} /* namespace Experimental */ } /* namespace Kokkos */ #endif diff --git a/lib/kokkos/core/src/impl/Kokkos_Tags.hpp b/lib/kokkos/core/src/impl/Kokkos_Tags.hpp index 0bc2864ff1d9079f47ec4369f25388794aa52f71..9545e7e6b33a919ca925d00160e9c0fbed433b9c 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Tags.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Tags.hpp @@ -61,34 +61,15 @@ * struct Foo { using array_layout = void; }; * have_array_layout<Foo>::value == 1; */ -#define KOKKOS_HAVE_TYPE( Type ) \ -template <typename T> \ -struct have_##Type { \ - template <typename U> static std::false_type have_type(...); \ - template <typename U> static std::true_type have_type( typename U::Type* ); \ - using type = decltype(have_type<T>(nullptr)); \ - static constexpr bool value = type::value; \ -} - -/** KOKKOS_IS_CONCEPT( Concept ) - * - * defines a meta-function that check if a type match the given Kokkos concept - * type alias which matches Type - * - * e.g. 
- * KOKKOS_IS_CONCEPT( array_layout ); - * struct Foo { using array_layout = Foo; }; - * is_array_layout<Foo>::value == 1; - */ -#define KOKKOS_IS_CONCEPT( Concept ) \ -template <typename T> \ -struct is_##Concept { \ - template <typename U> static std::false_type have_concept(...); \ - template <typename U> static auto have_concept( typename U::Concept* ) \ - ->typename std::is_same<T, typename U::Concept>::type;\ - using type = decltype(have_concept<T>(nullptr)); \ - static constexpr bool value = type::value; \ -} +#define KOKKOS_HAVE_TYPE( TYPE ) \ +template <typename T> struct have_ ## TYPE { \ +private: \ + template <typename U, typename = void > struct X : std::false_type {}; \ + template <typename U> struct X<U,typename std::conditional<true,void,typename X:: TYPE >::type > : std::true_type {}; \ +public: \ + typedef typename X<T>::type type ; \ + enum : bool { value = type::value }; \ +}; //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -98,101 +79,11 @@ namespace Kokkos { namespace Impl { template <typename T> using is_void = std::is_same<void,T>; -// is_memory_space<T>::value -KOKKOS_IS_CONCEPT( memory_space ); - -// is_memory_traits<T>::value -KOKKOS_IS_CONCEPT( memory_traits ); - -// is_execution_space<T>::value -KOKKOS_IS_CONCEPT( execution_space ); - -// is_execution_policy<T>::value -KOKKOS_IS_CONCEPT( execution_policy ); - -// is_array_layout<T>::value -KOKKOS_IS_CONCEPT( array_layout ); - -// is_iteration_pattern<T>::value -KOKKOS_IS_CONCEPT( iteration_pattern ); - -// is_schedule_type<T>::value -KOKKOS_IS_CONCEPT( schedule_type ); - -// is_index_type<T>::value -KOKKOS_IS_CONCEPT( index_type ); - }} // namespace Kokkos::Impl //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -namespace Kokkos { - -template< class ExecutionSpace , 
class MemorySpace > -struct Device { - static_assert( Impl::is_execution_space<ExecutionSpace>::value - , "Execution space is not valid" ); - static_assert( Impl::is_memory_space<MemorySpace>::value - , "Memory space is not valid" ); - typedef ExecutionSpace execution_space; - typedef MemorySpace memory_space; - typedef Device<execution_space,memory_space> device_type; -}; -} - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class C , class Enable = void > -struct is_space : public Impl::false_type {}; - -template< class C > -struct is_space< C - , typename Impl::enable_if<( - Impl::is_same< C , typename C::execution_space >::value || - Impl::is_same< C , typename C::memory_space >::value || - Impl::is_same< C , Device< - typename C::execution_space, - typename C::memory_space> >::value - )>::type - > - : public Impl::true_type -{ - typedef typename C::execution_space execution_space ; - typedef typename C::memory_space memory_space ; - - // The host_memory_space defines a space with host-resident memory. - // If the execution space's memory space is host accessible then use that execution space. - // else use the HostSpace. - typedef - typename Impl::if_c< Impl::is_same< memory_space , HostSpace >::value -#ifdef KOKKOS_HAVE_CUDA - || Impl::is_same< memory_space , CudaUVMSpace>::value - || Impl::is_same< memory_space , CudaHostPinnedSpace>::value -#endif - , memory_space , HostSpace >::type - host_memory_space ; - - // The host_execution_space defines a space which has access to HostSpace. - // If the execution space can access HostSpace then use that execution space. - // else use the DefaultHostExecutionSpace. 
-#ifdef KOKKOS_HAVE_CUDA - typedef - typename Impl::if_c< Impl::is_same< execution_space , Cuda >::value - , DefaultHostExecutionSpace , execution_space >::type - host_execution_space ; -#else - typedef execution_space host_execution_space; #endif - typedef Device<host_execution_space,host_memory_space> host_mirror_space; -}; -} -} - -#endif diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp index 663bb1985d3636e84e236660b1c58fda5579cccc..ee9c69e9227a8127426845ef5a563636137fb279 100644 --- a/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp @@ -46,7 +46,7 @@ #ifndef KOKKOS_IMPL_TASKQUEUE_HPP #define KOKKOS_IMPL_TASKQUEUE_HPP -#if defined( KOKKOS_ENABLE_TASKPOLICY ) +#if defined( KOKKOS_ENABLE_TASKDAG ) #include <string> #include <typeinfo> @@ -55,19 +55,29 @@ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -namespace Kokkos { - -template< typename > class TaskPolicy ; - -template< typename Arg1 = void , typename Arg2 = void > class Future ; - -} /* namespace Kokkos */ - namespace Kokkos { namespace Impl { -template< typename , typename , typename > class TaskBase ; -template< typename > class TaskExec ; +/*\brief Implementation data for task data management, access, and execution. + * + * Curiously recurring template pattern (CRTP) + * to allow static_cast from the + * task root type and a task's FunctorType. + * + * TaskBase< Space , ResultType , FunctorType > + * : TaskBase< Space , ResultType , void > + * , FunctorType + * { ... }; + * + * TaskBase< Space , ResultType , void > + * : TaskBase< Space , void , void > + * { ... 
}; + */ +template< typename Space , typename ResultType , typename FunctorType > +class TaskBase ; + +template< typename Space > +class TaskExec ; } /* namespace Impl */ } /* namespace Kokkos */ @@ -91,7 +101,7 @@ class TaskQueue { private: friend class TaskQueueSpecialization< ExecSpace > ; - friend class Kokkos::TaskPolicy< ExecSpace > ; + friend class Kokkos::TaskScheduler< ExecSpace > ; using execution_space = ExecSpace ; using specialization = TaskQueueSpecialization< execution_space > ; @@ -201,7 +211,7 @@ public: #endif if ( *lhs ) decrement( *lhs ); - if ( rhs ) { Kokkos::atomic_fetch_add( &(rhs->m_ref_count) , 1 ); } + if ( rhs ) { Kokkos::atomic_increment( &(rhs->m_ref_count) ); } // Force write of *lhs @@ -326,7 +336,7 @@ public: using execution_space = ExecSpace ; using queue_type = TaskQueue< execution_space > ; - template< typename > friend class Kokkos::TaskPolicy ; + template< typename > friend class Kokkos::TaskScheduler ; typedef void (* function_type) ( TaskBase * , void * ); @@ -494,6 +504,6 @@ public: //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ #endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_HPP */ diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp index 70a880d4a2e341a9f8e78df97c57531ca53492f6..05fd06a9ade307c475ca0d127ed8bb1171f8bbf3 100644 --- a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp @@ -41,7 +41,7 @@ //@HEADER */ -#if defined( KOKKOS_ENABLE_TASKPOLICY ) +#if defined( KOKKOS_ENABLE_TASKDAG ) namespace Kokkos { namespace Impl { @@ -67,6 +67,7 @@ TaskQueue< ExecSpace >::TaskQueue , arg_memory_pool_superblock_capacity_log2 ) , m_ready() , m_accum_alloc(0) + , m_count_alloc(0) , m_max_alloc(0) , 
m_ready_count(0) { @@ -122,7 +123,7 @@ void TaskQueue< ExecSpace >::decrement task->m_queue->deallocate( task , task->m_alloc_size ); } else if ( count <= 1 ) { - Kokkos::abort("TaskPolicy task has negative reference count or is incomplete" ); + Kokkos::abort("TaskScheduler task has negative reference count or is incomplete" ); } } @@ -565,5 +566,5 @@ void TaskQueue< ExecSpace >::complete } /* namespace Kokkos */ -#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ diff --git a/lib/kokkos/core/src/impl/Kokkos_Timer.hpp b/lib/kokkos/core/src/impl/Kokkos_Timer.hpp index 1f14e42874bda3c43f5f18bced120d73366abd40..293e395b88489f9cb63aa4c9717d8dc45ea2a56e 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Timer.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Timer.hpp @@ -44,74 +44,19 @@ #ifndef KOKKOS_IMPLWALLTIME_HPP #define KOKKOS_IMPLWALLTIME_HPP -#include <stddef.h> - -#ifdef _MSC_VER -#undef KOKKOS_USE_LIBRT -#include <gettimeofday.c> -#else -#ifdef KOKKOS_USE_LIBRT -#include <ctime> -#else -#include <sys/time.h> -#endif -#endif +#include <Kokkos_Timer.hpp> namespace Kokkos { namespace Impl { -/** \brief Time since construction */ - -class Timer { -private: - #ifdef KOKKOS_USE_LIBRT - struct timespec m_old; - #else - struct timeval m_old ; - #endif - Timer( const Timer & ); - Timer & operator = ( const Timer & ); -public: - - inline - void reset() { - #ifdef KOKKOS_USE_LIBRT - clock_gettime(CLOCK_REALTIME, &m_old); - #else - gettimeofday( & m_old , ((struct timezone *) NULL ) ); - #endif - } - - inline - ~Timer() {} - - inline - Timer() { reset(); } +/** \brief Time since construction + * Timer promoted from Impl to Kokkos ns + * This file included for backwards compatibility + */ - inline - double seconds() const - { - #ifdef KOKKOS_USE_LIBRT - struct timespec m_new; - clock_gettime(CLOCK_REALTIME, &m_new); - - return ( (double) ( m_new.tv_sec - m_old.tv_sec ) ) + - ( (double) ( m_new.tv_nsec - m_old.tv_nsec ) * 1.0e-9 ); - 
#else - struct timeval m_new ; - - ::gettimeofday( & m_new , ((struct timezone *) NULL ) ); - - return ( (double) ( m_new.tv_sec - m_old.tv_sec ) ) + - ( (double) ( m_new.tv_usec - m_old.tv_usec ) * 1.0e-6 ); - #endif - } -}; + using Kokkos::Timer ; } // namespace Impl - - using Kokkos::Impl::Timer ; - } // namespace Kokkos #endif /* #ifndef KOKKOS_IMPLWALLTIME_HPP */ diff --git a/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp b/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d66fdd9a57a7c6b4b7184c72c3f7595fca62a0c8 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp @@ -0,0 +1,414 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CORE_IMPL_UTILITIES_HPP +#define KOKKOS_CORE_IMPL_UTILITIES_HPP + +#include <Kokkos_Macros.hpp> +#include <type_traits> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { namespace Impl { + +// same as std::forward +// needed to allow perfect forwarding on the device +template <typename T> +KOKKOS_INLINE_FUNCTION +constexpr +T&& forward( typename std::remove_reference<T>::type& arg ) noexcept +{ return static_cast<T&&>(arg); } + +template <typename T> +KOKKOS_INLINE_FUNCTION +constexpr +T&& forward( typename std::remove_reference<T>::type&& arg ) noexcept +{ return static_cast<T&&>(arg); } + +// same as std::move +// needed to allowing moving on the device +template <typename T> +KOKKOS_INLINE_FUNCTION +constexpr +typename std::remove_reference<T>::type&& move( T&& arg ) noexcept +{ return static_cast<typename std::remove_reference<T>::type&&>(arg); } + +// empty function to allow expanding a variadic argument pack +template<typename... Args> +KOKKOS_INLINE_FUNCTION +void expand_variadic(Args &&...) {} + +//---------------------------------------- +// C++14 integer sequence +template< typename T , T ... 
Ints > +struct integer_sequence { + using value_type = T ; + static constexpr std::size_t size() noexcept { return sizeof...(Ints); } +}; + +template< typename T , std::size_t N > +struct make_integer_sequence_helper ; + +template< typename T , T N > +using make_integer_sequence = + typename make_integer_sequence_helper<T,N>::type ; + +template< typename T > +struct make_integer_sequence_helper< T , 0 > +{ using type = integer_sequence<T> ; }; + +template< typename T > +struct make_integer_sequence_helper< T , 1 > +{ using type = integer_sequence<T,0> ; }; + +template< typename T > +struct make_integer_sequence_helper< T , 2 > +{ using type = integer_sequence<T,0,1> ; }; + +template< typename T > +struct make_integer_sequence_helper< T , 3 > +{ using type = integer_sequence<T,0,1,2> ; }; + +template< typename T > +struct make_integer_sequence_helper< T , 4 > +{ using type = integer_sequence<T,0,1,2,3> ; }; + +template< typename T > +struct make_integer_sequence_helper< T , 5 > +{ using type = integer_sequence<T,0,1,2,3,4> ; }; + +template< typename T > +struct make_integer_sequence_helper< T , 6 > +{ using type = integer_sequence<T,0,1,2,3,4,5> ; }; + +template< typename T > +struct make_integer_sequence_helper< T , 7 > +{ using type = integer_sequence<T,0,1,2,3,4,5,6> ; }; + +template< typename T > +struct make_integer_sequence_helper< T , 8 > +{ using type = integer_sequence<T,0,1,2,3,4,5,6,7> ; }; + +template< typename X , typename Y > +struct make_integer_sequence_concat ; + +template< typename T , T ... x , T ... y > +struct make_integer_sequence_concat< integer_sequence<T,x...> + , integer_sequence<T,y...> > +{ using type = integer_sequence< T , x ... , (sizeof...(x)+y)... 
> ; }; + +template< typename T , std::size_t N > +struct make_integer_sequence_helper { + using type = typename make_integer_sequence_concat + < typename make_integer_sequence_helper< T , N/2 >::type + , typename make_integer_sequence_helper< T , N - N/2 >::type + >::type ; +}; + +//---------------------------------------- + +template <std::size_t... Indices> +using index_sequence = integer_sequence<std::size_t, Indices...>; + +template< std::size_t N > +using make_index_sequence = make_integer_sequence< std::size_t, N>; + +//---------------------------------------- + +template <unsigned I, typename IntegerSequence> +struct integer_sequence_at; + +template <unsigned I, typename T, T h0, T... tail> +struct integer_sequence_at<I, integer_sequence<T, h0, tail...> > + : public integer_sequence_at<I-1u, integer_sequence<T,tail...> > +{ + static_assert( 8 <= I , "Reasoning Error" ); + static_assert( I < integer_sequence<T, h0, tail...>::size(), "Error: Index out of bounds"); +}; + +template < typename T, T h0, T... tail> +struct integer_sequence_at<0u, integer_sequence<T,h0, tail...> > +{ + using type = T; + static constexpr T value = h0; +}; + +template < typename T, T h0, T h1, T... tail> +struct integer_sequence_at<1u, integer_sequence<T, h0, h1, tail...> > +{ + using type = T; + static constexpr T value = h1; +}; + +template < typename T, T h0, T h1, T h2, T... tail> +struct integer_sequence_at<2u, integer_sequence<T, h0, h1, h2, tail...> > +{ + using type = T; + static constexpr T value = h2; +}; + +template < typename T, T h0, T h1, T h2, T h3, T... tail> +struct integer_sequence_at<3u, integer_sequence<T, h0, h1, h2, h3, tail...> > +{ + using type = T; + static constexpr T value = h3; +}; + +template < typename T, T h0, T h1, T h2, T h3, T h4, T... tail> +struct integer_sequence_at<4u, integer_sequence<T, h0, h1, h2, h3, h4, tail...> > +{ + using type = T; + static constexpr T value = h4; +}; + +template < typename T, T h0, T h1, T h2, T h3, T h4, T h5, T... 
tail> +struct integer_sequence_at<5u, integer_sequence<T, h0, h1, h2, h3, h4, h5, tail...> > +{ + using type = T; + static constexpr T value = h5; +}; + +template < typename T, T h0, T h1, T h2, T h3, T h4, T h5, T h6, T... tail> +struct integer_sequence_at<6u, integer_sequence<T, h0, h1, h2, h3, h4, h5, h6, tail...> > +{ + using type = T; + static constexpr T value = h6; +}; + +template < typename T, T h0, T h1, T h2, T h3, T h4, T h5, T h6, T h7, T... tail> +struct integer_sequence_at<7u, integer_sequence<T, h0, h1, h2, h3, h4, h5, h6, h7, tail...> > +{ + using type = T; + static constexpr T value = h7; +}; + +//---------------------------------------- + +template <typename T> +constexpr +T at( const unsigned, integer_sequence<T> ) noexcept +{ return ~static_cast<T>(0); } + +template <typename T, T h0, T... tail> +constexpr +T at( const unsigned i, integer_sequence<T, h0> ) noexcept +{ return i==0u ? h0 : ~static_cast<T>(0); } + +template <typename T, T h0, T h1> +constexpr +T at( const unsigned i, integer_sequence<T, h0, h1> ) noexcept +{ return i==0u ? h0 : + i==1u ? h1 : ~static_cast<T>(0); +} + +template <typename T, T h0, T h1, T h2> +constexpr +T at( const unsigned i, integer_sequence<T, h0, h1, h2> ) noexcept +{ return i==0u ? h0 : + i==1u ? h1 : + i==2u ? h2 : ~static_cast<T>(0); +} + +template <typename T, T h0, T h1, T h2, T h3> +constexpr +T at( const unsigned i, integer_sequence<T, h0, h1, h2, h3> ) noexcept +{ return i==0u ? h0 : + i==1u ? h1 : + i==2u ? h2 : + i==3u ? h3 : ~static_cast<T>(0); +} + +template <typename T, T h0, T h1, T h2, T h3, T h4> +constexpr +T at( const unsigned i, integer_sequence<T, h0, h1, h2, h3, h4> ) noexcept +{ return i==0u ? h0 : + i==1u ? h1 : + i==2u ? h2 : + i==3u ? h3 : + i==4u ? h4 : ~static_cast<T>(0); +} + +template <typename T, T h0, T h1, T h2, T h3, T h4, T h5> +constexpr +T at( const unsigned i, integer_sequence<T, h0, h1, h2, h3, h4, h5> ) noexcept +{ return i==0u ? h0 : + i==1u ? h1 : + i==2u ? h2 : + i==3u ? 
h3 : + i==4u ? h4 : + i==5u ? h5 : ~static_cast<T>(0); +} + +template <typename T, T h0, T h1, T h2, T h3, T h4, T h5, T h6> +constexpr +T at( const unsigned i, integer_sequence<T, h0, h1, h2, h3, h4, h5, h6> ) noexcept +{ return i==0u ? h0 : + i==1u ? h1 : + i==2u ? h2 : + i==3u ? h3 : + i==4u ? h4 : + i==5u ? h5 : + i==6u ? h6 : ~static_cast<T>(0); +} + +template <typename T, T h0, T h1, T h2, T h3, T h4, T h5, T h6, T h7, T... tail> +constexpr +T at( const unsigned i, integer_sequence<T, h0, h1, h2, h3, h4, h5, h6, h7, tail...> ) noexcept +{ return i==0u ? h0 : + i==1u ? h1 : + i==2u ? h2 : + i==3u ? h3 : + i==4u ? h4 : + i==5u ? h5 : + i==6u ? h6 : + i==7u ? h7 : at(i-8u, integer_sequence<T, tail...>{} ); +} + +//---------------------------------------- + + +template < typename IntegerSequence + , typename ResultSequence = integer_sequence<typename IntegerSequence::value_type> + > +struct reverse_integer_sequence_helper; + +template <typename T, T h0, T... tail, T... results> +struct reverse_integer_sequence_helper< integer_sequence<T, h0, tail...>, integer_sequence<T, results...> > + : public reverse_integer_sequence_helper< integer_sequence<T, tail...>, integer_sequence<T, h0, results...> > +{}; + +template <typename T, T... results> +struct reverse_integer_sequence_helper< integer_sequence<T>, integer_sequence<T, results...> > +{ + using type = integer_sequence<T, results...>; +}; + + +template <typename IntegerSequence> +using reverse_integer_sequence = typename reverse_integer_sequence_helper<IntegerSequence>::type; + +//---------------------------------------- + +template < typename IntegerSequence + , typename Result + , typename ResultSequence = integer_sequence<typename IntegerSequence::value_type> + > +struct exclusive_scan_integer_sequence_helper; + +template <typename T, T h0, T... tail, typename Result, T... 
results> +struct exclusive_scan_integer_sequence_helper + < integer_sequence<T, h0, tail...> + , Result + , integer_sequence<T, results...> > + : public exclusive_scan_integer_sequence_helper + < integer_sequence<T, tail...> + , std::integral_constant<T,Result::value+h0> + , integer_sequence<T, 0, (results+h0)...> > +{}; + +template <typename T, typename Result, T... results> +struct exclusive_scan_integer_sequence_helper + < integer_sequence<T>, Result, integer_sequence<T, results...> > +{ + using type = integer_sequence<T, results...>; + static constexpr T value = Result::value ; +}; + +template <typename IntegerSequence> +struct exclusive_scan_integer_sequence +{ + using value_type = typename IntegerSequence::value_type; + using helper = + exclusive_scan_integer_sequence_helper + < reverse_integer_sequence<IntegerSequence> + , std::integral_constant< value_type , 0 > + > ; + using type = typename helper::type ; + static constexpr value_type value = helper::value ; +}; + +//---------------------------------------- + +template < typename IntegerSequence + , typename Result + , typename ResultSequence = integer_sequence<typename IntegerSequence::value_type> + > +struct inclusive_scan_integer_sequence_helper; + +template <typename T, T h0, T... tail, typename Result, T... results> +struct inclusive_scan_integer_sequence_helper + < integer_sequence<T, h0, tail...> + , Result + , integer_sequence<T, results...> > + : public inclusive_scan_integer_sequence_helper + < integer_sequence<T, tail...> + , std::integral_constant<T,Result::value+h0> + , integer_sequence<T, h0, (results+h0)...> > +{}; + +template <typename T, typename Result, T... 
results> +struct inclusive_scan_integer_sequence_helper + < integer_sequence<T>, Result, integer_sequence<T, results...> > +{ + using type = integer_sequence<T, results...>; + static constexpr T value = Result::value ; +}; + +template <typename IntegerSequence> +struct inclusive_scan_integer_sequence +{ + using value_type = typename IntegerSequence::value_type; + using helper = + inclusive_scan_integer_sequence_helper + < reverse_integer_sequence<IntegerSequence> + , std::integral_constant< value_type , 0 > + > ; + using type = typename helper::type ; + static constexpr value_type value = helper::value ; +}; + +}} // namespace Kokkos::Impl + + +#endif //KOKKOS_CORE_IMPL_UTILITIES diff --git a/lib/kokkos/core/src/impl/KokkosExp_ViewArray.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp similarity index 96% rename from lib/kokkos/core/src/impl/KokkosExp_ViewArray.hpp rename to lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp index 17d28ace4dae471accfa91ab52629aee357850e9..c55636b64ea8331ae2a2d66fc2479b727cbf5115 100644 --- a/lib/kokkos/core/src/impl/KokkosExp_ViewArray.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp @@ -116,7 +116,7 @@ class ViewMapping< Traits , private: template< class , class ... > friend class ViewMapping ; - template< class , class ... > friend class Kokkos::Experimental::View ; + template< class , class ... > friend class Kokkos::View ; typedef ViewOffset< typename Traits::dimension , typename Traits::array_layout @@ -301,17 +301,17 @@ public: //---------------------------------------- template< class ... P > - SharedAllocationRecord<> * - allocate_shared( ViewCtorProp< P... > const & arg_prop + Kokkos::Impl::SharedAllocationRecord<> * + allocate_shared( Kokkos::Impl::ViewCtorProp< P... > const & arg_prop , typename Traits::array_layout const & arg_layout ) { - typedef ViewCtorProp< P... > alloc_prop ; + typedef Kokkos::Impl::ViewCtorProp< P... 
> alloc_prop ; typedef typename alloc_prop::execution_space execution_space ; typedef typename Traits::memory_space memory_space ; typedef ViewValueFunctor< execution_space , scalar_type > functor_type ; - typedef SharedAllocationRecord< memory_space , functor_type > record_type ; + typedef Kokkos::Impl::SharedAllocationRecord< memory_space , functor_type > record_type ; // Query the mapping for byte-size of allocation. typedef std::integral_constant< unsigned , @@ -324,8 +324,8 @@ public: // Allocate memory from the memory space and create tracking record. record_type * const record = - record_type::allocate( ((ViewCtorProp<void,memory_space> const &) arg_prop ).value - , ((ViewCtorProp<void,std::string> const &) arg_prop ).value + record_type::allocate( ((Kokkos::Impl::ViewCtorProp<void,memory_space> const &) arg_prop ).value + , ((Kokkos::Impl::ViewCtorProp<void,std::string> const &) arg_prop ).value , alloc_size ); if ( alloc_size ) { @@ -334,7 +334,7 @@ public: if ( alloc_prop::initialize ) { // The functor constructs and destroys - record->m_destroy = functor_type( ((ViewCtorProp<void,execution_space> const & )arg_prop).value + record->m_destroy = functor_type( ((Kokkos::Impl::ViewCtorProp<void,execution_space> const & )arg_prop).value , (pointer_type) m_handle , m_offset.span() * Array_N ); @@ -377,7 +377,7 @@ public: enum { is_assignable = true }; - typedef Kokkos::Experimental::Impl::SharedAllocationTracker TrackType ; + typedef Kokkos::Impl::SharedAllocationTracker TrackType ; typedef ViewMapping< DstTraits , void > DstType ; typedef ViewMapping< SrcTraits , void > SrcType ; @@ -436,7 +436,7 @@ public: enum { is_assignable = std::is_same< typename DstTraits::data_type , typename SrcTraits::scalar_array_type >::value && std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value }; - typedef Kokkos::Experimental::Impl::SharedAllocationTracker TrackType ; + typedef Kokkos::Impl::SharedAllocationTracker TrackType ; typedef 
ViewMapping< DstTraits , void > DstType ; typedef ViewMapping< SrcTraits , void > SrcType ; @@ -558,13 +558,13 @@ private: public: - typedef Kokkos::Experimental::ViewTraits + typedef Kokkos::ViewTraits < data_type , array_layout , typename SrcTraits::device_type , typename SrcTraits::memory_traits > traits_type ; - typedef Kokkos::Experimental::View + typedef Kokkos::View < data_type , array_layout , typename SrcTraits::device_type diff --git a/lib/kokkos/core/src/impl/KokkosExp_ViewCtor.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewCtor.hpp similarity index 99% rename from lib/kokkos/core/src/impl/KokkosExp_ViewCtor.hpp rename to lib/kokkos/core/src/impl/Kokkos_ViewCtor.hpp index 6525fed0a5ceb5995db3517b84fec6f7985e6d54..6381aee468c9ee114c5c050e20565c2a8e52b127 100644 --- a/lib/kokkos/core/src/impl/KokkosExp_ViewCtor.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewCtor.hpp @@ -70,7 +70,6 @@ struct ViewAllocateWithoutInitializing { //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace Impl { struct WithoutInitializing_t {}; @@ -242,7 +241,6 @@ public: }; } /* namespace Impl */ -} /* namespace Experimental */ } /* namespace Kokkos */ //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewDefault.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewDefault.hpp deleted file mode 100644 index 94c8e13c1d445953fabc852aaece3fa8d07fa5eb..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/impl/Kokkos_ViewDefault.hpp +++ /dev/null @@ -1,886 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_VIEWDEFAULT_HPP -#define KOKKOS_VIEWDEFAULT_HPP - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template<> -struct ViewAssignment< ViewDefault , ViewDefault , void > -{ - typedef ViewDefault Specialize ; - - //------------------------------------ - /** \brief Compatible value and shape and LayoutLeft/Right to LayoutStride*/ - - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View<DT,DL,DD,DM,Specialize> & dst , - const View<ST,SL,SD,SM,Specialize> & src , - const typename enable_if<( - ViewAssignable< ViewTraits<DT,DL,DD,DM> , - ViewTraits<ST,SL,SD,SM> >::value - || - ( ViewAssignable< ViewTraits<DT,DL,DD,DM> , - ViewTraits<ST,SL,SD,SM> >::assignable_value - && - ShapeCompatible< typename ViewTraits<DT,DL,DD,DM>::shape_type , - typename ViewTraits<ST,SL,SD,SM>::shape_type >::value - && - is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutStride>::value - && (is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout,LayoutLeft>::value || - is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout,LayoutRight>::value)) - )>::type * = 0 ) - { - dst.m_offset_map.assign( src.m_offset_map ); - - dst.m_management = src.m_management ; - - dst.m_ptr_on_device = ViewDataManagement< ViewTraits<DT,DL,DD,DM> >::create_handle( src.m_ptr_on_device, src.m_tracker ); - - if( dst.is_managed ) - dst.m_tracker = src.m_tracker ; - else { - dst.m_tracker = AllocationTracker(); - dst.m_management.set_unmanaged(); - } - } - - - /** \brief Assign 1D Strided View to LayoutLeft or LayoutRight if stride[0]==1 */ - - template< class DT , class DL , class DD , class DM , - class ST , class SD 
, class SM > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View<DT,DL,DD,DM,Specialize> & dst , - const View<ST,LayoutStride,SD,SM,Specialize> & src , - const typename enable_if<( - ( - ViewAssignable< ViewTraits<DT,DL,DD,DM> , - ViewTraits<ST,LayoutStride,SD,SM> >::value - || - ( ViewAssignable< ViewTraits<DT,DL,DD,DM> , - ViewTraits<ST,LayoutStride,SD,SM> >::assignable_value - && - ShapeCompatible< typename ViewTraits<DT,DL,DD,DM>::shape_type , - typename ViewTraits<ST,LayoutStride,SD,SM>::shape_type >::value - ) - ) - && - (View<DT,DL,DD,DM,Specialize>::rank==1) - && (is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutLeft>::value || - is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutRight>::value) - )>::type * = 0 ) - { - size_t strides[8]; - src.stride(strides); - if(strides[0]!=1) { - Kokkos::abort("Trying to assign strided 1D View to LayoutRight or LayoutLeft which is not stride-1"); - } - dst.m_offset_map.assign( src.dimension_0(), 0, 0, 0, 0, 0, 0, 0, 0 ); - - dst.m_management = src.m_management ; - - dst.m_ptr_on_device = ViewDataManagement< ViewTraits<DT,DL,DD,DM> >::create_handle( src.m_ptr_on_device, src.m_tracker ); - - if( dst.is_managed ) - dst.m_tracker = src.m_tracker ; - else { - dst.m_tracker = AllocationTracker(); - dst.m_management.set_unmanaged(); - } - } - - //------------------------------------ - /** \brief Deep copy data from compatible value type, layout, rank, and specialization. - * Check the dimensions and allocation lengths at runtime. 
- */ - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - inline static - void deep_copy( const View<DT,DL,DD,DM,Specialize> & dst , - const View<ST,SL,SD,SM,Specialize> & src , - const typename Impl::enable_if<( - Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::value_type , - typename ViewTraits<ST,SL,SD,SM>::non_const_value_type >::value - && - Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout , - typename ViewTraits<ST,SL,SD,SM>::array_layout >::value - && - ( unsigned(ViewTraits<DT,DL,DD,DM>::rank) == unsigned(ViewTraits<ST,SL,SD,SM>::rank) ) - )>::type * = 0 ) - { - typedef typename ViewTraits<DT,DL,DD,DM>::memory_space dst_memory_space ; - typedef typename ViewTraits<ST,SL,SD,SM>::memory_space src_memory_space ; - - if ( dst.ptr_on_device() != src.ptr_on_device() ) { - - Impl::assert_shapes_are_equal( dst.m_offset_map , src.m_offset_map ); - - const size_t nbytes = dst.m_offset_map.scalar_size * dst.m_offset_map.capacity(); - - DeepCopy< dst_memory_space , src_memory_space >( dst.ptr_on_device() , src.ptr_on_device() , nbytes ); - } - } -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class ExecSpace , class DT , class DL, class DD, class DM, class DS > -struct ViewDefaultConstruct< ExecSpace , Kokkos::View<DT,DL,DD,DM,DS> , true > -{ - Kokkos::View<DT,DL,DD,DM,DS> * const m_ptr ; - - KOKKOS_FORCEINLINE_FUNCTION - void operator()( const typename ExecSpace::size_type& i ) const - { new(m_ptr+i) Kokkos::View<DT,DL,DD,DM,DS>(); } - - ViewDefaultConstruct( Kokkos::View<DT,DL,DD,DM,DS> * pointer , size_t capacity ) - : m_ptr( pointer ) - { - Kokkos::RangePolicy< ExecSpace > range( 0 , capacity ); - parallel_for( range , *this ); - ExecSpace::fence(); - } -}; - -template< class 
SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type - , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type - , class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type - > -struct ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > - , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type - , SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type > -{ -private: - - typedef View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > SrcViewType ; - - enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 }; - enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 }; - enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 }; - enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 }; - enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 }; - enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 }; - enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 }; - enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 }; - - // The source view rank must be equal to the input argument rank - // Once a void argument is encountered all subsequent arguments must be void. - enum { InputRank = - Impl::StaticAssert<( SrcViewType::rank == - ( V0 ? 0 : ( - V1 ? 1 : ( - V2 ? 2 : ( - V3 ? 3 : ( - V4 ? 4 : ( - V5 ? 5 : ( - V6 ? 6 : ( - V7 ? 7 : 8 ))))))) )) - && - ( SrcViewType::rank == - ( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) ) - >::value ? SrcViewType::rank : 0 }; - - enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 }; - enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 }; - enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 }; - enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 }; - enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 
1 : 0 }; - enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 }; - enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 }; - enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 }; - - enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3) - + unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) }; - - // Reverse - enum { R0_rev = 0 == InputRank ? 0u : ( - 1 == InputRank ? unsigned(R0) : ( - 2 == InputRank ? unsigned(R1) : ( - 3 == InputRank ? unsigned(R2) : ( - 4 == InputRank ? unsigned(R3) : ( - 5 == InputRank ? unsigned(R4) : ( - 6 == InputRank ? unsigned(R5) : ( - 7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) }; - - typedef typename SrcViewType::array_layout SrcViewLayout ; - - // Choose array layout, attempting to preserve original layout if at all possible. - typedef typename Impl::if_c< - ( // Same Layout IF - // OutputRank 0 - ( OutputRank == 0 ) - || - // OutputRank 1 or 2, InputLayout Left, Interval 0 - // because single stride one or second index has a stride. - ( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value ) - || - // OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1] - // because single stride one or second index has a stride. - ( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value ) - ), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ; - - // Choose data type as a purely dynamic rank array to accomodate a runtime range. 
- typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type , - typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *, - typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **, - typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***, - typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****, - typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****, - typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******, - typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******, - typename SrcViewType::value_type ******** - >::type >::type >::type >::type >::type >::type >::type >::type OutputData ; - - // Choose space. - // If the source view's template arg1 or arg2 is a space then use it, - // otherwise use the source view's execution space. - - typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type , - typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::device_type - >::type >::type OutputSpace ; - -public: - - // If keeping the layout then match non-data type arguments - // else keep execution space and memory traits. 
- typedef typename - Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value - , Kokkos::View< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > - , Kokkos::View< OutputData , OutputViewLayout , OutputSpace - , typename SrcViewType::memory_traits - , Impl::ViewDefault > - >::type type ; -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -// Construct subview of a Rank 8 view -template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type > -template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type - , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type - , class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type - > -KOKKOS_INLINE_FUNCTION -View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >:: -View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src - , const SubArg0_type & arg0 - , const SubArg1_type & arg1 - , const SubArg2_type & arg2 - , const SubArg3_type & arg3 - , const SubArg4_type & arg4 - , const SubArg5_type & arg5 - , const SubArg6_type & arg6 - , const SubArg7_type & arg7 - ) - : m_ptr_on_device( (typename traits::value_type*) NULL) - , m_offset_map() - , m_management() - , m_tracker() -{ - // This constructor can only be used to construct a subview - // from the source view. This type must match the subview type - // deduced from the source view and subview arguments. 
- - typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > - , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type - , SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type > - ViewSubviewDeduction ; - - enum { is_a_valid_subview_constructor = - Impl::StaticAssert< - Impl::is_same< View , typename ViewSubviewDeduction::type >::value - >::value - }; - - if ( is_a_valid_subview_constructor ) { - - typedef Impl::ViewOffsetRange< SubArg0_type > R0 ; - typedef Impl::ViewOffsetRange< SubArg1_type > R1 ; - typedef Impl::ViewOffsetRange< SubArg2_type > R2 ; - typedef Impl::ViewOffsetRange< SubArg3_type > R3 ; - typedef Impl::ViewOffsetRange< SubArg4_type > R4 ; - typedef Impl::ViewOffsetRange< SubArg5_type > R5 ; - typedef Impl::ViewOffsetRange< SubArg6_type > R6 ; - typedef Impl::ViewOffsetRange< SubArg7_type > R7 ; - - // 'assign_subview' returns whether the subview offset_map - // introduces noncontiguity in the view. - const bool introduce_noncontiguity = - m_offset_map.assign_subview( src.m_offset_map - , R0::dimension( src.m_offset_map.N0 , arg0 ) - , R1::dimension( src.m_offset_map.N1 , arg1 ) - , R2::dimension( src.m_offset_map.N2 , arg2 ) - , R3::dimension( src.m_offset_map.N3 , arg3 ) - , R4::dimension( src.m_offset_map.N4 , arg4 ) - , R5::dimension( src.m_offset_map.N5 , arg5 ) - , R6::dimension( src.m_offset_map.N6 , arg6 ) - , R7::dimension( src.m_offset_map.N7 , arg7 ) - ); - - if ( m_offset_map.capacity() ) { - - m_management = src.m_management ; - - if ( introduce_noncontiguity ) m_management.set_noncontiguous(); - - m_ptr_on_device = src.m_ptr_on_device + - src.m_offset_map( R0::begin( arg0 ) - , R1::begin( arg1 ) - , R2::begin( arg2 ) - , R3::begin( arg3 ) - , R4::begin( arg4 ) - , R5::begin( arg5 ) - , R6::begin( arg6 ) - , R7::begin( arg7 ) ); - m_tracker = src.m_tracker ; - } - } -} - -// Construct subview of a Rank 7 view -template< class DstDataType , class DstArg1Type , class 
DstArg2Type , class DstArg3Type > -template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type - , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type - , class SubArg4_type , class SubArg5_type , class SubArg6_type - > -KOKKOS_INLINE_FUNCTION -View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >:: -View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src - , const SubArg0_type & arg0 - , const SubArg1_type & arg1 - , const SubArg2_type & arg2 - , const SubArg3_type & arg3 - , const SubArg4_type & arg4 - , const SubArg5_type & arg5 - , const SubArg6_type & arg6 - ) - : m_ptr_on_device( (typename traits::value_type*) NULL) - , m_offset_map() - , m_management() - , m_tracker() -{ - // This constructor can only be used to construct a subview - // from the source view. This type must match the subview type - // deduced from the source view and subview arguments. - - typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > - , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type - , SubArg4_type , SubArg5_type , SubArg6_type , void > - ViewSubviewDeduction ; - - enum { is_a_valid_subview_constructor = - Impl::StaticAssert< - Impl::is_same< View , typename ViewSubviewDeduction::type >::value - >::value - }; - - if ( is_a_valid_subview_constructor ) { - - typedef Impl::ViewOffsetRange< SubArg0_type > R0 ; - typedef Impl::ViewOffsetRange< SubArg1_type > R1 ; - typedef Impl::ViewOffsetRange< SubArg2_type > R2 ; - typedef Impl::ViewOffsetRange< SubArg3_type > R3 ; - typedef Impl::ViewOffsetRange< SubArg4_type > R4 ; - typedef Impl::ViewOffsetRange< SubArg5_type > R5 ; - typedef Impl::ViewOffsetRange< SubArg6_type > R6 ; - - // 'assign_subview' returns whether the subview offset_map - // introduces noncontiguity in the view. 
- const bool introduce_noncontiguity = - m_offset_map.assign_subview( src.m_offset_map - , R0::dimension( src.m_offset_map.N0 , arg0 ) - , R1::dimension( src.m_offset_map.N1 , arg1 ) - , R2::dimension( src.m_offset_map.N2 , arg2 ) - , R3::dimension( src.m_offset_map.N3 , arg3 ) - , R4::dimension( src.m_offset_map.N4 , arg4 ) - , R5::dimension( src.m_offset_map.N5 , arg5 ) - , R6::dimension( src.m_offset_map.N6 , arg6 ) - , 0 - ); - - if ( m_offset_map.capacity() ) { - - m_management = src.m_management ; - - if ( introduce_noncontiguity ) m_management.set_noncontiguous(); - - m_ptr_on_device = src.m_ptr_on_device + - src.m_offset_map( R0::begin( arg0 ) - , R1::begin( arg1 ) - , R2::begin( arg2 ) - , R3::begin( arg3 ) - , R4::begin( arg4 ) - , R5::begin( arg5 ) - , R6::begin( arg6 ) - ); - m_tracker = src.m_tracker ; - } - } -} - -// Construct subview of a Rank 6 view -template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type > -template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type - , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type - , class SubArg4_type , class SubArg5_type - > -KOKKOS_INLINE_FUNCTION -View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >:: -View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src - , const SubArg0_type & arg0 - , const SubArg1_type & arg1 - , const SubArg2_type & arg2 - , const SubArg3_type & arg3 - , const SubArg4_type & arg4 - , const SubArg5_type & arg5 - ) - : m_ptr_on_device( (typename traits::value_type*) NULL) - , m_offset_map() - , m_management() - , m_tracker() -{ - // This constructor can only be used to construct a subview - // from the source view. This type must match the subview type - // deduced from the source view and subview arguments. 
- - typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > - , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type - , SubArg4_type , SubArg5_type , void , void > - ViewSubviewDeduction ; - - enum { is_a_valid_subview_constructor = - Impl::StaticAssert< - Impl::is_same< View , typename ViewSubviewDeduction::type >::value - >::value - }; - - if ( is_a_valid_subview_constructor ) { - - typedef Impl::ViewOffsetRange< SubArg0_type > R0 ; - typedef Impl::ViewOffsetRange< SubArg1_type > R1 ; - typedef Impl::ViewOffsetRange< SubArg2_type > R2 ; - typedef Impl::ViewOffsetRange< SubArg3_type > R3 ; - typedef Impl::ViewOffsetRange< SubArg4_type > R4 ; - typedef Impl::ViewOffsetRange< SubArg5_type > R5 ; - - // 'assign_subview' returns whether the subview offset_map - // introduces noncontiguity in the view. - const bool introduce_noncontiguity = - m_offset_map.assign_subview( src.m_offset_map - , R0::dimension( src.m_offset_map.N0 , arg0 ) - , R1::dimension( src.m_offset_map.N1 , arg1 ) - , R2::dimension( src.m_offset_map.N2 , arg2 ) - , R3::dimension( src.m_offset_map.N3 , arg3 ) - , R4::dimension( src.m_offset_map.N4 , arg4 ) - , R5::dimension( src.m_offset_map.N5 , arg5 ) - , 0 - , 0 - ); - - if ( m_offset_map.capacity() ) { - - m_management = src.m_management ; - - if ( introduce_noncontiguity ) m_management.set_noncontiguous(); - - m_ptr_on_device = src.m_ptr_on_device + - src.m_offset_map( R0::begin( arg0 ) - , R1::begin( arg1 ) - , R2::begin( arg2 ) - , R3::begin( arg3 ) - , R4::begin( arg4 ) - , R5::begin( arg5 ) - ); - m_tracker = src.m_tracker ; - } - } -} - -// Construct subview of a Rank 5 view -template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type > -template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type - , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type - , class SubArg4_type - > 
-KOKKOS_INLINE_FUNCTION -View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >:: -View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src - , const SubArg0_type & arg0 - , const SubArg1_type & arg1 - , const SubArg2_type & arg2 - , const SubArg3_type & arg3 - , const SubArg4_type & arg4 - ) - : m_ptr_on_device( (typename traits::value_type*) NULL) - , m_offset_map() - , m_management() - , m_tracker() -{ - // This constructor can only be used to construct a subview - // from the source view. This type must match the subview type - // deduced from the source view and subview arguments. - - typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > - , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type - , SubArg4_type , void , void , void > - ViewSubviewDeduction ; - - enum { is_a_valid_subview_constructor = - Impl::StaticAssert< - Impl::is_same< View , typename ViewSubviewDeduction::type >::value - >::value - }; - - if ( is_a_valid_subview_constructor ) { - - typedef Impl::ViewOffsetRange< SubArg0_type > R0 ; - typedef Impl::ViewOffsetRange< SubArg1_type > R1 ; - typedef Impl::ViewOffsetRange< SubArg2_type > R2 ; - typedef Impl::ViewOffsetRange< SubArg3_type > R3 ; - typedef Impl::ViewOffsetRange< SubArg4_type > R4 ; - - // 'assign_subview' returns whether the subview offset_map - // introduces noncontiguity in the view. 
- const bool introduce_noncontiguity = - m_offset_map.assign_subview( src.m_offset_map - , R0::dimension( src.m_offset_map.N0 , arg0 ) - , R1::dimension( src.m_offset_map.N1 , arg1 ) - , R2::dimension( src.m_offset_map.N2 , arg2 ) - , R3::dimension( src.m_offset_map.N3 , arg3 ) - , R4::dimension( src.m_offset_map.N4 , arg4 ) - , 0 - , 0 - , 0 - ); - - if ( m_offset_map.capacity() ) { - - m_management = src.m_management ; - - if ( introduce_noncontiguity ) m_management.set_noncontiguous(); - - m_ptr_on_device = src.m_ptr_on_device + - src.m_offset_map( R0::begin( arg0 ) - , R1::begin( arg1 ) - , R2::begin( arg2 ) - , R3::begin( arg3 ) - , R4::begin( arg4 ) - ); - m_tracker = src.m_tracker ; - } - } -} - -// Construct subview of a Rank 4 view -template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type > -template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type - , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type - > -KOKKOS_INLINE_FUNCTION -View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >:: -View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src - , const SubArg0_type & arg0 - , const SubArg1_type & arg1 - , const SubArg2_type & arg2 - , const SubArg3_type & arg3 - ) - : m_ptr_on_device( (typename traits::value_type*) NULL) - , m_offset_map() - , m_management() - , m_tracker() -{ - // This constructor can only be used to construct a subview - // from the source view. This type must match the subview type - // deduced from the source view and subview arguments. 
- - typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > - , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type - , void , void , void , void > - ViewSubviewDeduction ; - - enum { is_a_valid_subview_constructor = - Impl::StaticAssert< - Impl::is_same< View , typename ViewSubviewDeduction::type >::value - >::value - }; - - if ( is_a_valid_subview_constructor ) { - - typedef Impl::ViewOffsetRange< SubArg0_type > R0 ; - typedef Impl::ViewOffsetRange< SubArg1_type > R1 ; - typedef Impl::ViewOffsetRange< SubArg2_type > R2 ; - typedef Impl::ViewOffsetRange< SubArg3_type > R3 ; - - // 'assign_subview' returns whether the subview offset_map - // introduces noncontiguity in the view. - const bool introduce_noncontiguity = - m_offset_map.assign_subview( src.m_offset_map - , R0::dimension( src.m_offset_map.N0 , arg0 ) - , R1::dimension( src.m_offset_map.N1 , arg1 ) - , R2::dimension( src.m_offset_map.N2 , arg2 ) - , R3::dimension( src.m_offset_map.N3 , arg3 ) - , 0 - , 0 - , 0 - , 0 - ); - - if ( m_offset_map.capacity() ) { - - m_management = src.m_management ; - - if ( introduce_noncontiguity ) m_management.set_noncontiguous(); - - m_ptr_on_device = src.m_ptr_on_device + - src.m_offset_map( R0::begin( arg0 ) - , R1::begin( arg1 ) - , R2::begin( arg2 ) - , R3::begin( arg3 ) - ); - m_tracker = src.m_tracker ; - } - } -} - -// Construct subview of a Rank 3 view -template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type > -template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type - , class SubArg0_type , class SubArg1_type , class SubArg2_type - > -KOKKOS_INLINE_FUNCTION -View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >:: -View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src - , const SubArg0_type & arg0 - , const SubArg1_type & arg1 - , const SubArg2_type & arg2 - ) - : 
m_ptr_on_device( (typename traits::value_type*) NULL) - , m_offset_map() - , m_management() - , m_tracker() -{ - // This constructor can only be used to construct a subview - // from the source view. This type must match the subview type - // deduced from the source view and subview arguments. - - typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > - , SubArg0_type , SubArg1_type , SubArg2_type , void , void , void , void , void > - ViewSubviewDeduction ; - - enum { is_a_valid_subview_constructor = - Impl::StaticAssert< - Impl::is_same< View , typename ViewSubviewDeduction::type >::value - >::value - }; - - if ( is_a_valid_subview_constructor ) { - - typedef Impl::ViewOffsetRange< SubArg0_type > R0 ; - typedef Impl::ViewOffsetRange< SubArg1_type > R1 ; - typedef Impl::ViewOffsetRange< SubArg2_type > R2 ; - - // 'assign_subview' returns whether the subview offset_map - // introduces noncontiguity in the view. - const bool introduce_noncontiguity = - m_offset_map.assign_subview( src.m_offset_map - , R0::dimension( src.m_offset_map.N0 , arg0 ) - , R1::dimension( src.m_offset_map.N1 , arg1 ) - , R2::dimension( src.m_offset_map.N2 , arg2 ) - , 0 , 0 , 0 , 0 , 0); - - if ( m_offset_map.capacity() ) { - - m_management = src.m_management ; - - if ( introduce_noncontiguity ) m_management.set_noncontiguous(); - - m_ptr_on_device = src.m_ptr_on_device + - src.m_offset_map( R0::begin( arg0 ) - , R1::begin( arg1 ) - , R2::begin( arg2 ) - ); - m_tracker = src.m_tracker ; - } - } -} - -// Construct subview of a Rank 2 view -template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type > -template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type - , class SubArg0_type , class SubArg1_type - > -KOKKOS_INLINE_FUNCTION -View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >:: -View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , 
SrcArg3Type , Impl::ViewDefault > & src - , const SubArg0_type & arg0 - , const SubArg1_type & arg1 - ) - : m_ptr_on_device( (typename traits::value_type*) NULL) - , m_offset_map() - , m_management() - , m_tracker() -{ - // This constructor can only be used to construct a subview - // from the source view. This type must match the subview type - // deduced from the source view and subview arguments. - - typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > - , SubArg0_type , SubArg1_type , void , void , void , void , void , void > - ViewSubviewDeduction ; - - enum { is_a_valid_subview_constructor = - Impl::StaticAssert< - Impl::is_same< View , typename ViewSubviewDeduction::type >::value - >::value - }; - - if ( is_a_valid_subview_constructor ) { - - typedef Impl::ViewOffsetRange< SubArg0_type > R0 ; - typedef Impl::ViewOffsetRange< SubArg1_type > R1 ; - - // 'assign_subview' returns whether the subview offset_map - // introduces noncontiguity in the view. 
- const bool introduce_noncontiguity = - m_offset_map.assign_subview( src.m_offset_map - , R0::dimension( src.m_offset_map.N0 , arg0 ) - , R1::dimension( src.m_offset_map.N1 , arg1 ) - , 0 , 0 , 0 , 0 , 0 , 0 ); - - if ( m_offset_map.capacity() ) { - - m_management = src.m_management ; - - if ( introduce_noncontiguity ) m_management.set_noncontiguous(); - - m_ptr_on_device = src.m_ptr_on_device + - src.m_offset_map( R0::begin( arg0 ) - , R1::begin( arg1 ) - ); - m_tracker = src.m_tracker ; - } - } -} - -// Construct subview of a Rank 1 view -template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type > -template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type - , class SubArg0_type - > -KOKKOS_INLINE_FUNCTION -View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >:: -View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src - , const SubArg0_type & arg0 - ) - : m_ptr_on_device( (typename traits::value_type*) NULL) - , m_offset_map() - , m_management() - , m_tracker() -{ - // This constructor can only be used to construct a subview - // from the source view. This type must match the subview type - // deduced from the source view and subview arguments. - - typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > - , SubArg0_type , void , void , void , void , void , void , void > - ViewSubviewDeduction ; - - enum { is_a_valid_subview_constructor = - Impl::StaticAssert< - Impl::is_same< View , typename ViewSubviewDeduction::type >::value - >::value - }; - - if ( is_a_valid_subview_constructor ) { - - typedef Impl::ViewOffsetRange< SubArg0_type > R0 ; - - // 'assign_subview' returns whether the subview offset_map - // introduces noncontiguity in the view. 
- const bool introduce_noncontiguity = - m_offset_map.assign_subview( src.m_offset_map - , R0::dimension( src.m_offset_map.N0 , arg0 ) - , 0 , 0 , 0 , 0 , 0 , 0 , 0 ); - - if ( m_offset_map.capacity() ) { - - m_management = src.m_management ; - - if ( introduce_noncontiguity ) m_management.set_noncontiguous(); - - m_ptr_on_device = src.m_ptr_on_device + - src.m_offset_map( R0::begin( arg0 ) - ); - m_tracker = src.m_tracker ; - } - } -} - -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_VIEWDEFAULT_HPP */ - diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp new file mode 100644 index 0000000000000000000000000000000000000000..588166c1855402851b40d38e5fdb98cd585c7e00 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp @@ -0,0 +1,3156 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP +#define KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP + +#include <type_traits> +#include <initializer_list> + +#include <Kokkos_Core_fwd.hpp> +#include <Kokkos_Pair.hpp> +#include <Kokkos_Layout.hpp> +#include <impl/Kokkos_Error.hpp> +#include <impl/Kokkos_Traits.hpp> +#include <impl/Kokkos_ViewCtor.hpp> +#include <impl/Kokkos_Atomic_View.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template< unsigned I , size_t ... Args > +struct variadic_size_t + { enum { value = ~size_t(0) }; }; + +template< size_t Val , size_t ... Args > +struct variadic_size_t< 0 , Val , Args ... 
> + { enum { value = Val }; }; + +template< unsigned I , size_t Val , size_t ... Args > +struct variadic_size_t< I , Val , Args ... > + { enum { value = variadic_size_t< I - 1 , Args ... >::value }; }; + +template< size_t ... Args > +struct rank_dynamic ; + +template<> +struct rank_dynamic<> { enum { value = 0 }; }; + +template< size_t Val , size_t ... Args > +struct rank_dynamic< Val , Args... > +{ + enum { value = ( Val == 0 ? 1 : 0 ) + rank_dynamic< Args... >::value }; +}; + +#define KOKKOS_IMPL_VIEW_DIMENSION( R ) \ + template< size_t V , unsigned > struct ViewDimension ## R \ + { \ + enum { ArgN ## R = ( V != ~size_t(0) ? V : 1 ) }; \ + enum { N ## R = ( V != ~size_t(0) ? V : 1 ) }; \ + KOKKOS_INLINE_FUNCTION explicit ViewDimension ## R ( size_t ) {} \ + ViewDimension ## R () = default ; \ + ViewDimension ## R ( const ViewDimension ## R & ) = default ; \ + ViewDimension ## R & operator = ( const ViewDimension ## R & ) = default ; \ + }; \ + template< unsigned RD > struct ViewDimension ## R < 0 , RD > \ + { \ + enum { ArgN ## R = 0 }; \ + typename std::conditional<( RD < 3 ), size_t , unsigned >::type N ## R ; \ + ViewDimension ## R () = default ; \ + ViewDimension ## R ( const ViewDimension ## R & ) = default ; \ + ViewDimension ## R & operator = ( const ViewDimension ## R & ) = default ; \ + KOKKOS_INLINE_FUNCTION explicit ViewDimension ## R ( size_t V ) : N ## R ( V ) {} \ + }; + +KOKKOS_IMPL_VIEW_DIMENSION( 0 ) +KOKKOS_IMPL_VIEW_DIMENSION( 1 ) +KOKKOS_IMPL_VIEW_DIMENSION( 2 ) +KOKKOS_IMPL_VIEW_DIMENSION( 3 ) +KOKKOS_IMPL_VIEW_DIMENSION( 4 ) +KOKKOS_IMPL_VIEW_DIMENSION( 5 ) +KOKKOS_IMPL_VIEW_DIMENSION( 6 ) +KOKKOS_IMPL_VIEW_DIMENSION( 7 ) + +#undef KOKKOS_IMPL_VIEW_DIMENSION + +template< size_t ... Vals > +struct ViewDimension + : public ViewDimension0< variadic_size_t<0,Vals...>::value + , rank_dynamic< Vals... >::value > + , public ViewDimension1< variadic_size_t<1,Vals...>::value + , rank_dynamic< Vals... 
>::value > + , public ViewDimension2< variadic_size_t<2,Vals...>::value + , rank_dynamic< Vals... >::value > + , public ViewDimension3< variadic_size_t<3,Vals...>::value + , rank_dynamic< Vals... >::value > + , public ViewDimension4< variadic_size_t<4,Vals...>::value + , rank_dynamic< Vals... >::value > + , public ViewDimension5< variadic_size_t<5,Vals...>::value + , rank_dynamic< Vals... >::value > + , public ViewDimension6< variadic_size_t<6,Vals...>::value + , rank_dynamic< Vals... >::value > + , public ViewDimension7< variadic_size_t<7,Vals...>::value + , rank_dynamic< Vals... >::value > +{ + typedef ViewDimension0< variadic_size_t<0,Vals...>::value + , rank_dynamic< Vals... >::value > D0 ; + typedef ViewDimension1< variadic_size_t<1,Vals...>::value + , rank_dynamic< Vals... >::value > D1 ; + typedef ViewDimension2< variadic_size_t<2,Vals...>::value + , rank_dynamic< Vals... >::value > D2 ; + typedef ViewDimension3< variadic_size_t<3,Vals...>::value + , rank_dynamic< Vals... >::value > D3 ; + typedef ViewDimension4< variadic_size_t<4,Vals...>::value + , rank_dynamic< Vals... >::value > D4 ; + typedef ViewDimension5< variadic_size_t<5,Vals...>::value + , rank_dynamic< Vals... >::value > D5 ; + typedef ViewDimension6< variadic_size_t<6,Vals...>::value + , rank_dynamic< Vals... >::value > D6 ; + typedef ViewDimension7< variadic_size_t<7,Vals...>::value + , rank_dynamic< Vals... >::value > D7 ; + + using D0::ArgN0 ; + using D1::ArgN1 ; + using D2::ArgN2 ; + using D3::ArgN3 ; + using D4::ArgN4 ; + using D5::ArgN5 ; + using D6::ArgN6 ; + using D7::ArgN7 ; + + using D0::N0 ; + using D1::N1 ; + using D2::N2 ; + using D3::N3 ; + using D4::N4 ; + using D5::N5 ; + using D6::N6 ; + using D7::N7 ; + + enum { rank = sizeof...(Vals) }; + enum { rank_dynamic = Impl::rank_dynamic< Vals... 
>::value }; + + ViewDimension() = default ; + ViewDimension( const ViewDimension & ) = default ; + ViewDimension & operator = ( const ViewDimension & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr + ViewDimension( size_t n0 , size_t n1 , size_t n2 , size_t n3 + , size_t n4 , size_t n5 , size_t n6 , size_t n7 ) + : D0( n0 ) + , D1( n1 ) + , D2( n2 ) + , D3( n3 ) + , D4( n4 ) + , D5( n5 ) + , D6( n6 ) + , D7( n7 ) + {} + + KOKKOS_INLINE_FUNCTION + constexpr size_t extent( const unsigned r ) const + { + return r == 0 ? N0 : ( + r == 1 ? N1 : ( + r == 2 ? N2 : ( + r == 3 ? N3 : ( + r == 4 ? N4 : ( + r == 5 ? N5 : ( + r == 6 ? N6 : ( + r == 7 ? N7 : 0 ))))))); + } + + template< size_t N > + struct prepend { typedef ViewDimension< N , Vals... > type ; }; + + template< size_t N > + struct append { typedef ViewDimension< Vals... , N > type ; }; +}; + +template< class A , class B > +struct ViewDimensionJoin ; + +template< size_t ... A , size_t ... B > +struct ViewDimensionJoin< ViewDimension< A... > , ViewDimension< B... > > { + typedef ViewDimension< A... , B... > type ; +}; + +//---------------------------------------------------------------------------- + +template< class DstDim , class SrcDim > +struct ViewDimensionAssignable ; + +template< size_t ... DstArgs , size_t ... SrcArgs > +struct ViewDimensionAssignable< ViewDimension< DstArgs ... > + , ViewDimension< SrcArgs ... > > +{ + typedef ViewDimension< DstArgs... > dst ; + typedef ViewDimension< SrcArgs... > src ; + + enum { value = + unsigned(dst::rank) == unsigned(src::rank) && ( + //Compile time check that potential static dimensions match + ( ( 1 > dst::rank_dynamic && 1 > src::rank_dynamic ) ? (size_t(dst::ArgN0) == size_t(src::ArgN0)) : true ) && + ( ( 2 > dst::rank_dynamic && 2 > src::rank_dynamic ) ? (size_t(dst::ArgN1) == size_t(src::ArgN1)) : true ) && + ( ( 3 > dst::rank_dynamic && 3 > src::rank_dynamic ) ? 
(size_t(dst::ArgN2) == size_t(src::ArgN2)) : true ) && + ( ( 4 > dst::rank_dynamic && 4 > src::rank_dynamic ) ? (size_t(dst::ArgN3) == size_t(src::ArgN3)) : true ) && + ( ( 5 > dst::rank_dynamic && 5 > src::rank_dynamic ) ? (size_t(dst::ArgN4) == size_t(src::ArgN4)) : true ) && + ( ( 6 > dst::rank_dynamic && 6 > src::rank_dynamic ) ? (size_t(dst::ArgN5) == size_t(src::ArgN5)) : true ) && + ( ( 7 > dst::rank_dynamic && 7 > src::rank_dynamic ) ? (size_t(dst::ArgN6) == size_t(src::ArgN6)) : true ) && + ( ( 8 > dst::rank_dynamic && 8 > src::rank_dynamic ) ? (size_t(dst::ArgN7) == size_t(src::ArgN7)) : true ) + )}; + +}; + +}}} // namespace Kokkos::Experimental::Impl + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +struct ALL_t { + KOKKOS_INLINE_FUNCTION + constexpr const ALL_t & operator()() const { return *this ; } +}; + +}} // namespace Kokkos::Impl + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +using Kokkos::Impl::ALL_t ; + +template< class T > +struct is_integral_extent_type +{ enum { value = std::is_same<T,Kokkos::Experimental::Impl::ALL_t>::value ? 1 : 0 }; }; + +template< class iType > +struct is_integral_extent_type< std::pair<iType,iType> > +{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; }; + +template< class iType > +struct is_integral_extent_type< Kokkos::pair<iType,iType> > +{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; }; + +// Assuming '2 == initializer_list<iType>::size()' +template< class iType > +struct is_integral_extent_type< std::initializer_list<iType> > +{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; }; + +template < unsigned I , class ... Args > +struct is_integral_extent +{ + // get_type is void when sizeof...(Args) <= I + typedef typename std::remove_cv< + typename std::remove_reference< + typename Kokkos::Impl::get_type<I,Args... 
+ >::type >::type >::type type ; + + enum { value = is_integral_extent_type<type>::value }; + + static_assert( value || + std::is_integral<type>::value || + std::is_same<type,void>::value + , "subview argument must be either integral or integral extent" ); +}; + +// Rules for subview arguments and layouts matching + +template<class LayoutDest, class LayoutSrc, int RankDest, int RankSrc, int CurrentArg, class ... SubViewArgs> +struct SubviewLegalArgsCompileTime; + +// Rules which allow LayoutLeft to LayoutLeft assignment + +template<int RankDest, int RankSrc, int CurrentArg, class Arg, class ... SubViewArgs> +struct SubviewLegalArgsCompileTime<Kokkos::LayoutLeft, Kokkos::LayoutLeft, RankDest, RankSrc, CurrentArg, Arg, SubViewArgs...> { + enum { value =(((CurrentArg==RankDest-1) && (Kokkos::Experimental::Impl::is_integral_extent_type<Arg>::value)) || + ((CurrentArg>=RankDest) && (std::is_integral<Arg>::value)) || + ((CurrentArg<RankDest) && (std::is_same<Arg,Kokkos::Impl::ALL_t>::value)) || + ((CurrentArg==0) && (Kokkos::Experimental::Impl::is_integral_extent_type<Arg>::value)) + ) && (SubviewLegalArgsCompileTime<Kokkos::LayoutLeft, Kokkos::LayoutLeft, RankDest, RankSrc, CurrentArg+1, SubViewArgs...>::value)}; +}; + +template<int RankDest, int RankSrc, int CurrentArg, class Arg> +struct SubviewLegalArgsCompileTime<Kokkos::LayoutLeft, Kokkos::LayoutLeft, RankDest, RankSrc, CurrentArg, Arg> { + enum { value = ((CurrentArg==RankDest-1) || (std::is_integral<Arg>::value)) && + (CurrentArg==RankSrc-1) }; +}; + +// Rules which allow LayoutRight to LayoutRight assignment + +template<int RankDest, int RankSrc, int CurrentArg, class Arg, class ... 
SubViewArgs> +struct SubviewLegalArgsCompileTime<Kokkos::LayoutRight, Kokkos::LayoutRight, RankDest, RankSrc, CurrentArg, Arg, SubViewArgs...> { + enum { value =(((CurrentArg==RankSrc-RankDest) && (Kokkos::Experimental::Impl::is_integral_extent_type<Arg>::value)) || + ((CurrentArg<RankSrc-RankDest) && (std::is_integral<Arg>::value)) || + ((CurrentArg>=RankSrc-RankDest) && (std::is_same<Arg,Kokkos::Impl::ALL_t>::value)) + ) && (SubviewLegalArgsCompileTime<Kokkos::LayoutRight, Kokkos::LayoutRight, RankDest, RankSrc, CurrentArg+1, SubViewArgs...>::value)}; +}; + +template<int RankDest, int RankSrc, int CurrentArg, class Arg> +struct SubviewLegalArgsCompileTime<Kokkos::LayoutRight, Kokkos::LayoutRight, RankDest, RankSrc, CurrentArg, Arg> { + enum { value = ((CurrentArg==RankSrc-1) && (std::is_same<Arg,Kokkos::Impl::ALL_t>::value)) }; +}; + +// Rules which allow assignment to LayoutStride + +template<int RankDest, int RankSrc, int CurrentArg, class ... SubViewArgs> +struct SubviewLegalArgsCompileTime<Kokkos::LayoutStride,Kokkos::LayoutLeft,RankDest,RankSrc,CurrentArg,SubViewArgs...> { + enum { value = true }; +}; + +template<int RankDest, int RankSrc, int CurrentArg, class ... SubViewArgs> +struct SubviewLegalArgsCompileTime<Kokkos::LayoutStride,Kokkos::LayoutRight,RankDest,RankSrc,CurrentArg,SubViewArgs...> { + enum { value = true }; +}; + +template<int RankDest, int RankSrc, int CurrentArg, class ... SubViewArgs> +struct SubviewLegalArgsCompileTime<Kokkos::LayoutStride,Kokkos::LayoutStride,RankDest,RankSrc,CurrentArg,SubViewArgs...> { + enum { value = true }; +}; + + +template< unsigned DomainRank , unsigned RangeRank > +struct SubviewExtents { +private: + + // Cannot declare zero-length arrays + enum { InternalRangeRank = RangeRank ? RangeRank : 1u }; + + size_t m_begin[ DomainRank ]; + size_t m_length[ InternalRangeRank ]; + unsigned m_index[ InternalRangeRank ]; + + template< size_t ... 
DimArgs > + KOKKOS_FORCEINLINE_FUNCTION + bool set( unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim ) + { return true ; } + + template< class T , size_t ... DimArgs , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + bool set( unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim + , const T & val + , Args ... args ) + { + const size_t v = static_cast<size_t>(val); + + m_begin[ domain_rank ] = v ; + + return set( domain_rank + 1 , range_rank , dim , args... ) +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + && ( v < dim.extent( domain_rank ) ) +#endif + ; + } + + // ALL_t + template< size_t ... DimArgs , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + bool set( unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim + , const Kokkos::Experimental::Impl::ALL_t + , Args ... args ) + { + m_begin[ domain_rank ] = 0 ; + m_length[ range_rank ] = dim.extent( domain_rank ); + m_index[ range_rank ] = domain_rank ; + + return set( domain_rank + 1 , range_rank + 1 , dim , args... ); + } + + // std::pair range + template< class T , size_t ... DimArgs , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + bool set( unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim + , const std::pair<T,T> & val + , Args ... args ) + { + const size_t b = static_cast<size_t>( val.first ); + const size_t e = static_cast<size_t>( val.second ); + + m_begin[ domain_rank ] = b ; + m_length[ range_rank ] = e - b ; + m_index[ range_rank ] = domain_rank ; + + return set( domain_rank + 1 , range_rank + 1 , dim , args... ) +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + && ( e <= b + dim.extent( domain_rank ) ) +#endif + ; + } + + // Kokkos::pair range + template< class T , size_t ... DimArgs , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + bool set( unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... 
> & dim + , const Kokkos::pair<T,T> & val + , Args ... args ) + { + const size_t b = static_cast<size_t>( val.first ); + const size_t e = static_cast<size_t>( val.second ); + + m_begin[ domain_rank ] = b ; + m_length[ range_rank ] = e - b ; + m_index[ range_rank ] = domain_rank ; + + return set( domain_rank + 1 , range_rank + 1 , dim , args... ) +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + && ( e <= b + dim.extent( domain_rank ) ) +#endif + ; + } + + // { begin , end } range + template< class T , size_t ... DimArgs , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + bool set( unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim + , const std::initializer_list< T > & val + , Args ... args ) + { + const size_t b = static_cast<size_t>( val.begin()[0] ); + const size_t e = static_cast<size_t>( val.begin()[1] ); + + m_begin[ domain_rank ] = b ; + m_length[ range_rank ] = e - b ; + m_index[ range_rank ] = domain_rank ; + + return set( domain_rank + 1 , range_rank + 1 , dim , args... ) +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + && ( val.size() == 2 ) + && ( e <= b + dim.extent( domain_rank ) ) +#endif + ; + } + + //------------------------------ + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + + template< size_t ... DimArgs > + void error( char * + , int + , unsigned + , unsigned + , const ViewDimension< DimArgs ... > & ) const + {} + + template< class T , size_t ... DimArgs , class ... Args > + void error( char * buf , int buf_len + , unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim + , const T & val + , Args ... args ) const + { + const int n = std::min( buf_len , + snprintf( buf , buf_len + , " %lu < %lu %c" + , static_cast<unsigned long>(val) + , static_cast<unsigned long>( dim.extent( domain_rank ) ) + , int( sizeof...(Args) ? ',' : ')' ) ) ); + + error( buf+n, buf_len-n, domain_rank + 1 , range_rank , dim , args... ); + } + + // std::pair range + template< size_t ... 
DimArgs , class ... Args > + void error( char * buf , int buf_len + , unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim + , const Kokkos::Experimental::Impl::ALL_t + , Args ... args ) const + { + const int n = std::min( buf_len , + snprintf( buf , buf_len + , " Kokkos::ALL %c" + , int( sizeof...(Args) ? ',' : ')' ) ) ); + + error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... ); + } + + // std::pair range + template< class T , size_t ... DimArgs , class ... Args > + void error( char * buf , int buf_len + , unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim + , const std::pair<T,T> & val + , Args ... args ) const + { + // d <= e - b + const int n = std::min( buf_len , + snprintf( buf , buf_len + , " %lu <= %lu - %lu %c" + , static_cast<unsigned long>( dim.extent( domain_rank ) ) + , static_cast<unsigned long>( val.second ) + , static_cast<unsigned long>( val.begin ) + , int( sizeof...(Args) ? ',' : ')' ) ) ); + + error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... ); + } + + // Kokkos::pair range + template< class T , size_t ... DimArgs , class ... Args > + void error( char * buf , int buf_len + , unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... > & dim + , const Kokkos::pair<T,T> & val + , Args ... args ) const + { + // d <= e - b + const int n = std::min( buf_len , + snprintf( buf , buf_len + , " %lu <= %lu - %lu %c" + , static_cast<unsigned long>( dim.extent( domain_rank ) ) + , static_cast<unsigned long>( val.second ) + , static_cast<unsigned long>( val.begin ) + , int( sizeof...(Args) ? ',' : ')' ) ) ); + + error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... ); + } + + // { begin , end } range + template< class T , size_t ... DimArgs , class ... Args > + void error( char * buf , int buf_len + , unsigned domain_rank + , unsigned range_rank + , const ViewDimension< DimArgs ... 
> & dim + , const std::initializer_list< T > & val + , Args ... args ) const + { + // d <= e - b + int n = 0 ; + if ( val.size() == 2 ) { + n = std::min( buf_len , + snprintf( buf , buf_len + , " %lu <= %lu - %lu %c" + , static_cast<unsigned long>( dim.extent( domain_rank ) ) + , static_cast<unsigned long>( val.begin()[0] ) + , static_cast<unsigned long>( val.begin()[1] ) + , int( sizeof...(Args) ? ',' : ')' ) ) ); + } + else { + n = std::min( buf_len , + snprintf( buf , buf_len + , " { ... }.size() == %u %c" + , unsigned(val.size()) + , int( sizeof...(Args) ? ',' : ')' ) ) ); + } + + error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... ); + } + + template< size_t ... DimArgs , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + void error( const ViewDimension< DimArgs ... > & dim , Args ... args ) const + { +#if defined( KOKKOS_ACTIVE_EXECUTION_SPACE_HOST ) + enum { LEN = 1024 }; + char buffer[ LEN ]; + + const int n = snprintf(buffer,LEN,"Kokkos::subview bounds error ("); + error( buffer+n , LEN-n , 0 , 0 , dim , args... ); + + Kokkos::Impl::throw_runtime_exception(std::string(buffer)); +#else + Kokkos::abort("Kokkos::subview bounds error"); +#endif + } + +#else + + template< size_t ... DimArgs , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + void error( const ViewDimension< DimArgs ... > & , Args ... ) const {} + +#endif + +public: + + template< size_t ... DimArgs , class ... Args > + KOKKOS_INLINE_FUNCTION + SubviewExtents( const ViewDimension< DimArgs ... > & dim , Args ... args ) + { + static_assert( DomainRank == sizeof...(DimArgs) , "" ); + static_assert( DomainRank == sizeof...(Args) , "" ); + + // Verifies that all arguments, up to 8, are integral types, + // integral extents, or don't exist. 
+ static_assert( RangeRank == + unsigned( is_integral_extent<0,Args...>::value ) + + unsigned( is_integral_extent<1,Args...>::value ) + + unsigned( is_integral_extent<2,Args...>::value ) + + unsigned( is_integral_extent<3,Args...>::value ) + + unsigned( is_integral_extent<4,Args...>::value ) + + unsigned( is_integral_extent<5,Args...>::value ) + + unsigned( is_integral_extent<6,Args...>::value ) + + unsigned( is_integral_extent<7,Args...>::value ) , "" ); + + if ( RangeRank == 0 ) { m_length[0] = 0 ; m_index[0] = ~0u ; } + + if ( ! set( 0 , 0 , dim , args... ) ) error( dim , args... ); + } + + template < typename iType > + KOKKOS_FORCEINLINE_FUNCTION + constexpr size_t domain_offset( const iType i ) const + { return unsigned(i) < DomainRank ? m_begin[i] : 0 ; } + + template < typename iType > + KOKKOS_FORCEINLINE_FUNCTION + constexpr size_t range_extent( const iType i ) const + { return unsigned(i) < InternalRangeRank ? m_length[i] : 0 ; } + + template < typename iType > + KOKKOS_FORCEINLINE_FUNCTION + constexpr unsigned range_index( const iType i ) const + { return unsigned(i) < InternalRangeRank ? m_index[i] : ~0u ; } +}; + +}}} // namespace Kokkos::Experimental::Impl + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +/** \brief Given a value type and dimension generate the View data type */ +template< class T , class Dim > +struct ViewDataType ; + +template< class T > +struct ViewDataType< T , ViewDimension<> > +{ + typedef T type ; +}; + +template< class T , size_t ... Args > +struct ViewDataType< T , ViewDimension< 0 , Args... > > +{ + typedef typename ViewDataType<T*,ViewDimension<Args...> >::type type ; +}; + +template< class T , size_t N , size_t ... Args > +struct ViewDataType< T , ViewDimension< N , Args... 
> >
{
  // Non-zero leading dimension N: appends a static extent [N] to the type
  // generated for the remaining dimensions.
  typedef typename ViewDataType<T,ViewDimension<Args...> >::type type[N] ;
};

/**\brief  Analysis of View data type.
 *
 *  Data type conforms to one of the following patterns :
 *    {const}  value_type  [][#][#][#]
 *    {const}  value_type  ***[#][#][#]
 *  Where the sum of counts of '*' and '[#]' is at most ten.
 *
 *  Provide typedef for the ViewDimension<...> and value_type.
 *
 *  This primary template is the base case: T carries no array or pointer
 *  decoration, so all three dimension lists are empty.
 */
template< class T >
struct ViewArrayAnalysis
{
  typedef T                                      value_type ;
  typedef typename std::add_const<    T >::type  const_value_type ;
  typedef typename std::remove_const< T >::type  non_const_value_type ;
  typedef ViewDimension<>                        static_dimension ;
  typedef ViewDimension<>                        dynamic_dimension ;
  typedef ViewDimension<>                        dimension ;
};

// T[N] : analyse T, then prepend N to the static dimension list.
// The value types and the dynamic dimension list are forwarded unchanged.
template< class T , size_t N >
struct ViewArrayAnalysis< T[N] >
{
private:
  typedef ViewArrayAnalysis< T > nested ;
public:
  typedef typename nested::value_type            value_type ;
  typedef typename nested::const_value_type      const_value_type ;
  typedef typename nested::non_const_value_type  non_const_value_type ;

  typedef typename nested::static_dimension::template prepend<N>::type
    static_dimension ;

  typedef typename nested::dynamic_dimension dynamic_dimension ;

  // Full dimension list: dynamic dimensions joined with static dimensions.
  typedef typename
    ViewDimensionJoin< dynamic_dimension , static_dimension >::type
      dimension ;
};

// T[] : analyse T, then prepend 0 to the dynamic dimension list.
// The value types and the static dimension list are forwarded unchanged.
template< class T >
struct ViewArrayAnalysis< T[] >
{
private:
  typedef ViewArrayAnalysis< T > nested ;
  typedef typename nested::dimension nested_dimension ;
public:
  typedef typename nested::value_type            value_type ;
  typedef typename nested::const_value_type      const_value_type ;
  typedef typename nested::non_const_value_type  non_const_value_type ;

  typedef typename nested::dynamic_dimension::template prepend<0>::type
    dynamic_dimension ;

  typedef typename nested::static_dimension static_dimension ;

  typedef typename
    ViewDimensionJoin< dynamic_dimension , static_dimension >::type
      dimension ;
};

// T* : handled like T[] - prepend 0 to the dynamic dimension list.
template< class T >
struct ViewArrayAnalysis< T*
>
{
private:
  typedef ViewArrayAnalysis< T > nested ;
public:
  typedef typename nested::value_type            value_type ;
  typedef typename nested::const_value_type      const_value_type ;
  typedef typename nested::non_const_value_type  non_const_value_type ;

  typedef typename nested::dynamic_dimension::template prepend<0>::type
    dynamic_dimension ;

  typedef typename nested::static_dimension static_dimension ;

  typedef typename
    ViewDimensionJoin< dynamic_dimension , static_dimension >::type
      dimension ;
};


/** \brief  Default analysis of a View's DataType.
 *
 *  Exposes the dimension and value types found by ViewArrayAnalysis and
 *  regenerates the const / non-const array types from them.  ArrayLayout is
 *  not referenced by this default definition.
 */
template< class DataType , class ArrayLayout , class ValueType >
struct ViewDataAnalysis
{
private:

  typedef ViewArrayAnalysis< DataType > array_analysis ;

  // ValueType is opportunity for partial specialization.
  // Must match array analysis when this default template is used.
  static_assert( std::is_same< ValueType , typename array_analysis::non_const_value_type >::value , "" );

public:

  typedef void specialize ; // No specialization

  typedef typename array_analysis::dimension             dimension ;
  typedef typename array_analysis::value_type            value_type ;
  typedef typename array_analysis::const_value_type      const_value_type ;
  typedef typename array_analysis::non_const_value_type  non_const_value_type ;

  // Generate analogous multidimensional array specification type.
  typedef typename ViewDataType<           value_type , dimension >::type  type ;
  typedef typename ViewDataType<     const_value_type , dimension >::type  const_type ;
  typedef typename ViewDataType< non_const_value_type , dimension >::type  non_const_type ;

  // Generate "flattened" multidimensional array specification type.
// The scalar_array names are plain aliases of type / const_type / non_const_type.
  typedef type            scalar_array_type ;
  typedef const_type      const_scalar_array_type ;
  typedef non_const_type  non_const_scalar_array_type ;
};

}}} // namespace Kokkos::Experimental::Impl

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Experimental {
namespace Impl {

// Primary template: selected when no specialization matches the
// Dimension / Layout pair; it only reports is_mapping_plugin as false.
template < class Dimension , class Layout , typename Enable = void >
struct ViewOffset {
  using is_mapping_plugin = std::false_type ;
};

//----------------------------------------------------------------------------
// LayoutLeft AND ( 1 >= rank OR 0 == rank_dynamic ) : no padding / striding
//
// Maps a multi-index to a linear offset with index 0 varying fastest:
//   offset = i0 + N0 * ( i1 + N1 * ( i2 + ... ) )
// The only data member is the dimension object m_dim.
template < class Dimension >
struct ViewOffset< Dimension , Kokkos::LayoutLeft
                 , typename std::enable_if<( 1 >= Dimension::rank
                                             ||
                                             0 == Dimension::rank_dynamic
                                           )>::type >
{
  using is_mapping_plugin = std::true_type ;
  using is_regular        = std::true_type ;

  typedef size_t             size_type ;
  typedef Dimension          dimension_type ;
  typedef Kokkos::LayoutLeft array_layout ;

  dimension_type m_dim ;

  //----------------------------------------
  // operator() : linear offset of a multi-index, one overload per rank.

  // rank 1
  template< typename I0 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0 ) const { return i0 ; }

  // rank 2
  template < typename I0 , typename I1 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0 , I1 const & i1 ) const
    { return i0 + m_dim.N0 * i1 ; }

  //rank 3
  template < typename I0, typename I1, typename I2 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const
  {
    return i0 + m_dim.N0 * ( i1 + m_dim.N1 * i2 );
  }

  //rank 4
  template < typename I0, typename I1, typename I2, typename I3 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const
  {
    return i0 + m_dim.N0 * (
           i1 + m_dim.N1 * (
           i2 + m_dim.N2 * i3 ));
  }

  //rank 5
  template < typename I0, typename I1, typename I2, typename I3
           , typename I4 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
                      , I4 const & i4 ) const
  {
    return i0 + m_dim.N0 * (
           i1 + m_dim.N1 * (
           i2 + m_dim.N2 * (
           i3 + m_dim.N3 * i4 )));
  }

  //rank 6
  template < typename I0, typename I1, typename I2, typename I3
           , typename I4, typename I5 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
                      , I4 const & i4, I5 const & i5 ) const
  {
    return i0 + m_dim.N0 * (
           i1 + m_dim.N1 * (
           i2 + m_dim.N2 * (
           i3 + m_dim.N3 * (
           i4 + m_dim.N4 * i5 ))));
  }

  //rank 7
  template < typename I0, typename I1, typename I2, typename I3
           , typename I4, typename I5, typename I6 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
                      , I4 const & i4, I5 const & i5, I6 const & i6 ) const
  {
    return i0 + m_dim.N0 * (
           i1 + m_dim.N1 * (
           i2 + m_dim.N2 * (
           i3 + m_dim.N3 * (
           i4 + m_dim.N4 * (
           i5 + m_dim.N5 * i6 )))));
  }

  //rank 8
  template < typename I0, typename I1, typename I2, typename I3
           , typename I4, typename I5, typename I6, typename I7 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
                      , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const
  {
    return i0 + m_dim.N0 * (
           i1 + m_dim.N1 * (
           i2 + m_dim.N2 * (
           i3 + m_dim.N3 * (
           i4 + m_dim.N4 * (
           i5 + m_dim.N5 * (
           i6 + m_dim.N6 * i7 ))))));
  }

  //----------------------------------------

  // Layout object built from the eight stored extents.
  KOKKOS_INLINE_FUNCTION
  constexpr array_layout layout() const
    {
      return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3
                         , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 );
    }

  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }

  /* Cardinality of the domain index space */
  KOKKOS_INLINE_FUNCTION
  constexpr size_type size() const
    { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }

  /* Span of the range space */
  // Same product as size(): there is no padding in this specialization.
  KOKKOS_INLINE_FUNCTION
  constexpr size_type span() const
    { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }

  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return true ; }

  /* Strides of dimensions */
  // stride_k is the product of the extents N0 .. N(k-1).
  KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 1 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N0 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_dim.N0 * m_dim.N1 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 ; }

  // Stride with [ rank ] value is the total length
  // Writes s[0] .. s[rank]; the caller must supply at least rank + 1 entries.
  template< typename iType >
  KOKKOS_INLINE_FUNCTION
  void stride( iType * const s ) const
    {
      s[0] = 1 ;
      if ( 0 < dimension_type::rank ) { s[1] = m_dim.N0 ; }
      if ( 1 < dimension_type::rank ) { s[2] = s[1] * m_dim.N1 ; }
      if ( 2 < dimension_type::rank ) { s[3] = s[2] * m_dim.N2 ; }
      if ( 3 < dimension_type::rank ) { s[4] = s[3] * m_dim.N3 ; }
      if ( 4 < dimension_type::rank ) { s[5] = s[4] * m_dim.N4 ; }
      if ( 5 < dimension_type::rank ) { s[6] = s[5] * m_dim.N5 ; }
      if ( 6 < dimension_type::rank ) { s[7] = s[6] * m_dim.N6 ; }
      if ( 7 < dimension_type::rank ) { s[8] = s[7] * m_dim.N7 ; }
    }

  //----------------------------------------

  ViewOffset() = default ;
  ViewOffset( const ViewOffset & ) = default ;
  ViewOffset & operator = ( const ViewOffset & ) = default ;

  // Construct from a layout.  Only arg_layout.dimension[0] is forwarded to
  // m_dim; the remaining seven arguments are passed as 0.  The
  // integral_constant argument is unnamed and unused.
  template< unsigned TrivialScalarSize >
  KOKKOS_INLINE_FUNCTION
  constexpr ViewOffset
    ( std::integral_constant<unsigned,TrivialScalarSize> const &
    , Kokkos::LayoutLeft const & arg_layout
    )
    : m_dim( arg_layout.dimension[0], 0, 0, 0, 0, 0, 0, 0 )
    {}

  // Copy the extents of another LayoutLeft offset of equal rank.
  template< class DimRHS >
  KOKKOS_INLINE_FUNCTION
  constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
    : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
           , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
    {
      static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
      // Also requires equal static dimensions ...
    }

  // Convert from LayoutRight: permitted only for rank 1 with one dynamic dimension.
  template< class DimRHS >
  KOKKOS_INLINE_FUNCTION
  constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs )
    : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
    {
      static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
                   , "ViewOffset LayoutLeft and LayoutRight are only compatible when rank == 1" );
    }

  // Convert from LayoutStride: rank 1 only, and the source stride S0 must be
  // 1; any other stride aborts at run time.
  template< class DimRHS >
  KOKKOS_INLINE_FUNCTION
  ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs )
    : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
    {
      static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
                   , "ViewOffset LayoutLeft and LayoutStride are only compatible when rank == 1" );
      if ( rhs.m_stride.S0 != 1 ) {
        Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutLeft from LayoutStride requires stride == 1" );
      }
    }

  //----------------------------------------
  // Subview construction
  // Takes the extent of range dimension 0 from sub; rhs is not read.

  template< class DimRHS >
  KOKKOS_INLINE_FUNCTION
  constexpr ViewOffset(
    const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs ,
    const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub )
    : m_dim( sub.range_extent(0), 0, 0, 0, 0, 0, 0, 0 )
    {
      static_assert( ( 0 == dimension_type::rank ) ||
                     ( 1 == dimension_type::rank && 1 == dimension_type::rank_dynamic && 1 <= DimRHS::rank )
                   , "ViewOffset subview construction requires compatible rank" );
    }
};

//----------------------------------------------------------------------------
// LayoutLeft AND ( 1 < rank AND 0 < rank_dynamic ) : has padding / striding
template < class Dimension >
struct ViewOffset< Dimension , Kokkos::LayoutLeft
                 , typename std::enable_if<( 1 < Dimension::rank
                                             &&
                                             0 < Dimension::rank_dynamic
                                           )>::type >
{
  using is_mapping_plugin = std::true_type ;
  using is_regular        = std::true_type ;

  typedef size_t             size_type ;
  typedef Dimension          dimension_type ;
  typedef Kokkos::LayoutLeft array_layout ;
// m_dim holds the extents; m_stride is the multiplier applied to index 1.
  dimension_type m_dim ;
  size_type      m_stride ;

  //----------------------------------------
  // operator() : linear offset of a multi-index, one overload per rank.
  // m_stride multiplies everything after i0:
  //   offset = i0 + m_stride * ( i1 + N1 * ( i2 + ... ) )

  // rank 1
  template< typename I0 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0 ) const { return i0 ; }

  // rank 2
  template < typename I0 , typename I1 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0 , I1 const & i1 ) const
    { return i0 + m_stride * i1 ; }

  //rank 3
  template < typename I0, typename I1, typename I2 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const
  {
    return i0 + m_stride * ( i1 + m_dim.N1 * i2 );
  }

  //rank 4
  template < typename I0, typename I1, typename I2, typename I3 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const
  {
    return i0 + m_stride * (
           i1 + m_dim.N1 * (
           i2 + m_dim.N2 * i3 ));
  }

  //rank 5
  template < typename I0, typename I1, typename I2, typename I3
           , typename I4 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
                      , I4 const & i4 ) const
  {
    return i0 + m_stride * (
           i1 + m_dim.N1 * (
           i2 + m_dim.N2 * (
           i3 + m_dim.N3 * i4 )));
  }

  //rank 6
  template < typename I0, typename I1, typename I2, typename I3
           , typename I4, typename I5 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
                      , I4 const & i4, I5 const & i5 ) const
  {
    return i0 + m_stride * (
           i1 + m_dim.N1 * (
           i2 + m_dim.N2 * (
           i3 + m_dim.N3 * (
           i4 + m_dim.N4 * i5 ))));
  }

  //rank 7
  template < typename I0, typename I1, typename I2, typename I3
           , typename I4, typename I5, typename I6 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
                      , I4 const & i4, I5 const & i5, I6 const & i6 ) const
  {
    return i0 + m_stride * (
           i1 + m_dim.N1 * (
           i2 + m_dim.N2 * (
           i3 + m_dim.N3 * (
           i4 + m_dim.N4 * (
           i5 + m_dim.N5 * i6 )))));
  }

  //rank 8
  template < typename I0, typename I1, typename I2, typename I3
           , typename I4, typename I5, typename I6, typename I7 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
                      , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const
  {
    return i0 + m_stride * (
           i1 + m_dim.N1 * (
           i2 + m_dim.N2 * (
           i3 + m_dim.N3 * (
           i4 + m_dim.N4 * (
           i5 + m_dim.N5 * (
           i6 + m_dim.N6 * i7 ))))));
  }

  //----------------------------------------

  // Layout object built from the eight stored extents (m_stride is not included).
  KOKKOS_INLINE_FUNCTION
  constexpr array_layout layout() const
    {
      return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3
                         , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 );
    }

  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }

  /* Cardinality of the domain index space */
  KOKKOS_INLINE_FUNCTION
  constexpr size_type size() const
    { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }

  /* Span of the range space */
  // Uses m_stride in place of N0, so it differs from size() when m_stride != N0.
  KOKKOS_INLINE_FUNCTION
  constexpr size_type span() const
    { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }

  // Contiguous exactly when the stride equals the leading extent.
  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const
    { return m_stride == m_dim.N0 ; }

  /* Strides of dimensions */
  // stride_k ( k >= 1 ) is m_stride times the product of N1 .. N(k-1).
  KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 1 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_stride ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_stride * m_dim.N1 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_stride * m_dim.N1 * m_dim.N2 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 ; }
  KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 ; }

  // Stride with [ rank ] value is the total length
  // Writes s[0] .. s[rank]; the caller must supply at least rank + 1 entries.
  template< typename iType >
  KOKKOS_INLINE_FUNCTION
  void stride( iType * const s ) const
    {
      s[0] = 1 ;
      if ( 0 < dimension_type::rank ) { s[1] = m_stride ; }
      if ( 1 < dimension_type::rank ) { s[2] = s[1] * m_dim.N1 ; }
      if ( 2 < dimension_type::rank ) { s[3] = s[2] * m_dim.N2 ; }
      if ( 3 < dimension_type::rank ) { s[4] = s[3] * m_dim.N3 ; }
      if ( 4 < dimension_type::rank ) { s[5] = s[4] * m_dim.N4 ; }
      if ( 5 < dimension_type::rank ) { s[6] = s[5] * m_dim.N5 ; }
      if ( 6 < dimension_type::rank ) { s[7] = s[6] * m_dim.N6 ; }
      if ( 7 < dimension_type::rank ) { s[8] = s[7] * m_dim.N7 ; }
    }

  //----------------------------------------

private:

  // Compile-time helper that rounds a leading extent up to a multiple of
  // 'align' elements, where align = MEMORY_ALIGNMENT / TrivialScalarSize when
  // that division is exact and TrivialScalarSize is non-zero, and 0 otherwise.
  template< unsigned TrivialScalarSize >
  struct Padding {
    enum { div = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT / ( TrivialScalarSize ? TrivialScalarSize : 1 ) };
    enum { mod = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT % ( TrivialScalarSize ? TrivialScalarSize : 1 ) };

    // If memory alignment is a multiple of the trivial scalar size then attempt to align.
    enum { align = 0 != TrivialScalarSize && 0 == mod ? div : 0 };
    enum { div_ok = div ? div : 1 }; // Non-zero divisor: avoids modulo by zero in constexpr

    // Returns N unchanged unless padding is enabled ( align != 0 ), N exceeds
    // MEMORY_ALIGNMENT_THRESHOLD * align, and N is not already a multiple of
    // div_ok; in that case N is rounded up to the next multiple.
    KOKKOS_INLINE_FUNCTION
    static constexpr size_t stride( size_t const N )
      {
        return ( align && ( Kokkos::Impl::MEMORY_ALIGNMENT_THRESHOLD * align < N ) && ( N % div_ok ) )
               ? N + align - ( N % div_ok ) : N ;
      }
  };

public:

  ViewOffset() = default ;
  ViewOffset( const ViewOffset & ) = default ;
  ViewOffset & operator = ( const ViewOffset & ) = default ;

  /* Enable padding for trivial scalar types with non-zero trivial scalar size */
  // All eight layout extents are stored; m_stride is the leading extent
  // after Padding<TrivialScalarSize>::stride has been applied to it.
  template< unsigned TrivialScalarSize >
  KOKKOS_INLINE_FUNCTION
  constexpr ViewOffset
    ( std::integral_constant<unsigned,TrivialScalarSize> const & padding_type_size
    , Kokkos::LayoutLeft const & arg_layout
    )
    : m_dim( arg_layout.dimension[0] , arg_layout.dimension[1]
           , arg_layout.dimension[2] , arg_layout.dimension[3]
           , arg_layout.dimension[4] , arg_layout.dimension[5]
           , arg_layout.dimension[6] , arg_layout.dimension[7]
           )
    , m_stride( Padding<TrivialScalarSize>::stride( arg_layout.dimension[0] ) )
    {}

  // Copy extents and stride_1() from another LayoutLeft offset of equal rank.
  template< class DimRHS >
  KOKKOS_INLINE_FUNCTION
  constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
    : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
           , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
    , m_stride( rhs.stride_1() )
    {
      static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
      // Also requires equal static dimensions ...
    }

  //----------------------------------------
  // Subview construction
  // This subview must be 2 == rank and 2 == rank_dynamic
  // due to only having stride #0.
  // The source dimension #0 must be non-zero for stride-one leading dimension.
  // At most one subsequent dimension can be non-zero.
  //
  // m_stride is the source stride of whichever source dimension ( 1 .. 7 )
  // became range dimension 1, as reported by sub.range_index(1); it is 0
  // when range_index(1) is none of 1 .. 7.
  // NOTE(review): the static_assert enforcing the rank requirement above is
  // commented out in the body - confirm whether that is intentional.

  template< class DimRHS >
  KOKKOS_INLINE_FUNCTION
  constexpr ViewOffset
    ( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs ,
      const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub )
    : m_dim( sub.range_extent(0)
           , sub.range_extent(1)
           , sub.range_extent(2)
           , sub.range_extent(3)
           , sub.range_extent(4)
           , sub.range_extent(5)
           , sub.range_extent(6)
           , sub.range_extent(7))
    , m_stride( ( 1 == sub.range_index(1) ? rhs.stride_1() :
                ( 2 == sub.range_index(1) ? rhs.stride_2() :
                ( 3 == sub.range_index(1) ? rhs.stride_3() :
                ( 4 == sub.range_index(1) ? rhs.stride_4() :
                ( 5 == sub.range_index(1) ? rhs.stride_5() :
                ( 6 == sub.range_index(1) ? rhs.stride_6() :
                ( 7 == sub.range_index(1) ? rhs.stride_7() : 0 ))))))))
    {
      //static_assert( ( 2 == dimension_type::rank ) &&
      //               ( 2 == dimension_type::rank_dynamic ) &&
      //               ( 2 <= DimRHS::rank )
      //             , "ViewOffset subview construction requires compatible rank" );
    }
};

//----------------------------------------------------------------------------
// LayoutRight AND ( 1 >= rank OR 0 == rank_dynamic ) : no padding / striding
template < class Dimension >
struct ViewOffset< Dimension , Kokkos::LayoutRight
                 , typename std::enable_if<( 1 >= Dimension::rank
                                             ||
                                             0 == Dimension::rank_dynamic
                                           )>::type >
{
  using is_mapping_plugin = std::true_type ;
  using is_regular        = std::true_type ;

  typedef size_t              size_type ;
  typedef Dimension           dimension_type ;
  typedef Kokkos::LayoutRight array_layout ;

  dimension_type m_dim ;

  //----------------------------------------

  // rank 1
  template< typename I0 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0 ) const { return i0 ; }

  // rank 2
  template < typename I0 , typename I1 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0 , I1 const & i1 ) const
    { return i1 + m_dim.N1 * i0 ; }

  //rank 3
  template < typename
I0, typename I1, typename I2 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const + { + return i2 + m_dim.N2 * ( i1 + m_dim.N1 * ( i0 )); + } + + //rank 4 + template < typename I0, typename I1, typename I2, typename I3 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const + { + return i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( + i1 + m_dim.N1 * ( i0 ))); + } + + //rank 5 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4 ) const + { + return i4 + m_dim.N4 * ( + i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( + i1 + m_dim.N1 * ( i0 )))); + } + + //rank 6 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5 ) const + { + return i5 + m_dim.N5 * ( + i4 + m_dim.N4 * ( + i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( + i1 + m_dim.N1 * ( i0 ))))); + } + + //rank 7 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5, typename I6 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5, I6 const & i6 ) const + { + return i6 + m_dim.N6 * ( + i5 + m_dim.N5 * ( + i4 + m_dim.N4 * ( + i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( + i1 + m_dim.N1 * ( i0 )))))); + } + + //rank 8 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5, typename I6, typename I7 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const + { + 
return i7 + m_dim.N7 * ( + i6 + m_dim.N6 * ( + i5 + m_dim.N5 * ( + i4 + m_dim.N4 * ( + i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( + i1 + m_dim.N1 * ( i0 ))))))); + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + constexpr array_layout layout() const + { + return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3 + , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 ); + } + + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; } + + /* Cardinality of the domain index space */ + KOKKOS_INLINE_FUNCTION + constexpr size_type size() const + { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } + + /* Span of the range space */ + KOKKOS_INLINE_FUNCTION + constexpr size_type span() const + { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } + + KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return true ; } + + /* Strides of dimensions */ + KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N7 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N7 * m_dim.N6 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 ; } + 
KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 * m_dim.N1 ; } + + // Stride with [ rank ] value is the total length + template< typename iType > + KOKKOS_INLINE_FUNCTION + void stride( iType * const s ) const + { + size_type n = 1 ; + if ( 7 < dimension_type::rank ) { s[7] = n ; n *= m_dim.N7 ; } + if ( 6 < dimension_type::rank ) { s[6] = n ; n *= m_dim.N6 ; } + if ( 5 < dimension_type::rank ) { s[5] = n ; n *= m_dim.N5 ; } + if ( 4 < dimension_type::rank ) { s[4] = n ; n *= m_dim.N4 ; } + if ( 3 < dimension_type::rank ) { s[3] = n ; n *= m_dim.N3 ; } + if ( 2 < dimension_type::rank ) { s[2] = n ; n *= m_dim.N2 ; } + if ( 1 < dimension_type::rank ) { s[1] = n ; n *= m_dim.N1 ; } + if ( 0 < dimension_type::rank ) { s[0] = n ; } + s[dimension_type::rank] = n * m_dim.N0 ; + } + + //---------------------------------------- + + ViewOffset() = default ; + ViewOffset( const ViewOffset & ) = default ; + ViewOffset & operator = ( const ViewOffset & ) = default ; + + template< unsigned TrivialScalarSize > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset + ( std::integral_constant<unsigned,TrivialScalarSize> const & + , Kokkos::LayoutRight const & arg_layout + ) + : m_dim( arg_layout.dimension[0], 0, 0, 0, 0, 0, 0, 0 ) + {} + + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs ) + : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 + , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 ) + { + static_assert( 
int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" ); + // Also requires equal static dimensions ... + } + + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs ) + : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 ) + { + static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1 + , "ViewOffset LayoutRight and LayoutLeft are only compatible when rank == 1" ); + } + + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs ) + : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 ) + { + static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1 + , "ViewOffset LayoutLeft/Right and LayoutStride are only compatible when rank == 1" ); + if ( rhs.m_stride.S0 != 1 ) { + Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutLeft/Right from LayoutStride requires stride == 1" ); + } + } + + //---------------------------------------- + // Subview construction + + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset + ( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs + , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub + ) + : m_dim( sub.range_extent(0) , 0, 0, 0, 0, 0, 0, 0 ) + { + static_assert( ( 0 == dimension_type::rank_dynamic ) || + ( 1 == dimension_type::rank && 1 == dimension_type::rank_dynamic && 1 <= DimRHS::rank ) + , "ViewOffset subview construction requires compatible rank" ); + } +}; + +//---------------------------------------------------------------------------- +// LayoutRight AND ( 1 < rank AND 0 < rank_dynamic ) : has padding / striding +template < class Dimension > +struct ViewOffset< Dimension , Kokkos::LayoutRight + , typename std::enable_if<( 1 < Dimension::rank + && + 0 < Dimension::rank_dynamic + )>::type > +{ + using is_mapping_plugin = 
std::true_type ; + using is_regular = std::true_type ; + + typedef size_t size_type ; + typedef Dimension dimension_type ; + typedef Kokkos::LayoutRight array_layout ; + + dimension_type m_dim ; + size_type m_stride ; + + //---------------------------------------- + + // rank 1 + template< typename I0 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0 ) const { return i0 ; } + + // rank 2: the left-most index i0 is scaled by the stored m_stride rather than by the extent N1 + template < typename I0 , typename I1 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0 , I1 const & i1 ) const + { return i1 + i0 * m_stride ; } + + //rank 3 and above: indices i1 .. i(rank-1) are combined by nested multiply-add over the extents, then i0 * m_stride is added + template < typename I0, typename I1, typename I2 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const + { return i2 + m_dim.N2 * ( i1 ) + i0 * m_stride ; } + + //rank 4 + template < typename I0, typename I1, typename I2, typename I3 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const + { + return i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( i1 )) + + i0 * m_stride ; + } + + //rank 5 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4 ) const + { + return i4 + m_dim.N4 * ( + i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( i1 ))) + + i0 * m_stride ; + } + + //rank 6 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5 ) const + { + return i5 + m_dim.N5 * ( + i4 + m_dim.N4 * ( + i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( i1 )))) + + i0 * m_stride ; + } + + //rank 7 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5, typename I6 > + KOKKOS_INLINE_FUNCTION constexpr + 
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5, I6 const & i6 ) const + { + return i6 + m_dim.N6 * ( + i5 + m_dim.N5 * ( + i4 + m_dim.N4 * ( + i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( i1 ))))) + + i0 * m_stride ; + } + + //rank 8 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5, typename I6, typename I7 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const + { + return i7 + m_dim.N7 * ( + i6 + m_dim.N6 * ( + i5 + m_dim.N5 * ( + i4 + m_dim.N4 * ( + i3 + m_dim.N3 * ( + i2 + m_dim.N2 * ( i1 )))))) + + i0 * m_stride ; + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + constexpr array_layout layout() const + { + return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3 + , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 ); + } + + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; } + + /* Cardinality of the domain index space */ + KOKKOS_INLINE_FUNCTION + constexpr size_type size() const + { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } + + /* Span of the range space: N0 * m_stride, so it can exceed size() when m_stride is larger than the product N1 * ... * N7 */ + KOKKOS_INLINE_FUNCTION + constexpr size_type span() 
const + { return m_dim.N0 * m_stride ; } + + /* Contiguous only when m_stride equals the product of the extents N1 .. N7 */ KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const + { return m_stride == m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 * m_dim.N1 ; } + + /* Strides of dimensions: stride_0 is the stored m_stride, the others are products of the extents to their right */ + KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N7 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N7 * m_dim.N6 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_stride ; } + + // Stride with [ rank ] value is the total length (m_stride * N0 here); s is written at indices 0 .. rank, so it needs rank + 1 entries + template< typename iType > + KOKKOS_INLINE_FUNCTION + void stride( iType * const s ) const + { + size_type n = 1 ; + if ( 7 < dimension_type::rank ) { s[7] = n ; n *= m_dim.N7 ; } + if ( 6 < dimension_type::rank ) { s[6] = n ; n *= m_dim.N6 ; } + if ( 5 < dimension_type::rank ) { s[5] = n ; n *= m_dim.N5 ; } + if ( 4 < dimension_type::rank ) { s[4] = n ; n *= m_dim.N4 ; } + if ( 3 < dimension_type::rank ) { s[3] = n ; n *= m_dim.N3 ; } + if ( 2 < dimension_type::rank ) { s[2] = n ; n *= m_dim.N2 ; } + if ( 1 < dimension_type::rank ) { s[1] = n ; } + if ( 0 < dimension_type::rank ) { s[0] = m_stride ; } + s[dimension_type::rank] = m_stride * m_dim.N0 ; + } + + //---------------------------------------- + +private: + + template< unsigned TrivialScalarSize > + struct Padding { + enum { div = TrivialScalarSize == 0 ? 
0 : Kokkos::Impl::MEMORY_ALIGNMENT / ( TrivialScalarSize ? TrivialScalarSize : 1 ) }; + enum { mod = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT % ( TrivialScalarSize ? TrivialScalarSize : 1 ) }; + + // If memory alignment is a multiple of the trivial scalar size then attempt to align. + enum { align = 0 != TrivialScalarSize && 0 == mod ? div : 0 }; + enum { div_ok = div ? div : 1 }; // To avoid modulo by zero in constexpr + + /* Rounds N up to the next multiple of 'align' when align is non-zero, N exceeds MEMORY_ALIGNMENT_THRESHOLD * align and N is not already a multiple; otherwise returns N unchanged */ KOKKOS_INLINE_FUNCTION + static constexpr size_t stride( size_t const N ) + { + return ( align && ( Kokkos::Impl::MEMORY_ALIGNMENT_THRESHOLD * align < N ) && ( N % div_ok ) ) + ? N + align - ( N % div_ok ) : N ; + } + }; + +public: + + ViewOffset() = default ; + ViewOffset( const ViewOffset & ) = default ; + ViewOffset & operator = ( const ViewOffset & ) = default ; + + /* Enable padding for trivial scalar types with non-zero trivial scalar size. The value handed to Padding::stride is the product of the extents N1 .. N(rank-1). */ + template< unsigned TrivialScalarSize > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset + ( std::integral_constant<unsigned,TrivialScalarSize> const & padding_type_size + , Kokkos::LayoutRight const & arg_layout + ) + : m_dim( arg_layout.dimension[0] , arg_layout.dimension[1] + , arg_layout.dimension[2] , arg_layout.dimension[3] + , arg_layout.dimension[4] , arg_layout.dimension[5] + , arg_layout.dimension[6] , arg_layout.dimension[7] + ) + , m_stride( Padding<TrivialScalarSize>:: + stride( /* 2 <= rank */ + m_dim.N1 * ( dimension_type::rank == 2 ? 1 : + m_dim.N2 * ( dimension_type::rank == 3 ? 1 : + m_dim.N3 * ( dimension_type::rank == 4 ? 1 : + m_dim.N4 * ( dimension_type::rank == 5 ? 1 : + m_dim.N5 * ( dimension_type::rank == 6 ? 1 : + m_dim.N6 * ( dimension_type::rank == 7 ? 
1 : m_dim.N7 )))))) )) + {} + + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs ) + : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 + , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 ) + , m_stride( rhs.stride_0() ) + { + static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" ); + // Also requires equal static dimensions ... + } + + //---------------------------------------- + // Subview construction: m_stride is copied from the rhs stride selected by sub.range_index(0), or 0 when that index is outside 0 .. 6 + // Last dimension must be non-zero + + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset + ( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs + , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub + ) + : m_dim( sub.range_extent(0) + , sub.range_extent(1) + , sub.range_extent(2) + , sub.range_extent(3) + , sub.range_extent(4) + , sub.range_extent(5) + , sub.range_extent(6) + , sub.range_extent(7)) + , m_stride( 0 == sub.range_index(0) ? rhs.stride_0() : ( + 1 == sub.range_index(0) ? rhs.stride_1() : ( + 2 == sub.range_index(0) ? rhs.stride_2() : ( + 3 == sub.range_index(0) ? rhs.stride_3() : ( + 4 == sub.range_index(0) ? rhs.stride_4() : ( + 5 == sub.range_index(0) ? rhs.stride_5() : ( + 6 == sub.range_index(0) ? rhs.stride_6() : 0 ))))))) + { +/* // This subview must be 2 == rank and 2 == rank_dynamic + // due to only having stride #0. + // The source dimension #0 must be non-zero for stride-one leading dimension. + // At most one subsequent dimension can be non-zero. 
+ + static_assert( (( 2 == dimension_type::rank ) && + ( 2 <= DimRHS::rank )) || + () + , "ViewOffset subview construction requires compatible rank" ); +*/ + } +}; + +//---------------------------------------------------------------------------- +/* Strided array layout only makes sense for 0 < rank */ +/* rank = 0 included for DynRankView case. Each ViewStride<Rank> specialization stores the first Rank strides as size_t members; the remaining S-names are enum constants equal to 0, and the eight-argument constructor ignores the arguments it has no member for. */ + +template< unsigned Rank > +struct ViewStride ; + +template<> +struct ViewStride<0> { + enum { S0 = 0 , S1 = 0 , S2 = 0 , S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 }; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t , size_t , size_t , size_t + , size_t , size_t , size_t , size_t ) + {} +}; + +template<> +struct ViewStride<1> { + size_t S0 ; + enum { S1 = 0 , S2 = 0 , S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 }; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t aS0 , size_t , size_t , size_t + , size_t , size_t , size_t , size_t ) + : S0( aS0 ) + {} +}; + +template<> +struct ViewStride<2> { + size_t S0 , S1 ; + enum { S2 = 0 , S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 }; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t aS0 , size_t aS1 , size_t , size_t + , size_t , size_t , size_t , size_t ) + : S0( aS0 ) , S1( aS1 ) + {} +}; + +template<> +struct ViewStride<3> { + size_t S0 , S1 , S2 ; + enum { S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 }; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t + , size_t , 
size_t , size_t , size_t ) + : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) + {} +}; + +/* Ranks 4 .. 8 repeat the pattern: one more size_t member and one fewer zero-valued enum constant per rank */ template<> +struct ViewStride<4> { + size_t S0 , S1 , S2 , S3 ; + enum { S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 }; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 + , size_t , size_t , size_t , size_t ) + : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 ) + {} +}; + +template<> +struct ViewStride<5> { + size_t S0 , S1 , S2 , S3 , S4 ; + enum { S5 = 0 , S6 = 0 , S7 = 0 }; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 + , size_t aS4 , size_t , size_t , size_t ) + : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 ) + , S4( aS4 ) + {} +}; + +template<> +struct ViewStride<6> { + size_t S0 , S1 , S2 , S3 , S4 , S5 ; + enum { S6 = 0 , S7 = 0 }; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 + , size_t aS4 , size_t aS5 , size_t , size_t ) + : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 ) + , S4( aS4 ) , S5( aS5 ) + {} +}; + +template<> +struct ViewStride<7> { + size_t S0 , S1 , S2 , S3 , S4 , S5 , S6 ; + enum { S7 = 0 }; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 + , size_t aS4 , size_t aS5 , size_t aS6 , size_t ) + : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 ) + , S4( aS4 ) , S5( aS5 ) , S6( aS6 ) + {} +}; + +template<> +struct ViewStride<8> { + size_t 
S0 , S1 , S2 , S3 , S4 , S5 , S6 , S7 ; + + ViewStride() = default ; + ViewStride( const ViewStride & ) = default ; + ViewStride & operator = ( const ViewStride & ) = default ; + + KOKKOS_INLINE_FUNCTION + constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3 + , size_t aS4 , size_t aS5 , size_t aS6 , size_t aS7 ) + : S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 ) + , S4( aS4 ) , S5( aS5 ) , S6( aS6 ) , S7( aS7 ) + {} +}; + +/* ViewOffset for LayoutStride: each dimension has its own stride stored in m_stride, and an offset is the sum of index * stride over the dimensions */ template < class Dimension > +struct ViewOffset< Dimension , Kokkos::LayoutStride + , void > +{ +private: + typedef ViewStride< Dimension::rank > stride_type ; +public: + + using is_mapping_plugin = std::true_type ; + using is_regular = std::true_type ; + + typedef size_t size_type ; + typedef Dimension dimension_type ; + typedef Kokkos::LayoutStride array_layout ; + + dimension_type m_dim ; + stride_type m_stride ; + + //---------------------------------------- + + // rank 1 + template< typename I0 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0 ) const + { + return i0 * m_stride.S0 ; + } + + // rank 2 + template < typename I0 , typename I1 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0 , I1 const & i1 ) const + { + return i0 * m_stride.S0 + + i1 * m_stride.S1 ; + } + + //rank 3 + template < typename I0, typename I1, typename I2 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const + { + return i0 * m_stride.S0 + + i1 * m_stride.S1 + + i2 * m_stride.S2 ; + } + + //rank 4 + template < typename I0, typename I1, typename I2, typename I3 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const + { + return i0 * m_stride.S0 + + i1 * m_stride.S1 + + i2 * m_stride.S2 + + i3 * m_stride.S3 ; + } + + //rank 5 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 
const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4 ) const + { + return i0 * m_stride.S0 + + i1 * m_stride.S1 + + i2 * m_stride.S2 + + i3 * m_stride.S3 + + i4 * m_stride.S4 ; + } + + //rank 6 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5 ) const + { + return i0 * m_stride.S0 + + i1 * m_stride.S1 + + i2 * m_stride.S2 + + i3 * m_stride.S3 + + i4 * m_stride.S4 + + i5 * m_stride.S5 ; + } + + //rank 7 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5, typename I6 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5, I6 const & i6 ) const + { + return i0 * m_stride.S0 + + i1 * m_stride.S1 + + i2 * m_stride.S2 + + i3 * m_stride.S3 + + i4 * m_stride.S4 + + i5 * m_stride.S5 + + i6 * m_stride.S6 ; + } + + //rank 8 + template < typename I0, typename I1, typename I2, typename I3 + , typename I4, typename I5, typename I6, typename I7 > + KOKKOS_INLINE_FUNCTION constexpr + size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 + , I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const + { + return i0 * m_stride.S0 + + i1 * m_stride.S1 + + i2 * m_stride.S2 + + i3 * m_stride.S3 + + i4 * m_stride.S4 + + i5 * m_stride.S5 + + i6 * m_stride.S6 + + i7 * m_stride.S7 ; + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + constexpr array_layout layout() const + { + return array_layout( m_dim.N0 , m_stride.S0 + , m_dim.N1 , m_stride.S1 + , m_dim.N2 , m_stride.S2 + , m_dim.N3 , m_stride.S3 + , m_dim.N4 , m_stride.S4 + , m_dim.N5 , m_stride.S5 + , m_dim.N6 , m_stride.S6 + , m_dim.N7 , m_stride.S7 + ); + } + + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() 
const { return m_dim.N0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; } + + /* Cardinality of the domain index space */ + KOKKOS_INLINE_FUNCTION + constexpr size_type size() const + { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } + +private: + + /* constexpr two-argument maximum, used only by span() */ KOKKOS_INLINE_FUNCTION + static constexpr size_type Max( size_type lhs , size_type rhs ) + { return lhs < rhs ? rhs : lhs ; } + +public: + + /* Span of the range space, largest stride * dimension */ + KOKKOS_INLINE_FUNCTION + constexpr size_type span() const + { + return Max( m_dim.N0 * m_stride.S0 , + Max( m_dim.N1 * m_stride.S1 , + Max( m_dim.N2 * m_stride.S2 , + Max( m_dim.N3 * m_stride.S3 , + Max( m_dim.N4 * m_stride.S4 , + Max( m_dim.N5 * m_stride.S5 , + Max( m_dim.N6 * m_stride.S6 , + m_dim.N7 * m_stride.S7 ))))))); + } + + KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return span() == size(); } + + /* Strides of dimensions */ + KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_stride.S0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_stride.S1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_stride.S2 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_stride.S3 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_stride.S4 ; } + KOKKOS_INLINE_FUNCTION constexpr 
size_type stride_5() const { return m_stride.S5 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_stride.S6 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_stride.S7 ; } + + // Stride with [ rank ] value is the total length (span() here); s is written at indices 0 .. rank, so it needs rank + 1 entries + template< typename iType > + KOKKOS_INLINE_FUNCTION + void stride( iType * const s ) const + { + if ( 0 < dimension_type::rank ) { s[0] = m_stride.S0 ; } + if ( 1 < dimension_type::rank ) { s[1] = m_stride.S1 ; } + if ( 2 < dimension_type::rank ) { s[2] = m_stride.S2 ; } + if ( 3 < dimension_type::rank ) { s[3] = m_stride.S3 ; } + if ( 4 < dimension_type::rank ) { s[4] = m_stride.S4 ; } + if ( 5 < dimension_type::rank ) { s[5] = m_stride.S5 ; } + if ( 6 < dimension_type::rank ) { s[6] = m_stride.S6 ; } + if ( 7 < dimension_type::rank ) { s[7] = m_stride.S7 ; } + s[dimension_type::rank] = span(); + } + + //---------------------------------------- + + ViewOffset() = default ; + ViewOffset( const ViewOffset & ) = default ; + ViewOffset & operator = ( const ViewOffset & ) = default ; + + /* Construct from a LayoutStride: extents and strides are copied element by element; only the zero-valued padding tag is accepted */ KOKKOS_INLINE_FUNCTION + constexpr ViewOffset( std::integral_constant<unsigned,0> const & + , Kokkos::LayoutStride const & rhs ) + : m_dim( rhs.dimension[0] , rhs.dimension[1] , rhs.dimension[2] , rhs.dimension[3] + , rhs.dimension[4] , rhs.dimension[5] , rhs.dimension[6] , rhs.dimension[7] ) + , m_stride( rhs.stride[0] , rhs.stride[1] , rhs.stride[2] , rhs.stride[3] + , rhs.stride[4] , rhs.stride[5] , rhs.stride[6] , rhs.stride[7] ) + {} + + /* Conversion from a ViewOffset of any layout: extents are copied and the strides are read through rhs.stride_0() .. rhs.stride_7() */ template< class DimRHS , class LayoutRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset( const ViewOffset< DimRHS , LayoutRHS , void > & rhs ) + : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 + , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 ) + , m_stride( rhs.stride_0() , rhs.stride_1() , rhs.stride_2() , rhs.stride_3() + , rhs.stride_4() , rhs.stride_5() , rhs.stride_6() , rhs.stride_7() ) + { + static_assert( int(DimRHS::rank) == 
int(dimension_type::rank) , "ViewOffset assignment requires equal rank" ); + // Also requires equal static dimensions ... + } + + //---------------------------------------- + // Subview construction + +private: + + /* Returns the stride of rhs for source dimension r, or 0 when r > 7 */ template< class DimRHS , class LayoutRHS > + KOKKOS_INLINE_FUNCTION static + constexpr size_t stride + ( unsigned r , const ViewOffset< DimRHS , LayoutRHS , void > & rhs ) + { + return r > 7 ? 0 : ( + r == 0 ? rhs.stride_0() : ( + r == 1 ? rhs.stride_1() : ( + r == 2 ? rhs.stride_2() : ( + r == 3 ? rhs.stride_3() : ( + r == 4 ? rhs.stride_4() : ( + r == 5 ? rhs.stride_5() : ( + r == 6 ? rhs.stride_6() : rhs.stride_7() ))))))); + } + +public: + + template< class DimRHS , class LayoutRHS > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset + ( const ViewOffset< DimRHS , LayoutRHS , void > & rhs + , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub + ) + // range_extent(r) returns 0 when dimension_type::rank <= r + : m_dim( sub.range_extent(0) + , sub.range_extent(1) + , sub.range_extent(2) + , sub.range_extent(3) + , sub.range_extent(4) + , sub.range_extent(5) + , sub.range_extent(6) + , sub.range_extent(7) + ) + // range_index(r) returns ~0u when dimension_type::rank <= r, which the private stride() helper maps to a stride of 0 + , m_stride( stride( sub.range_index(0), rhs ) + , stride( sub.range_index(1), rhs ) + , stride( sub.range_index(2), rhs ) + , stride( sub.range_index(3), rhs ) + , stride( sub.range_index(4), rhs ) + , stride( sub.range_index(5), rhs ) + , stride( sub.range_index(6), rhs ) + , stride( sub.range_index(7), rhs ) + ) + {} +}; + +}}} // namespace Kokkos::Experimental::Impl + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +/** \brief ViewDataHandle provides the type of the 'data handle' which the view + * uses to access data with the [] operator. 
It also provides + * an allocate function and a function to extract a raw ptr from the + * data handle. ViewDataHandle also defines an enum ReferenceAble which + * specifies whether references/pointers to elements can be taken and a + * 'return_type' which is what the view operators will give back. + * Specialisation of this object allows three things depending + * on ViewTraits and compiler options: + * (i) Use special allocator (e.g. huge pages/small pages and pinned memory) + * (ii) Use special data handle type (e.g. add Cuda Texture Object) + * (iii) Use special access intrinsics (e.g. texture fetch and non-caching loads). NOTE(review): the primary template below only declares typedefs and two static 'assign' overloads; no 'allocate' function or 'ReferenceAble' enum is visible in it - confirm whether this description is still current. + */ +template< class Traits , class Enable = void > +struct ViewDataHandle { + + typedef typename Traits::value_type value_type ; + typedef typename Traits::value_type * handle_type ; + typedef typename Traits::value_type & return_type ; + typedef Kokkos::Impl::SharedAllocationTracker track_type ; + + /* Wraps a raw pointer as the handle; the tracker argument is unused */ KOKKOS_INLINE_FUNCTION + static handle_type assign( value_type * arg_data_ptr + , track_type const & /*arg_tracker*/ ) + { + return handle_type( arg_data_ptr ); + } + + /* Builds a handle that points 'offset' elements past an existing handle */ KOKKOS_INLINE_FUNCTION + static handle_type assign( handle_type const arg_data_ptr + , size_t offset ) + { + return handle_type( arg_data_ptr + offset ); + } +}; + +/* Selected when the value type is non-const, Traits::specialize is void and the Atomic memory trait is set: the handle and return types are the Kokkos::Impl atomic wrappers */ template< class Traits > +struct ViewDataHandle< Traits , + typename std::enable_if<( std::is_same< typename Traits::non_const_value_type + , typename Traits::value_type >::value + && + std::is_same< typename Traits::specialize , void >::value + && + Traits::memory_traits::Atomic + )>::type > +{ + typedef typename Traits::value_type value_type ; + typedef typename Kokkos::Impl::AtomicViewDataHandle< Traits > handle_type ; + typedef typename Kokkos::Impl::AtomicDataElement< Traits > return_type ; + typedef Kokkos::Impl::SharedAllocationTracker track_type ; + + KOKKOS_INLINE_FUNCTION + static handle_type assign( value_type * arg_data_ptr + , track_type const & /*arg_tracker*/ ) + { + return handle_type( arg_data_ptr ); 
+ } + + /* Offsets the 'ptr' member of the source handle; SrcHandleType is expected to expose such a member */ template<class SrcHandleType> + KOKKOS_INLINE_FUNCTION + static handle_type assign( const SrcHandleType& arg_handle + , size_t offset ) + { + return handle_type( arg_handle.ptr + offset ); + } +}; + +/* Selected for Restrict without Aligned and without Atomic (and, when KOKKOS_HAVE_CUDA is defined, only outside CudaSpace / CudaUVMSpace): handle and return types carry KOKKOS_RESTRICT */ template< class Traits > +struct ViewDataHandle< Traits , + typename std::enable_if<( + std::is_same< typename Traits::specialize , void >::value + && + (!Traits::memory_traits::Aligned) + && + Traits::memory_traits::Restrict +#ifdef KOKKOS_HAVE_CUDA + && + (!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value || + std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value )) +#endif + && + (!Traits::memory_traits::Atomic) + )>::type > +{ + typedef typename Traits::value_type value_type ; + typedef typename Traits::value_type * KOKKOS_RESTRICT handle_type ; + typedef typename Traits::value_type & KOKKOS_RESTRICT return_type ; + typedef Kokkos::Impl::SharedAllocationTracker track_type ; + + KOKKOS_INLINE_FUNCTION + static handle_type assign( value_type * arg_data_ptr + , track_type const & /*arg_tracker*/ ) + { + return handle_type( arg_data_ptr ); + } + + KOKKOS_INLINE_FUNCTION + static handle_type assign( handle_type const arg_data_ptr + , size_t offset ) + { + return handle_type( arg_data_ptr + offset ); + } +}; + +/* Selected for Aligned without Restrict and without Atomic (and, when KOKKOS_HAVE_CUDA is defined, only outside CudaSpace / CudaUVMSpace): the handle type carries KOKKOS_ALIGN_PTR(KOKKOS_ALIGN_SIZE) */ template< class Traits > +struct ViewDataHandle< Traits , + typename std::enable_if<( + std::is_same< typename Traits::specialize , void >::value + && + Traits::memory_traits::Aligned + && + (!Traits::memory_traits::Restrict) +#ifdef KOKKOS_HAVE_CUDA + && + (!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value || + std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value )) +#endif + && + (!Traits::memory_traits::Atomic) + )>::type > +{ + typedef typename Traits::value_type value_type ; + typedef typename Traits::value_type * KOKKOS_ALIGN_PTR(KOKKOS_ALIGN_SIZE) handle_type ; + typedef typename Traits::value_type & return_type ; + typedef Kokkos::Impl::SharedAllocationTracker 
track_type ; + + /* Calls Kokkos::abort when the pointer value is not a multiple of KOKKOS_ALIGN_SIZE */ KOKKOS_INLINE_FUNCTION + static handle_type assign( value_type * arg_data_ptr + , track_type const & /*arg_tracker*/ ) + { + if ( reinterpret_cast<uintptr_t>(arg_data_ptr) % KOKKOS_ALIGN_SIZE ) { + Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute"); + } + return handle_type( arg_data_ptr ); + } + + /* The alignment check is applied to the pointer after the offset has been added */ KOKKOS_INLINE_FUNCTION + static handle_type assign( handle_type const arg_data_ptr + , size_t offset ) + { + if ( reinterpret_cast<uintptr_t>(arg_data_ptr+offset) % KOKKOS_ALIGN_SIZE ) { + Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute"); + } + return handle_type( arg_data_ptr + offset ); + } +}; + +template< class Traits > +struct ViewDataHandle< Traits , + typename std::enable_if<( + std::is_same< typename Traits::specialize , void >::value + && + Traits::memory_traits::Aligned + && + Traits::memory_traits::Restrict +#ifdef KOKKOS_HAVE_CUDA + && + (!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value || + std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value )) +#endif + && + (!Traits::memory_traits::Atomic) + )>::type > +{ + typedef typename Traits::value_type value_type ; + typedef typename Traits::value_type * KOKKOS_RESTRICT KOKKOS_ALIGN_PTR(KOKKOS_ALIGN_SIZE) handle_type ; + typedef typename Traits::value_type & return_type ; + typedef Kokkos::Impl::SharedAllocationTracker track_type ; + + KOKKOS_INLINE_FUNCTION + static handle_type assign( value_type * arg_data_ptr + , track_type const & /*arg_tracker*/ ) + { + if ( reinterpret_cast<uintptr_t>(arg_data_ptr) % KOKKOS_ALIGN_SIZE ) { + Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute"); + } + return handle_type( arg_data_ptr ); + } + + KOKKOS_INLINE_FUNCTION + static handle_type assign( handle_type const arg_data_ptr + , size_t offset ) + { + if ( reinterpret_cast<uintptr_t>(arg_data_ptr+offset) % KOKKOS_ALIGN_SIZE ) { + 
Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute"); + } + return handle_type( arg_data_ptr + offset ); + } +}; +}}} // namespace Kokkos::Experimental::Impl + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +//---------------------------------------------------------------------------- + +/* + * The construction, assignment to default, and destruction + * are merged into a single functor. + * Primarily to work around an unresolved CUDA back-end bug + * that would lose the destruction cuda device function when + * called from the shared memory tracking destruction. + * Secondarily to have two fewer partial specializations. + */ +template< class ExecSpace + , class ValueType + , bool IsScalar = std::is_scalar< ValueType >::value + > +struct ViewValueFunctor ; + +template< class ExecSpace , class ValueType > +struct ViewValueFunctor< ExecSpace , ValueType , false /* is_scalar */ > +{ + typedef Kokkos::RangePolicy< ExecSpace > PolicyType ; + + ExecSpace space ; + ValueType * ptr ; + size_t n ; + bool destroy ; + + KOKKOS_INLINE_FUNCTION + void operator()( const size_t i ) const + { + if ( destroy ) { (ptr+i)->~ValueType(); } //KOKKOS_CUDA_CLANG_WORKAROUND this line causes ptax error __cxa_begin_catch in nested_view unit-test + else { new (ptr+i) ValueType(); } + } + + ViewValueFunctor() = default ; + ViewValueFunctor( const ViewValueFunctor & ) = default ; + ViewValueFunctor & operator = ( const ViewValueFunctor & ) = default ; + + ViewValueFunctor( ExecSpace const & arg_space + , ValueType * const arg_ptr + , size_t const arg_n ) + : space( arg_space ) + , ptr( arg_ptr ) + , n( arg_n ) + , destroy( false ) + {} + + void execute( bool arg ) + { + destroy = arg ; + if ( ! 
space.in_parallel() ) { + const Kokkos::Impl::ParallelFor< ViewValueFunctor , PolicyType > + closure( *this , PolicyType( 0 , n ) ); + closure.execute(); + space.fence(); + } + else { + for ( size_t i = 0 ; i < n ; ++i ) operator()(i); + } + } + + void construct_shared_allocation() + { execute( false ); } + + void destroy_shared_allocation() + { execute( true ); } +}; + + +template< class ExecSpace , class ValueType > +struct ViewValueFunctor< ExecSpace , ValueType , true /* is_scalar */ > +{ + typedef Kokkos::RangePolicy< ExecSpace > PolicyType ; + + ExecSpace space ; + ValueType * ptr ; + size_t n ; + + KOKKOS_INLINE_FUNCTION + void operator()( const size_t i ) const + { ptr[i] = ValueType(); } + + ViewValueFunctor() = default ; + ViewValueFunctor( const ViewValueFunctor & ) = default ; + ViewValueFunctor & operator = ( const ViewValueFunctor & ) = default ; + + ViewValueFunctor( ExecSpace const & arg_space + , ValueType * const arg_ptr + , size_t const arg_n ) + : space( arg_space ) + , ptr( arg_ptr ) + , n( arg_n ) + {} + + void construct_shared_allocation() + { + if ( ! space.in_parallel() ) { + const Kokkos::Impl::ParallelFor< ViewValueFunctor , PolicyType > + closure( *this , PolicyType( 0 , n ) ); + closure.execute(); + space.fence(); + } + else { + for ( size_t i = 0 ; i < n ; ++i ) operator()(i); + } + } + + void destroy_shared_allocation() {} +}; + +//---------------------------------------------------------------------------- +/** \brief View mapping for non-specialized data type and standard layout */ +template< class Traits > +class ViewMapping< Traits , + typename std::enable_if<( + std::is_same< typename Traits::specialize , void >::value + && + ViewOffset< typename Traits::dimension + , typename Traits::array_layout + , void >::is_mapping_plugin::value + )>::type > +{ +private: + + template< class , class ... > friend class ViewMapping ; + template< class , class ... 
> friend class Kokkos::View ; + + typedef ViewOffset< typename Traits::dimension + , typename Traits::array_layout + , void + > offset_type ; + + typedef typename ViewDataHandle< Traits >::handle_type handle_type ; + + handle_type m_handle ; + offset_type m_offset ; + + KOKKOS_INLINE_FUNCTION + ViewMapping( const handle_type & arg_handle , const offset_type & arg_offset ) + : m_handle( arg_handle ) + , m_offset( arg_offset ) + {} + +public: + + //---------------------------------------- + // Domain dimensions + + enum { Rank = Traits::dimension::rank }; + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr size_t extent( const iType & r ) const + { return m_offset.m_dim.extent(r); } + + KOKKOS_INLINE_FUNCTION constexpr + typename Traits::array_layout layout() const + { return m_offset.layout(); } + + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_offset.dimension_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_offset.dimension_1(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_offset.dimension_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_offset.dimension_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_offset.dimension_4(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_offset.dimension_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_offset.dimension_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_offset.dimension_7(); } + + // Is a regular layout with uniform striding for each index. 
+ using is_regular = typename offset_type::is_regular ; + + KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_offset.stride_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_offset.stride_1(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_offset.stride_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_offset.stride_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_offset.stride_4(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_offset.stride_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_offset.stride_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_offset.stride_7(); } + + template< typename iType > + KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_offset.stride(s); } + + //---------------------------------------- + // Range span + + /** \brief Span of the mapped range */ + KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_offset.span(); } + + /** \brief Is the mapped range span contiguous */ + KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_offset.span_is_contiguous(); } + + typedef typename ViewDataHandle< Traits >::return_type reference_type ; + typedef typename Traits::value_type * pointer_type ; + + /** \brief If data references are lvalue_reference than can query pointer to memory */ + KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const + { + return std::is_lvalue_reference< reference_type >::value + ? (pointer_type) m_handle + : (pointer_type) 0 ; + } + + //---------------------------------------- + // The View class performs all rank and bounds checking before + // calling these element reference methods. 
+ + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference() const { return m_handle[0]; } + + template< typename I0 > + KOKKOS_FORCEINLINE_FUNCTION + typename + std::enable_if< std::is_integral<I0>::value && + ! std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value + , reference_type >::type + reference( const I0 & i0 ) const { return m_handle[i0]; } + + template< typename I0 > + KOKKOS_FORCEINLINE_FUNCTION + typename + std::enable_if< std::is_integral<I0>::value && + std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value + , reference_type >::type + reference( const I0 & i0 ) const { return m_handle[ m_offset(i0) ]; } + + template< typename I0 , typename I1 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 ) const + { return m_handle[ m_offset(i0,i1) ]; } + + template< typename I0 , typename I1 , typename I2 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const + { return m_handle[ m_offset(i0,i1,i2) ]; } + + template< typename I0 , typename I1 , typename I2 , typename I3 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const + { return m_handle[ m_offset(i0,i1,i2,i3) ]; } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 ) const + { return m_handle[ m_offset(i0,i1,i2,i3,i4) ]; } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 ) const + { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5) ]; } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , 
typename I5 , typename I6 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6 ) const + { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6) ]; } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 , typename I6 , typename I7 > + KOKKOS_FORCEINLINE_FUNCTION + reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const + { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; } + + //---------------------------------------- + +private: + + enum { MemorySpanMask = 8 - 1 /* Force alignment on 8 byte boundary */ }; + enum { MemorySpanSize = sizeof(typename Traits::value_type) }; + +public: + + /** \brief Span, in bytes, of the referenced memory */ + KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const + { + return ( m_offset.span() * sizeof(typename Traits::value_type) + MemorySpanMask ) & ~size_t(MemorySpanMask); + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION ~ViewMapping() {} + KOKKOS_INLINE_FUNCTION ViewMapping() : m_handle(), m_offset() {} + KOKKOS_INLINE_FUNCTION ViewMapping( const ViewMapping & rhs ) + : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {} + KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( const ViewMapping & rhs ) + { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; } + + KOKKOS_INLINE_FUNCTION ViewMapping( ViewMapping && rhs ) + : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {} + KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( ViewMapping && rhs ) + { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; } + + //---------------------------------------- + + /**\brief Span, in bytes, of the required memory */ + KOKKOS_INLINE_FUNCTION + static constexpr size_t memory_span( typename Traits::array_layout const & 
arg_layout ) + { + typedef std::integral_constant< unsigned , 0 > padding ; + return ( offset_type( padding(), arg_layout ).span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask); + } + + /**\brief Wrap a span of memory */ + template< class ... P > + KOKKOS_INLINE_FUNCTION + ViewMapping( Kokkos::Impl::ViewCtorProp< P ... > const & arg_prop + , typename Traits::array_layout const & arg_layout + ) + : m_handle( ( (Kokkos::Impl::ViewCtorProp<void,pointer_type> const &) arg_prop ).value ) + , m_offset( std::integral_constant< unsigned , 0 >() , arg_layout ) + {} + + //---------------------------------------- + /* Allocate and construct mapped array. + * Allocate via shared allocation record and + * return that record for allocation tracking. + */ + template< class ... P > + Kokkos::Impl::SharedAllocationRecord<> * + allocate_shared( Kokkos::Impl::ViewCtorProp< P... > const & arg_prop + , typename Traits::array_layout const & arg_layout ) + { + typedef Kokkos::Impl::ViewCtorProp< P... > alloc_prop ; + + typedef typename alloc_prop::execution_space execution_space ; + typedef typename Traits::memory_space memory_space ; + typedef typename Traits::value_type value_type ; + typedef ViewValueFunctor< execution_space , value_type > functor_type ; + typedef Kokkos::Impl::SharedAllocationRecord< memory_space , functor_type > record_type ; + + // Query the mapping for byte-size of allocation. + // If padding is allowed then pass in sizeof value type + // for padding computation. + typedef std::integral_constant + < unsigned + , alloc_prop::allow_padding ? 
sizeof(value_type) : 0 + > padding ; + + m_offset = offset_type( padding(), arg_layout ); + + const size_t alloc_size = + ( m_offset.span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask); + + // Create shared memory tracking record with allocate memory from the memory space + record_type * const record = + record_type::allocate( ( (Kokkos::Impl::ViewCtorProp<void,memory_space> const &) arg_prop ).value + , ( (Kokkos::Impl::ViewCtorProp<void,std::string> const &) arg_prop ).value + , alloc_size ); + + // Only set the the pointer and initialize if the allocation is non-zero. + // May be zero if one of the dimensions is zero. + if ( alloc_size ) { + + m_handle = handle_type( reinterpret_cast< pointer_type >( record->data() ) ); + + if ( alloc_prop::initialize ) { + // Assume destruction is only required when construction is requested. + // The ViewValueFunctor has both value construction and destruction operators. + record->m_destroy = functor_type( ( (Kokkos::Impl::ViewCtorProp<void,execution_space> const &) arg_prop).value + , (value_type *) m_handle + , m_offset.span() + ); + + // Construct values + record->m_destroy.construct_shared_allocation(); + } + } + + return record ; + } +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +/** \brief Assign compatible default mappings */ + +template< class DstTraits , class SrcTraits > +class ViewMapping< DstTraits , SrcTraits , + typename std::enable_if<( + /* default mappings */ + std::is_same< typename DstTraits::specialize , void >::value + && + std::is_same< typename SrcTraits::specialize , void >::value + && + ( + /* same layout */ + std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value + || + /* known layout */ + ( + ( + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value || + std::is_same< typename DstTraits::array_layout , 
Kokkos::LayoutRight >::value || + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value + ) + && + ( + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value || + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value || + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value + ) + ) + ) + )>::type > +{ +private: + + enum { is_assignable_space = +#if 1 + Kokkos::Impl::MemorySpaceAccess + < typename DstTraits::memory_space + , typename SrcTraits::memory_space >::assignable }; +#else + std::is_same< typename DstTraits::memory_space + , typename SrcTraits::memory_space >::value }; +#endif + + enum { is_assignable_value_type = + std::is_same< typename DstTraits::value_type + , typename SrcTraits::value_type >::value || + std::is_same< typename DstTraits::value_type + , typename SrcTraits::const_value_type >::value }; + + enum { is_assignable_dimension = + ViewDimensionAssignable< typename DstTraits::dimension + , typename SrcTraits::dimension >::value }; + + enum { is_assignable_layout = + std::is_same< typename DstTraits::array_layout + , typename SrcTraits::array_layout >::value || + std::is_same< typename DstTraits::array_layout + , Kokkos::LayoutStride >::value || + ( DstTraits::dimension::rank == 0 ) || + ( DstTraits::dimension::rank == 1 && + DstTraits::dimension::rank_dynamic == 1 ) + }; + +public: + + enum { is_assignable = is_assignable_space && + is_assignable_value_type && + is_assignable_dimension && + is_assignable_layout }; + + typedef Kokkos::Impl::SharedAllocationTracker TrackType ; + typedef ViewMapping< DstTraits , void > DstType ; + typedef ViewMapping< SrcTraits , void > SrcType ; + + KOKKOS_INLINE_FUNCTION + static void assign( DstType & dst , const SrcType & src , const TrackType & src_track ) + { + static_assert( is_assignable_space + , "View assignment must have compatible spaces" ); + + static_assert( is_assignable_value_type + , "View assignment must 
have same value type or const = non-const" ); + + static_assert( is_assignable_dimension + , "View assignment must have compatible dimensions" ); + + static_assert( is_assignable_layout + , "View assignment must have compatible layout or have rank <= 1" ); + + typedef typename DstType::offset_type dst_offset_type ; + + if ( size_t(DstTraits::dimension::rank_dynamic) < size_t(SrcTraits::dimension::rank_dynamic) ) { + typedef typename DstTraits::dimension dst_dim; + bool assignable = + ( ( 1 > DstTraits::dimension::rank_dynamic && 1 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN0 == src.dimension_0() : true ) && + ( ( 2 > DstTraits::dimension::rank_dynamic && 2 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN1 == src.dimension_1() : true ) && + ( ( 3 > DstTraits::dimension::rank_dynamic && 3 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN2 == src.dimension_2() : true ) && + ( ( 4 > DstTraits::dimension::rank_dynamic && 4 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN3 == src.dimension_3() : true ) && + ( ( 5 > DstTraits::dimension::rank_dynamic && 5 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN4 == src.dimension_4() : true ) && + ( ( 6 > DstTraits::dimension::rank_dynamic && 6 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN5 == src.dimension_5() : true ) && + ( ( 7 > DstTraits::dimension::rank_dynamic && 7 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN6 == src.dimension_6() : true ) && + ( ( 8 > DstTraits::dimension::rank_dynamic && 8 <= SrcTraits::dimension::rank_dynamic ) ? 
+ dst_dim::ArgN7 == src.dimension_7() : true ) + ; + if(!assignable) + Kokkos::abort("View Assignment: trying to assign runtime dimension to non matching compile time dimension."); + } + dst.m_offset = dst_offset_type( src.m_offset ); + dst.m_handle = Kokkos::Experimental::Impl::ViewDataHandle< DstTraits >::assign( src.m_handle , src_track ); + } +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +// Subview mapping. +// Deduce destination view type from source view traits and subview arguments + +template< class SrcTraits , class ... Args > +struct ViewMapping + < typename std::enable_if<( + std::is_same< typename SrcTraits::specialize , void >::value + && + ( + std::is_same< typename SrcTraits::array_layout + , Kokkos::LayoutLeft >::value || + std::is_same< typename SrcTraits::array_layout + , Kokkos::LayoutRight >::value || + std::is_same< typename SrcTraits::array_layout + , Kokkos::LayoutStride >::value + ) + )>::type + , SrcTraits + , Args ... > +{ +private: + + static_assert( SrcTraits::rank == sizeof...(Args) , + "Subview mapping requires one argument for each dimension of source View" ); + + enum + { RZ = false + , R0 = bool(is_integral_extent<0,Args...>::value) + , R1 = bool(is_integral_extent<1,Args...>::value) + , R2 = bool(is_integral_extent<2,Args...>::value) + , R3 = bool(is_integral_extent<3,Args...>::value) + , R4 = bool(is_integral_extent<4,Args...>::value) + , R5 = bool(is_integral_extent<5,Args...>::value) + , R6 = bool(is_integral_extent<6,Args...>::value) + , R7 = bool(is_integral_extent<7,Args...>::value) + }; + + enum { rank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3) + + unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) }; + + // Whether right-most rank is a range. + enum { R0_rev = ( 0 == SrcTraits::rank ? RZ : ( + 1 == SrcTraits::rank ? R0 : ( + 2 == SrcTraits::rank ? R1 : ( + 3 == SrcTraits::rank ? 
R2 : ( + 4 == SrcTraits::rank ? R3 : ( + 5 == SrcTraits::rank ? R4 : ( + 6 == SrcTraits::rank ? R5 : ( + 7 == SrcTraits::rank ? R6 : R7 )))))))) }; + + // Subview's layout + typedef typename std::conditional< + ( /* Same array layout IF */ + ( rank == 0 ) /* output rank zero */ + || + SubviewLegalArgsCompileTime<typename SrcTraits::array_layout, typename SrcTraits::array_layout, + rank, SrcTraits::rank, 0, Args...>::value + || + // OutputRank 1 or 2, InputLayout Left, Interval 0 + // because single stride one or second index has a stride. + ( rank <= 2 && R0 && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ) //replace with input rank + || + // OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1] + // because single stride one or second index has a stride. + ( rank <= 2 && R0_rev && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ) //replace input rank + ), typename SrcTraits::array_layout , Kokkos::LayoutStride + >::type array_layout ; + + typedef typename SrcTraits::value_type value_type ; + + typedef typename std::conditional< rank == 0 , value_type , + typename std::conditional< rank == 1 , value_type * , + typename std::conditional< rank == 2 , value_type ** , + typename std::conditional< rank == 3 , value_type *** , + typename std::conditional< rank == 4 , value_type **** , + typename std::conditional< rank == 5 , value_type ***** , + typename std::conditional< rank == 6 , value_type ****** , + typename std::conditional< rank == 7 , value_type ******* , + value_type ******** + >::type >::type >::type >::type >::type >::type >::type >::type + data_type ; + +public: + + typedef Kokkos::ViewTraits + < data_type + , array_layout + , typename SrcTraits::device_type + , typename SrcTraits::memory_traits > traits_type ; + + typedef Kokkos::View + < data_type + , array_layout + , typename SrcTraits::device_type + , typename SrcTraits::memory_traits > type ; + + template< class MemoryTraits > + struct 
apply { + + static_assert( Kokkos::Impl::is_memory_traits< MemoryTraits >::value , "" ); + + typedef Kokkos::ViewTraits + < data_type + , array_layout + , typename SrcTraits::device_type + , MemoryTraits > traits_type ; + + typedef Kokkos::View + < data_type + , array_layout + , typename SrcTraits::device_type + , MemoryTraits > type ; + }; + + // The presumed type is 'ViewMapping< traits_type , void >' + // However, a compatible ViewMapping is acceptable. + template< class DstTraits > + KOKKOS_INLINE_FUNCTION + static void assign( ViewMapping< DstTraits , void > & dst + , ViewMapping< SrcTraits , void > const & src + , Args ... args ) + { + static_assert( + ViewMapping< DstTraits , traits_type , void >::is_assignable , + "Subview destination type must be compatible with subview derived type" ); + + typedef ViewMapping< DstTraits , void > DstType ; + + typedef typename DstType::offset_type dst_offset_type ; + + const SubviewExtents< SrcTraits::rank , rank > + extents( src.m_offset.m_dim , args... ); + + dst.m_offset = dst_offset_type( src.m_offset , extents ); + + dst.m_handle = ViewDataHandle< DstTraits >::assign(src.m_handle, + src.m_offset( extents.domain_offset(0) + , extents.domain_offset(1) + , extents.domain_offset(2) + , extents.domain_offset(3) + , extents.domain_offset(4) + , extents.domain_offset(5) + , extents.domain_offset(6) + , extents.domain_offset(7) + )); + } +}; + + + +//---------------------------------------------------------------------------- + +}}} // namespace Kokkos::Experimental::Impl + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< unsigned , class MapType > +KOKKOS_INLINE_FUNCTION +bool view_verify_operator_bounds( const MapType & ) +{ return true ; } + +template< unsigned R , class MapType , class iType , class ... 
/** \brief  End of recursion: no indices remain, so nothing is appended. */
template< unsigned , class MapType >
inline
void view_error_operator_bounds( char * , int , const MapType & )
{}

/** \brief  Append " <index> < <extent> ," (or ")" after the last index) for
 *          rank R to 'buf', then recurse for the remaining indices.
 *
 *  \param buf   Where to write; receives at most 'len' bytes including the
 *               terminating null.
 *  \param len   Remaining capacity of 'buf' in bytes.
 *  \param map   Mapping queried through map.extent(R).
 *  \param i     Index supplied for rank R.
 *  \param args  Indices supplied for ranks R+1 and up.
 *
 *  Output that does not fit is truncated; the buffer is never written
 *  past 'len' bytes.
 */
template< unsigned R , class MapType , class iType , class ... Args >
inline
void view_error_operator_bounds
  ( char * buf
  , int len
  , const MapType & map
  , const iType & i
  , Args ... args
  )
{
  // No room even for a terminator: writing anything would overrun.
  if ( len <= 0 ) return ;

  const int n =
    snprintf(buf,len," %ld < %ld %c"
            , static_cast<unsigned long>(i)
            , static_cast<unsigned long>( map.extent(R) )
            , ( sizeof...(Args) ? ',' : ')' )
            );

  // snprintf reports the length the full text would have had (or a
  // negative value on error), not the number of bytes stored.
  // Advance only by what was actually written so that the next call
  // receives a pointer inside the buffer and a positive capacity.
  int used = 0 ;
  if ( n < 0 ) {
    buf[0] = '\0' ; // keep the buffer a valid string after an output error
  }
  else {
    used = ( n < len ) ? n : len - 1 ;
  }

  view_error_operator_bounds<R+1>(buf+used,len-used,map,args...);
}
); + Kokkos::Impl::throw_runtime_exception(std::string(buffer)); +#else + Kokkos::abort("View bounds error"); +#endif + } +} + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #ifndef KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewOffset.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewOffset.hpp deleted file mode 100644 index 5748e722c0076e9f47a7c538bd4d2b6f7458e9b8..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/impl/Kokkos_ViewOffset.hpp +++ /dev/null @@ -1,1341 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_VIEWOFFSET_HPP -#define KOKKOS_VIEWOFFSET_HPP - -#include <Kokkos_Pair.hpp> -#include <Kokkos_Layout.hpp> -#include <impl/Kokkos_Traits.hpp> -#include <impl/Kokkos_Shape.hpp> - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { namespace Impl { - -template < class ShapeType , class LayoutType , typename Enable = void > -struct ViewOffset ; - -//---------------------------------------------------------------------------- -// LayoutLeft AND ( 1 >= rank OR 0 == rank_dynamic ) : no padding / striding -template < class ShapeType > -struct ViewOffset< ShapeType , LayoutLeft - , typename enable_if<( 1 >= ShapeType::rank - || - 0 == ShapeType::rank_dynamic - )>::type > - : public ShapeType -{ - typedef size_t size_type ; - typedef ShapeType shape_type ; - typedef LayoutLeft array_layout ; - - enum { has_padding = false }; - - template< unsigned R > - KOKKOS_INLINE_FUNCTION - void assign( size_t n ) - { assign_shape_dimension<R>( *this , n ); } - - // Return whether the subview introduced noncontiguity - template< class S , class L > - KOKKOS_INLINE_FUNCTION - typename Impl::enable_if<( 0 == shape_type::rank && - 
Impl::is_same<L,LayoutLeft>::value - ), bool >::type - assign_subview( const ViewOffset<S,L,void> & - , const size_t n0 - , const size_t n1 - , const size_t n2 - , const size_t n3 - , const size_t n4 - , const size_t n5 - , const size_t n6 - , const size_t n7 - ) - { - return false ; // did not introduce noncontiguity - } - - // This subview must be 1 == rank and 1 == rank_dynamic. - // The source dimension #0 must be non-zero and all other dimensions are zero. - // Return whether the subview introduced noncontiguity - template< class S , class L > - KOKKOS_INLINE_FUNCTION - typename Impl::enable_if<( 1 == shape_type::rank && - 1 == shape_type::rank_dynamic && - 1 <= S::rank && - Impl::is_same<L,LayoutLeft>::value - ), bool >::type - assign_subview( const ViewOffset<S,L,void> & - , const size_t n0 - , const size_t n1 - , const size_t n2 - , const size_t n3 - , const size_t n4 - , const size_t n5 - , const size_t n6 - , const size_t n7 - ) - { - // n1 .. n7 must be zero - shape_type::N0 = n0 ; - return false ; // did not introduce noncontiguity - } - - - KOKKOS_INLINE_FUNCTION - void assign( size_t n0 , size_t n1 , size_t n2 , size_t n3 - , size_t n4 , size_t n5 , size_t n6 , size_t n7 - , size_t = 0 ) - { shape_type::assign( *this , n0, n1, n2, n3, n4, n5, n6, n7 ); } - - template< class ShapeRHS > - KOKKOS_INLINE_FUNCTION - void assign( const ViewOffset< ShapeRHS , LayoutLeft > & rhs - , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank) - && - int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic) - )>::type * = 0 ) - { shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); } - - template< class ShapeRHS > - KOKKOS_INLINE_FUNCTION - void assign( const ViewOffset< ShapeRHS , LayoutRight > & rhs - , typename enable_if<( 1 == int(ShapeRHS::rank) - && - 1 == int(shape_type::rank) - && - 1 == int(shape_type::rank_dynamic) - )>::type * = 0 ) - { shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, 
rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); } - - KOKKOS_INLINE_FUNCTION - void set_padding() {} - - KOKKOS_INLINE_FUNCTION - size_type cardinality() const - { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; } - - KOKKOS_INLINE_FUNCTION - size_type capacity() const - { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; } - - // Stride with [ rank ] value is the total length - template< typename iType > - KOKKOS_INLINE_FUNCTION - void stride( iType * const s ) const - { - s[0] = 1 ; - if ( 0 < shape_type::rank ) { s[1] = shape_type::N0 ; } - if ( 1 < shape_type::rank ) { s[2] = s[1] * shape_type::N1 ; } - if ( 2 < shape_type::rank ) { s[3] = s[2] * shape_type::N2 ; } - if ( 3 < shape_type::rank ) { s[4] = s[3] * shape_type::N3 ; } - if ( 4 < shape_type::rank ) { s[5] = s[4] * shape_type::N4 ; } - if ( 5 < shape_type::rank ) { s[6] = s[5] * shape_type::N5 ; } - if ( 6 < shape_type::rank ) { s[7] = s[6] * shape_type::N6 ; } - if ( 7 < shape_type::rank ) { s[8] = s[7] * shape_type::N7 ; } - } - - KOKKOS_INLINE_FUNCTION size_type stride_0() const { return 1 ; } - KOKKOS_INLINE_FUNCTION size_type stride_1() const { return shape_type::N0 ; } - KOKKOS_INLINE_FUNCTION size_type stride_2() const { return shape_type::N0 * shape_type::N1 ; } - KOKKOS_INLINE_FUNCTION size_type stride_3() const { return shape_type::N0 * shape_type::N1 * shape_type::N2 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_4() const - { return shape_type::N0 * shape_type::N1 * shape_type::N2 * shape_type::N3 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_5() const - { return shape_type::N0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_6() const - { return shape_type::N0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * 
shape_type::N4 * shape_type::N5 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_7() const - { return shape_type::N0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 ; } - - // rank 1 - template< typename I0 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const & i0 ) const { return i0 ; } - - // rank 2 - template < typename I0 , typename I1 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const & i0 , I1 const & i1 ) const - { return i0 + shape_type::N0 * i1 ; } - - //rank 3 - template <typename I0, typename I1, typename I2> - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0 - , I1 const& i1 - , I2 const& i2 - ) const - { - return i0 + shape_type::N0 * ( - i1 + shape_type::N1 * i2 ); - } - - //rank 4 - template <typename I0, typename I1, typename I2, typename I3> - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3 ) const - { - return i0 + shape_type::N0 * ( - i1 + shape_type::N1 * ( - i2 + shape_type::N2 * i3 )); - } - - //rank 5 - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4 ) const - { - return i0 + shape_type::N0 * ( - i1 + shape_type::N1 * ( - i2 + shape_type::N2 * ( - i3 + shape_type::N3 * i4 ))); - } - - //rank 6 - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4, typename I5 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5 ) const - { - return i0 + shape_type::N0 * ( - i1 + shape_type::N1 * ( - i2 + shape_type::N2 * ( - i3 + shape_type::N3 * ( - i4 + shape_type::N4 * i5 )))); - } - - //rank 7 - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4, typename I5, typename I6 > - KOKKOS_FORCEINLINE_FUNCTION - size_type 
operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6) const - { - return i0 + shape_type::N0 * ( - i1 + shape_type::N1 * ( - i2 + shape_type::N2 * ( - i3 + shape_type::N3 * ( - i4 + shape_type::N4 * ( - i5 + shape_type::N5 * i6 ))))); - } - - //rank 8 - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4, typename I5, typename I6, typename I7 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6, I7 const& i7) const - { - return i0 + shape_type::N0 * ( - i1 + shape_type::N1 * ( - i2 + shape_type::N2 * ( - i3 + shape_type::N3 * ( - i4 + shape_type::N4 * ( - i5 + shape_type::N5 * ( - i6 + shape_type::N6 * i7 )))))); - } -}; - -//---------------------------------------------------------------------------- -// LayoutLeft AND ( 1 < rank AND 0 < rank_dynamic ) : has padding / striding -template < class ShapeType > -struct ViewOffset< ShapeType , LayoutLeft - , typename enable_if<( 1 < ShapeType::rank - && - 0 < ShapeType::rank_dynamic - )>::type > - : public ShapeType -{ - typedef size_t size_type ; - typedef ShapeType shape_type ; - typedef LayoutLeft array_layout ; - - enum { has_padding = true }; - - size_type S0 ; - - // This subview must be 2 == rank and 2 == rank_dynamic - // due to only having stride #0. - // The source dimension #0 must be non-zero for stride-one leading dimension. - // At most subsequent dimension can be non-zero. - // Return whether the subview introduced noncontiguity. 
- template< class S , class L > - KOKKOS_INLINE_FUNCTION - typename Impl::enable_if<( 2 == shape_type::rank && - 2 == shape_type::rank_dynamic && - 2 <= S::rank && - Impl::is_same<L,LayoutLeft>::value - ), bool >::type - assign_subview( const ViewOffset<S,L,void> & rhs - , const size_t n0 - , const size_t n1 - , const size_t n2 - , const size_t n3 - , const size_t n4 - , const size_t n5 - , const size_t n6 - , const size_t n7 - ) - { - // N1 = second non-zero dimension - // S0 = stride for second non-zero dimension - shape_type::N0 = n0 ; - shape_type::N1 = 0 ; - S0 = 0 ; - - if ( n1 ) { shape_type::N1 = n1 ; S0 = rhs.stride_1(); } - else if ( 2 < S::rank && n2 ) { shape_type::N1 = n2 ; S0 = rhs.stride_2(); } - else if ( 3 < S::rank && n3 ) { shape_type::N1 = n3 ; S0 = rhs.stride_3(); } - else if ( 4 < S::rank && n4 ) { shape_type::N1 = n4 ; S0 = rhs.stride_4(); } - else if ( 5 < S::rank && n5 ) { shape_type::N1 = n5 ; S0 = rhs.stride_5(); } - else if ( 6 < S::rank && n6 ) { shape_type::N1 = n6 ; S0 = rhs.stride_6(); } - else if ( 7 < S::rank && n7 ) { shape_type::N1 = n7 ; S0 = rhs.stride_7(); } - - // Introduce noncontiguity if change the first dimension - // or took a range of a dimension after the second. 
- return ( size_t(shape_type::N0) != size_t(rhs.N0) ) || ( 0 == n1 ); - } - - - template< unsigned R > - KOKKOS_INLINE_FUNCTION - void assign( size_t n ) - { assign_shape_dimension<R>( *this , n ); } - - - KOKKOS_INLINE_FUNCTION - void assign( size_t n0 , size_t n1 , size_t n2 , size_t n3 - , size_t n4 , size_t n5 , size_t n6 , size_t n7 - , size_t = 0 ) - { shape_type::assign( *this , n0, n1, n2, n3, n4, n5, n6, n7 ); S0 = shape_type::N0 ; } - - template< class ShapeRHS > - KOKKOS_INLINE_FUNCTION - void assign( const ViewOffset< ShapeRHS , LayoutLeft > & rhs - , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank) - && - int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic) - && - int(ShapeRHS::rank_dynamic) == 0 - )>::type * = 0 ) - { - shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); - S0 = shape_type::N0 ; // No padding when dynamic_rank == 0 - } - - template< class ShapeRHS > - KOKKOS_INLINE_FUNCTION - void assign( const ViewOffset< ShapeRHS , LayoutLeft > & rhs - , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank) - && - int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic) - && - int(ShapeRHS::rank_dynamic) > 0 - )>::type * = 0 ) - { - shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); - S0 = rhs.S0 ; // possibly padding when dynamic rank > 0 - } - - KOKKOS_INLINE_FUNCTION - void set_padding() - { - enum { div = MEMORY_ALIGNMENT / shape_type::scalar_size }; - enum { mod = MEMORY_ALIGNMENT % shape_type::scalar_size }; - enum { align = 0 == mod ? div : 0 }; - - if ( align && MEMORY_ALIGNMENT_THRESHOLD * align < S0 ) { - - const size_type count_mod = S0 % ( div ? 
div : 1 ); - - if ( count_mod ) { S0 += align - count_mod ; } - } - } - - KOKKOS_INLINE_FUNCTION - size_type cardinality() const - { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; } - - KOKKOS_INLINE_FUNCTION - size_type capacity() const - { return size_type(S0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; } - - // Stride with [ rank ] as total length - template< typename iType > - KOKKOS_INLINE_FUNCTION - void stride( iType * const s ) const - { - s[0] = 1 ; - if ( 0 < shape_type::rank ) { s[1] = S0 ; } - if ( 1 < shape_type::rank ) { s[2] = s[1] * shape_type::N1 ; } - if ( 2 < shape_type::rank ) { s[3] = s[2] * shape_type::N2 ; } - if ( 3 < shape_type::rank ) { s[4] = s[3] * shape_type::N3 ; } - if ( 4 < shape_type::rank ) { s[5] = s[4] * shape_type::N4 ; } - if ( 5 < shape_type::rank ) { s[6] = s[5] * shape_type::N5 ; } - if ( 6 < shape_type::rank ) { s[7] = s[6] * shape_type::N6 ; } - if ( 7 < shape_type::rank ) { s[8] = s[7] * shape_type::N7 ; } - } - - KOKKOS_INLINE_FUNCTION size_type stride_0() const { return 1 ; } - KOKKOS_INLINE_FUNCTION size_type stride_1() const { return S0 ; } - KOKKOS_INLINE_FUNCTION size_type stride_2() const { return S0 * shape_type::N1 ; } - KOKKOS_INLINE_FUNCTION size_type stride_3() const { return S0 * shape_type::N1 * shape_type::N2 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_4() const - { return S0 * shape_type::N1 * shape_type::N2 * shape_type::N3 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_5() const - { return S0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_6() const - { return S0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_7() const - { return S0 * shape_type::N1 * 
shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 ; } - - // rank 2 - template < typename I0 , typename I1 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const & i0 , I1 const & i1) const - { return i0 + S0 * i1 ; } - - //rank 3 - template <typename I0, typename I1, typename I2> - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 ) const - { - return i0 + S0 * ( - i1 + shape_type::N1 * i2 ); - } - - //rank 4 - template <typename I0, typename I1, typename I2, typename I3> - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3 ) const - { - return i0 + S0 * ( - i1 + shape_type::N1 * ( - i2 + shape_type::N2 * i3 )); - } - - //rank 5 - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4 ) const - { - return i0 + S0 * ( - i1 + shape_type::N1 * ( - i2 + shape_type::N2 * ( - i3 + shape_type::N3 * i4 ))); - } - - //rank 6 - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4, typename I5 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5 ) const - { - return i0 + S0 * ( - i1 + shape_type::N1 * ( - i2 + shape_type::N2 * ( - i3 + shape_type::N3 * ( - i4 + shape_type::N4 * i5 )))); - } - - //rank 7 - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4, typename I5, typename I6 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6 ) const - { - return i0 + S0 * ( - i1 + shape_type::N1 * ( - i2 + shape_type::N2 * ( - i3 + shape_type::N3 * ( - i4 + shape_type::N4 * ( - i5 + shape_type::N5 * i6 ))))); - } - - //rank 8 - template < typename I0, typename I1, 
typename I2, typename I3 - ,typename I4, typename I5, typename I6, typename I7 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2, I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6, I7 const& i7 ) const - { - return i0 + S0 * ( - i1 + shape_type::N1 * ( - i2 + shape_type::N2 * ( - i3 + shape_type::N3 * ( - i4 + shape_type::N4 * ( - i5 + shape_type::N5 * ( - i6 + shape_type::N6 * i7 )))))); - } -}; - -//---------------------------------------------------------------------------- -// LayoutRight AND ( 1 >= rank OR 1 >= rank_dynamic ) : no padding / striding -template < class ShapeType > -struct ViewOffset< ShapeType , LayoutRight - , typename enable_if<( 1 >= ShapeType::rank - || - 1 >= ShapeType::rank_dynamic - )>::type > - : public ShapeType -{ - typedef size_t size_type; - typedef ShapeType shape_type; - typedef LayoutRight array_layout ; - - enum { has_padding = false }; - - // This subview must be 1 == rank and 1 == rank_dynamic - // The source view's last dimension must be non-zero - // Return whether the subview introduced noncontiguity - template< class S , class L > - KOKKOS_INLINE_FUNCTION - typename Impl::enable_if<( 0 == shape_type::rank && - Impl::is_same<L,LayoutRight>::value - ), bool >::type - assign_subview( const ViewOffset<S,L,void> & - , const size_t n0 - , const size_t n1 - , const size_t n2 - , const size_t n3 - , const size_t n4 - , const size_t n5 - , const size_t n6 - , const size_t n7 - ) - { return false ; } - - // This subview must be 1 == rank and 1 == rank_dynamic - // The source view's last dimension must be non-zero - // Return whether the subview introduced noncontiguity - template< class S , class L > - KOKKOS_INLINE_FUNCTION - typename Impl::enable_if<( 1 == shape_type::rank && - 1 == shape_type::rank_dynamic && - 1 <= S::rank && - Impl::is_same<L,LayoutRight>::value - ), bool >::type - assign_subview( const ViewOffset<S,L,void> & - , const size_t n0 - , const size_t n1 - , const 
size_t n2 - , const size_t n3 - , const size_t n4 - , const size_t n5 - , const size_t n6 - , const size_t n7 - ) - { - shape_type::N0 = S::rank == 1 ? n0 : ( - S::rank == 2 ? n1 : ( - S::rank == 3 ? n2 : ( - S::rank == 4 ? n3 : ( - S::rank == 5 ? n4 : ( - S::rank == 6 ? n5 : ( - S::rank == 7 ? n6 : n7 )))))); - // should have n0 .. n_(rank-2) equal zero - return false ; - } - - template< unsigned R > - KOKKOS_INLINE_FUNCTION - void assign( size_t n ) - { assign_shape_dimension<R>( *this , n ); } - - KOKKOS_INLINE_FUNCTION - void assign( size_t n0 , size_t n1 , size_t n2 , size_t n3 - , size_t n4 , size_t n5 , size_t n6 , size_t n7 - , size_t = 0 ) - { shape_type::assign( *this , n0, n1, n2, n3, n4, n5, n6, n7 ); } - - template< class ShapeRHS > - KOKKOS_INLINE_FUNCTION - void assign( const ViewOffset< ShapeRHS , LayoutRight > & rhs - , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank) - && - int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic) - )>::type * = 0 ) - { shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); } - - template< class ShapeRHS > - KOKKOS_INLINE_FUNCTION - void assign( const ViewOffset< ShapeRHS , LayoutLeft > & rhs - , typename enable_if<( 1 == int(ShapeRHS::rank) - && - 1 == int(shape_type::rank) - && - 1 == int(shape_type::rank_dynamic) - )>::type * = 0 ) - { shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); } - - KOKKOS_INLINE_FUNCTION - void set_padding() {} - - KOKKOS_INLINE_FUNCTION - size_type cardinality() const - { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; } - - KOKKOS_INLINE_FUNCTION - size_type capacity() const - { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; } - - size_type stride_R() const - { - return 
size_type(shape_type::N1) * shape_type::N2 * shape_type::N3 * - shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; - }; - - // Stride with [rank] as total length - template< typename iType > - KOKKOS_INLINE_FUNCTION - void stride( iType * const s ) const - { - size_type n = 1 ; - if ( 7 < shape_type::rank ) { s[7] = n ; n *= shape_type::N7 ; } - if ( 6 < shape_type::rank ) { s[6] = n ; n *= shape_type::N6 ; } - if ( 5 < shape_type::rank ) { s[5] = n ; n *= shape_type::N5 ; } - if ( 4 < shape_type::rank ) { s[4] = n ; n *= shape_type::N4 ; } - if ( 3 < shape_type::rank ) { s[3] = n ; n *= shape_type::N3 ; } - if ( 2 < shape_type::rank ) { s[2] = n ; n *= shape_type::N2 ; } - if ( 1 < shape_type::rank ) { s[1] = n ; n *= shape_type::N1 ; } - if ( 0 < shape_type::rank ) { s[0] = n ; } - s[shape_type::rank] = n * shape_type::N0 ; - } - - KOKKOS_INLINE_FUNCTION - size_type stride_7() const { return 1 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_6() const { return shape_type::N7 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_5() const { return shape_type::N7 * shape_type::N6 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_4() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_3() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_2() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 * shape_type::N3 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_1() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 * shape_type::N3 * shape_type::N2 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_0() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 * shape_type::N3 * shape_type::N2 * shape_type::N1 ; } - - // rank 1 - template <typename I0> - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0) const - { - 
return i0 ; - } - - // rank 2 - template <typename I0, typename I1> - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1 ) const - { - return i1 + shape_type::N1 * i0 ; - } - - template <typename I0, typename I1, typename I2> - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 ) const - { - return i2 + shape_type::N2 * ( - i1 + shape_type::N1 * ( i0 )); - } - - template <typename I0, typename I1, typename I2, typename I3> - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3 ) const - { - return i3 + shape_type::N3 * ( - i2 + shape_type::N2 * ( - i1 + shape_type::N1 * ( i0 ))); - } - - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4 ) const - { - return i4 + shape_type::N4 * ( - i3 + shape_type::N3 * ( - i2 + shape_type::N2 * ( - i1 + shape_type::N1 * ( i0 )))); - } - - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4, typename I5 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5 ) const - { - return i5 + shape_type::N5 * ( - i4 + shape_type::N4 * ( - i3 + shape_type::N3 * ( - i2 + shape_type::N2 * ( - i1 + shape_type::N1 * ( i0 ))))); - } - - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4, typename I5, typename I6 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6 ) const - { - return i6 + shape_type::N6 * ( - i5 + shape_type::N5 * ( - i4 + shape_type::N4 * ( - i3 + shape_type::N3 * ( - i2 + shape_type::N2 * ( - i1 + shape_type::N1 * ( i0 )))))); - } - - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4, typename 
I5, typename I6, typename I7 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6, I7 const& i7 ) const - { - return i7 + shape_type::N7 * ( - i6 + shape_type::N6 * ( - i5 + shape_type::N5 * ( - i4 + shape_type::N4 * ( - i3 + shape_type::N3 * ( - i2 + shape_type::N2 * ( - i1 + shape_type::N1 * ( i0 ))))))); - } -}; - -//---------------------------------------------------------------------------- -// LayoutRight AND ( 1 < rank AND 1 < rank_dynamic ) : has padding / striding -template < class ShapeType > -struct ViewOffset< ShapeType , LayoutRight - , typename enable_if<( 1 < ShapeType::rank - && - 1 < ShapeType::rank_dynamic - )>::type > - : public ShapeType -{ - typedef size_t size_type; - typedef ShapeType shape_type; - typedef LayoutRight array_layout ; - - enum { has_padding = true }; - - size_type SR ; - - // This subview must be 2 == rank and 2 == rank_dynamic - // due to only having stride #(rank-1). - // The source dimension #(rank-1) must be non-zero for stride-one leading dimension. - // At most one prior dimension can be non-zero. - // Return whether the subview introduced noncontiguity. - template< class S , class L > - KOKKOS_INLINE_FUNCTION - typename Impl::enable_if<( 2 == shape_type::rank && - 2 == shape_type::rank_dynamic && - 2 <= S::rank && - Impl::is_same<L,LayoutRight>::value - ), bool >::type - assign_subview( const ViewOffset<S,L,void> & rhs - , const size_t n0 - , const size_t n1 - , const size_t n2 - , const size_t n3 - , const size_t n4 - , const size_t n5 - , const size_t n6 - , const size_t n7 - ) - { - const size_type nR = S::rank == 2 ? n1 : ( - S::rank == 3 ? n2 : ( - S::rank == 4 ? n3 : ( - S::rank == 5 ? n4 : ( - S::rank == 6 ? n5 : ( - S::rank == 7 ? 
n6 : n7 ))))); - - // N0 = first non-zero-dimension - // N1 = last non-zero dimension - // SR = stride for second non-zero dimension - shape_type::N0 = 0 ; - shape_type::N1 = nR ; - SR = 0 ; - - if ( n0 ) { shape_type::N0 = n0 ; SR = rhs.stride_0(); } - else if ( 2 < S::rank && n1 ) { shape_type::N0 = n1 ; SR = rhs.stride_1(); } - else if ( 3 < S::rank && n2 ) { shape_type::N0 = n2 ; SR = rhs.stride_2(); } - else if ( 4 < S::rank && n3 ) { shape_type::N0 = n3 ; SR = rhs.stride_3(); } - else if ( 5 < S::rank && n4 ) { shape_type::N0 = n4 ; SR = rhs.stride_4(); } - else if ( 6 < S::rank && n5 ) { shape_type::N0 = n5 ; SR = rhs.stride_5(); } - else if ( 7 < S::rank && n6 ) { shape_type::N0 = n6 ; SR = rhs.stride_6(); } - - // Introduce noncontiguous if change the last dimension - // or take a range of a dimension other than the second-to-last dimension. - - return 2 == S::rank ? ( size_t(shape_type::N1) != size_t(rhs.N1) || 0 == n0 ) : ( - 3 == S::rank ? ( size_t(shape_type::N1) != size_t(rhs.N2) || 0 == n1 ) : ( - 4 == S::rank ? ( size_t(shape_type::N1) != size_t(rhs.N3) || 0 == n2 ) : ( - 5 == S::rank ? ( size_t(shape_type::N1) != size_t(rhs.N4) || 0 == n3 ) : ( - 6 == S::rank ? ( size_t(shape_type::N1) != size_t(rhs.N5) || 0 == n4 ) : ( - 7 == S::rank ? 
( size_t(shape_type::N1) != size_t(rhs.N6) || 0 == n5 ) : ( - ( size_t(shape_type::N1) != size_t(rhs.N7) || 0 == n6 ) )))))); - } - - template< unsigned R > - KOKKOS_INLINE_FUNCTION - void assign( size_t n ) - { assign_shape_dimension<R>( *this , n ); } - - KOKKOS_INLINE_FUNCTION - void assign( size_t n0 , size_t n1 , size_t n2 , size_t n3 - , size_t n4 , size_t n5 , size_t n6 , size_t n7 - , size_t = 0 ) - { - shape_type::assign( *this , n0, n1, n2, n3, n4, n5, n6, n7 ); - SR = size_type(shape_type::N1) * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; - } - - template< class ShapeRHS > - KOKKOS_INLINE_FUNCTION - void assign( const ViewOffset< ShapeRHS , LayoutRight > & rhs - , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank) - && - int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic) - && - int(ShapeRHS::rank_dynamic) <= 1 - )>::type * = 0 ) - { - shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); - SR = shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; - } - - template< class ShapeRHS > - KOKKOS_INLINE_FUNCTION - void assign( const ViewOffset< ShapeRHS , LayoutRight > & rhs - , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank) - && - int(ShapeRHS::rank_dynamic) <= int(shape_type::rank_dynamic) - && - int(ShapeRHS::rank_dynamic) > 1 - )>::type * = 0 ) - { - shape_type::assign( *this , rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); - SR = rhs.SR ; - } - - KOKKOS_INLINE_FUNCTION - void set_padding() - { - enum { div = MEMORY_ALIGNMENT / shape_type::scalar_size }; - enum { mod = MEMORY_ALIGNMENT % shape_type::scalar_size }; - enum { align = 0 == mod ? div : 0 }; - - if ( align && MEMORY_ALIGNMENT_THRESHOLD * align < SR ) { - - const size_type count_mod = SR % ( div ? 
div : 1 ); - - if ( count_mod ) { SR += align - count_mod ; } - } - } - - KOKKOS_INLINE_FUNCTION - size_type cardinality() const - { return size_type(shape_type::N0) * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; } - - KOKKOS_INLINE_FUNCTION - size_type capacity() const { return shape_type::N0 * SR ; } - - template< typename iType > - KOKKOS_INLINE_FUNCTION - void stride( iType * const s ) const - { - size_type n = 1 ; - if ( 7 < shape_type::rank ) { s[7] = n ; n *= shape_type::N7 ; } - if ( 6 < shape_type::rank ) { s[6] = n ; n *= shape_type::N6 ; } - if ( 5 < shape_type::rank ) { s[5] = n ; n *= shape_type::N5 ; } - if ( 4 < shape_type::rank ) { s[4] = n ; n *= shape_type::N4 ; } - if ( 3 < shape_type::rank ) { s[3] = n ; n *= shape_type::N3 ; } - if ( 2 < shape_type::rank ) { s[2] = n ; n *= shape_type::N2 ; } - if ( 1 < shape_type::rank ) { s[1] = n ; n *= shape_type::N1 ; } - if ( 0 < shape_type::rank ) { s[0] = SR ; } - s[shape_type::rank] = SR * shape_type::N0 ; - } - - KOKKOS_INLINE_FUNCTION - size_type stride_7() const { return 1 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_6() const { return shape_type::N7 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_5() const { return shape_type::N7 * shape_type::N6 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_4() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_3() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_2() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 * shape_type::N3 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_1() const { return shape_type::N7 * shape_type::N6 * shape_type::N5 * shape_type::N4 * shape_type::N3 * shape_type::N2 ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_0() const { return SR ; } - - // rank 2 - template <typename 
I0, typename I1> - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1 ) const - { - return i1 + i0 * SR ; - } - - template <typename I0, typename I1, typename I2> - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 ) const - { - return i2 + shape_type::N2 * ( i1 ) + - i0 * SR ; - } - - template <typename I0, typename I1, typename I2, typename I3> - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3 ) const - { - return i3 + shape_type::N3 * ( - i2 + shape_type::N2 * ( i1 )) + - i0 * SR ; - } - - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4 ) const - { - return i4 + shape_type::N4 * ( - i3 + shape_type::N3 * ( - i2 + shape_type::N2 * ( i1 ))) + - i0 * SR ; - } - - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4, typename I5 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5 ) const - { - return i5 + shape_type::N5 * ( - i4 + shape_type::N4 * ( - i3 + shape_type::N3 * ( - i2 + shape_type::N2 * ( i1 )))) + - i0 * SR ; - } - - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4, typename I5, typename I6 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6 ) const - { - return i6 + shape_type::N6 * ( - i5 + shape_type::N5 * ( - i4 + shape_type::N4 * ( - i3 + shape_type::N3 * ( - i2 + shape_type::N2 * ( i1 ))))) + - i0 * SR ; - } - - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4, typename I5, typename I6, typename I7 > - KOKKOS_FORCEINLINE_FUNCTION - size_type operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 
const& i3, I4 const& i4, I5 const& i5, I6 const& i6, I7 const& i7 ) const - { - return i7 + shape_type::N7 * ( - i6 + shape_type::N6 * ( - i5 + shape_type::N5 * ( - i4 + shape_type::N4 * ( - i3 + shape_type::N3 * ( - i2 + shape_type::N2 * ( i1 )))))) + - i0 * SR ; - } -}; - -//---------------------------------------------------------------------------- -// LayoutStride : -template < class ShapeType > -struct ViewOffset< ShapeType , LayoutStride - , typename enable_if<( 0 < ShapeType::rank )>::type > - : public ShapeType -{ - typedef size_t size_type; - typedef ShapeType shape_type; - typedef LayoutStride array_layout ; - - size_type S[ shape_type::rank + 1 ]; - - template< class SType , class L > - KOKKOS_INLINE_FUNCTION - bool assign_subview( const ViewOffset<SType,L,void> & rhs - , const size_type n0 - , const size_type n1 - , const size_type n2 - , const size_type n3 - , const size_type n4 - , const size_type n5 - , const size_type n6 - , const size_type n7 - ) - { - shape_type::assign( *this, 0,0,0,0, 0,0,0,0 ); - - for ( int i = 0 ; i < int(shape_type::rank+1) ; ++i ) { S[i] = 0 ; } - - // preconditions: - // shape_type::rank <= rhs.rank - // shape_type::rank == count of nonzero( rhs_dim[i] ) - size_type dim[8] = { n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 }; - size_type str[ SType::rank + 1 ]; - - rhs.stride( str ); - - // contract the zero-dimensions - int r = 0 ; - for ( int i = 0 ; i < int(SType::rank) ; ++i ) { - if ( 0 != dim[i] ) { - dim[r] = dim[i] ; - str[r] = str[i] ; - ++r ; - } - } - - if ( int(shape_type::rank) == r ) { - // The shape is non-zero - for ( int i = 0 ; i < int(shape_type::rank) ; ++i ) { - const size_type cap = dim[i] * ( S[i] = str[i] ); - if ( S[ shape_type::rank ] < cap ) S[ shape_type::rank ] = cap ; - } - // set the contracted nonzero dimensions - shape_type::assign( *this, dim[0], dim[1], dim[2], dim[3], dim[4], dim[5], dim[6], dim[7] ); - } - - return true ; // definitely noncontiguous - } - - template< unsigned R > - 
KOKKOS_INLINE_FUNCTION - void assign( size_t n ) - { assign_shape_dimension<R>( *this , n ); } - - template< class ShapeRHS , class Layout > - KOKKOS_INLINE_FUNCTION - void assign( const ViewOffset<ShapeRHS,Layout> & rhs - , typename enable_if<( int(ShapeRHS::rank) == int(shape_type::rank) )>::type * = 0 ) - { - rhs.stride(S); - shape_type::assign( *this, rhs.N0, rhs.N1, rhs.N2, rhs.N3, rhs.N4, rhs.N5, rhs.N6, rhs.N7 ); - } - - KOKKOS_INLINE_FUNCTION - void assign( const LayoutStride & layout ) - { - size_type max = 0 ; - for ( int i = 0 ; i < shape_type::rank ; ++i ) { - S[i] = layout.stride[i] ; - const size_type m = layout.dimension[i] * S[i] ; - if ( max < m ) { max = m ; } - } - S[ shape_type::rank ] = max ; - shape_type::assign( *this, layout.dimension[0], layout.dimension[1], - layout.dimension[2], layout.dimension[3], - layout.dimension[4], layout.dimension[5], - layout.dimension[6], layout.dimension[7] ); - } - - KOKKOS_INLINE_FUNCTION - void assign( size_t s0 , size_t s1 , size_t s2 , size_t s3 - , size_t s4 , size_t s5 , size_t s6 , size_t s7 - , size_t s8 ) - { - const size_t str[9] = { s0, s1, s2, s3, s4, s5, s6, s7, s8 }; - - // Last argument is the total length. - // Total length must be non-zero. - // All strides must be non-zero and less than total length. - bool ok = 0 < str[ shape_type::rank ] ; - - for ( int i = 0 ; ( i < shape_type::rank ) && - ( ok = 0 < str[i] && str[i] < str[ shape_type::rank ] ); ++i ); - - if ( ok ) { - size_t dim[8] = { 1,1,1,1,1,1,1,1 }; - int iorder[9] = { 0,0,0,0,0,0,0,0,0 }; - - // Ordering of strides smallest to largest. - for ( int i = 1 ; i < shape_type::rank ; ++i ) { - int j = i ; - for ( ; 0 < j && str[i] < str[ iorder[j-1] ] ; --j ) { - iorder[j] = iorder[j-1] ; - } - iorder[j] = i ; - } - - // Last argument is the total length. - iorder[ shape_type::rank ] = shape_type::rank ; - - // Determine dimension associated with each stride. 
- // Guarantees non-overlap by truncating dimension - // if ( 0 != str[ iorder[i+1] ] % str[ iorder[i] ] ) - for ( int i = 0 ; i < shape_type::rank ; ++i ) { - dim[ iorder[i] ] = str[ iorder[i+1] ] / str[ iorder[i] ] ; - } - - // Assign dimensions and strides: - shape_type::assign( *this, dim[0], dim[1], dim[2], dim[3], dim[4], dim[5], dim[6], dim[7] ); - for ( int i = 0 ; i <= shape_type::rank ; ++i ) { S[i] = str[i] ; } - } - else { - shape_type::assign(*this,0,0,0,0,0,0,0,0); - for ( int i = 0 ; i <= shape_type::rank ; ++i ) { S[i] = 0 ; } - } - } - - KOKKOS_INLINE_FUNCTION - void set_padding() {} - - KOKKOS_INLINE_FUNCTION - size_type cardinality() const - { return shape_type::N0 * shape_type::N1 * shape_type::N2 * shape_type::N3 * shape_type::N4 * shape_type::N5 * shape_type::N6 * shape_type::N7 ; } - - KOKKOS_INLINE_FUNCTION - size_type capacity() const { return S[ shape_type::rank ]; } - - template< typename iType > - KOKKOS_INLINE_FUNCTION - void stride( iType * const s ) const - { for ( int i = 0 ; i <= shape_type::rank ; ++i ) { s[i] = S[i] ; } } - - KOKKOS_INLINE_FUNCTION - size_type stride_0() const { return S[0] ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_1() const { return S[1] ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_2() const { return S[2] ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_3() const { return S[3] ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_4() const { return S[4] ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_5() const { return S[5] ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_6() const { return S[6] ; } - - KOKKOS_INLINE_FUNCTION - size_type stride_7() const { return S[7] ; } - - // rank 1 - template <typename I0 > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==1),size_type>::type - operator()( I0 const& i0) const - { - return i0 * S[0] ; - } - - // rank 2 - template <typename I0, typename I1> - KOKKOS_FORCEINLINE_FUNCTION - typename 
std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==2),size_type>::type - operator()( I0 const& i0, I1 const& i1 ) const - { - return i0 * S[0] + i1 * S[1] ; - } - - template <typename I0, typename I1, typename I2> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==3),size_type>::type - operator()( I0 const& i0, I1 const& i1, I2 const& i2 ) const - { - return i0 * S[0] + i1 * S[1] + i2 * S[2] ; - } - - template <typename I0, typename I1, typename I2, typename I3> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==4),size_type>::type - operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3 ) const - { - return i0 * S[0] + i1 * S[1] + i2 * S[2] + i3 * S[3] ; - } - - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4 > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==5),size_type>::type - operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4 ) const - { - return i0 * S[0] + i1 * S[1] + i2 * S[2] + i3 * S[3] + i4 * S[4] ; - } - - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4, typename I5 > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==6),size_type>::type - operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5 ) const - { - return i0 * S[0] + i1 * S[1] + i2 * S[2] + i3 * S[3] + i4 * S[4] + i5 * S[5] ; - } - - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4, typename I5, typename I6 > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==7),size_type>::type - operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6 ) const - { - return i0 * S[0] + 
i1 * S[1] + i2 * S[2] + i3 * S[3] + i4 * S[4] + i5 * S[5] + i6 * S[6] ; - } - - template < typename I0, typename I1, typename I2, typename I3 - ,typename I4, typename I5, typename I6, typename I7 > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< (std::is_integral<I0>::value) && (shape_type::rank==8),size_type>::type - operator()( I0 const& i0, I1 const& i1, I2 const& i2 , I3 const& i3, I4 const& i4, I5 const& i5, I6 const& i6, I7 const& i7 ) const - { - return i0 * S[0] + i1 * S[1] + i2 * S[2] + i3 * S[3] + i4 * S[4] + i5 * S[5] + i6 * S[6] + i7 * S[7] ; - } -}; - -//---------------------------------------------------------------------------- - -template< class T > -struct ViewOffsetRange { - - enum { OK_integral_type = Impl::StaticAssert< Impl::is_integral<T>::value >::value }; - - enum { is_range = false }; - - KOKKOS_INLINE_FUNCTION static - size_t dimension( size_t const , T const & ) { return 0 ; } - - KOKKOS_INLINE_FUNCTION static - size_t begin( T const & i ) { return size_t(i) ; } -}; - -template<> -struct ViewOffsetRange<void> { - enum { is_range = false }; -}; - -template<> -struct ViewOffsetRange< Kokkos::ALL > { - enum { is_range = true }; - - KOKKOS_INLINE_FUNCTION static - size_t dimension( size_t const n , ALL const & ) { return n ; } - - KOKKOS_INLINE_FUNCTION static - size_t begin( ALL const & ) { return 0 ; } -}; - -template< typename iType > -struct ViewOffsetRange< std::pair<iType,iType> > { - - enum { OK_integral_type = Impl::StaticAssert< Impl::is_integral<iType>::value >::value }; - - enum { is_range = true }; - - KOKKOS_INLINE_FUNCTION static - size_t dimension( size_t const n , std::pair<iType,iType> const & r ) - { return ( size_t(r.first) < size_t(r.second) && size_t(r.second) <= n ) ? 
size_t(r.second) - size_t(r.first) : 0 ; } - - KOKKOS_INLINE_FUNCTION static - size_t begin( std::pair<iType,iType> const & r ) { return size_t(r.first) ; } -}; - -template< typename iType > -struct ViewOffsetRange< Kokkos::pair<iType,iType> > { - - enum { OK_integral_type = Impl::StaticAssert< Impl::is_integral<iType>::value >::value }; - - enum { is_range = true }; - - KOKKOS_INLINE_FUNCTION static - size_t dimension( size_t const n , Kokkos::pair<iType,iType> const & r ) - { return ( size_t(r.first) < size_t(r.second) && size_t(r.second) <= n ) ? size_t(r.second) - size_t(r.first) : 0 ; } - - KOKKOS_INLINE_FUNCTION static - size_t begin( Kokkos::pair<iType,iType> const & r ) { return size_t(r.first) ; } -}; - -}} // namespace Kokkos::Impl - -#endif //KOKKOS_VIEWOFFSET_HPP - diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewSupport.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewSupport.hpp deleted file mode 100644 index 8b63039f57000e9d3b0ffa2aaad5a0c3c94d27c4..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/impl/Kokkos_ViewSupport.hpp +++ /dev/null @@ -1,393 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. 
Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_VIEWSUPPORT_HPP -#define KOKKOS_VIEWSUPPORT_HPP - -#include <algorithm> -#include <Kokkos_ExecPolicy.hpp> -#include <impl/Kokkos_Shape.hpp> - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -/** \brief Evaluate if LHS = RHS view assignment is allowed. */ -template< class ViewLHS , class ViewRHS > -struct ViewAssignable -{ - // Same memory space. - // Same value type. 
- // Compatible 'const' qualifier - // Cannot assign managed = unmannaged - enum { assignable_value = - ( is_same< typename ViewLHS::value_type , - typename ViewRHS::value_type >::value - || - is_same< typename ViewLHS::value_type , - typename ViewRHS::const_value_type >::value ) - && - is_same< typename ViewLHS::memory_space , - typename ViewRHS::memory_space >::value - && - ( ! ( ViewLHS::is_managed && ! ViewRHS::is_managed ) ) - }; - - enum { assignable_shape = - // Compatible shape and matching layout: - ( ShapeCompatible< typename ViewLHS::shape_type , - typename ViewRHS::shape_type >::value - && - is_same< typename ViewLHS::array_layout , - typename ViewRHS::array_layout >::value ) - || - // Matching layout, same rank, and LHS dynamic rank - ( is_same< typename ViewLHS::array_layout , - typename ViewRHS::array_layout >::value - && - int(ViewLHS::rank) == int(ViewRHS::rank) - && - int(ViewLHS::rank) == int(ViewLHS::rank_dynamic) ) - || - // Both rank-0, any shape and layout - ( int(ViewLHS::rank) == 0 && int(ViewRHS::rank) == 0 ) - || - // Both rank-1 and LHS is dynamic rank-1, any shape and layout - ( int(ViewLHS::rank) == 1 && int(ViewRHS::rank) == 1 && - int(ViewLHS::rank_dynamic) == 1 ) - }; - - enum { value = assignable_value && assignable_shape }; -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class ExecSpace , class Type , bool Initialize > -struct ViewDefaultConstruct -{ ViewDefaultConstruct( Type * , size_t ) {} }; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class OutputView , class InputView , unsigned Rank = 
OutputView::Rank > -struct ViewRemap -{ - typedef typename OutputView::size_type size_type ; - - const OutputView output ; - const InputView input ; - const size_type n0 ; - const size_type n1 ; - const size_type n2 ; - const size_type n3 ; - const size_type n4 ; - const size_type n5 ; - const size_type n6 ; - const size_type n7 ; - - ViewRemap( const OutputView & arg_out , const InputView & arg_in ) - : output( arg_out ), input( arg_in ) - , n0( std::min( (size_t)arg_out.dimension_0() , (size_t)arg_in.dimension_0() ) ) - , n1( std::min( (size_t)arg_out.dimension_1() , (size_t)arg_in.dimension_1() ) ) - , n2( std::min( (size_t)arg_out.dimension_2() , (size_t)arg_in.dimension_2() ) ) - , n3( std::min( (size_t)arg_out.dimension_3() , (size_t)arg_in.dimension_3() ) ) - , n4( std::min( (size_t)arg_out.dimension_4() , (size_t)arg_in.dimension_4() ) ) - , n5( std::min( (size_t)arg_out.dimension_5() , (size_t)arg_in.dimension_5() ) ) - , n6( std::min( (size_t)arg_out.dimension_6() , (size_t)arg_in.dimension_6() ) ) - , n7( std::min( (size_t)arg_out.dimension_7() , (size_t)arg_in.dimension_7() ) ) - { - typedef typename OutputView::execution_space execution_space ; - Kokkos::RangePolicy< execution_space > range( 0 , n0 ); - parallel_for( range , *this ); - } - - KOKKOS_INLINE_FUNCTION - void operator()( const size_type i0 ) const - { - for ( size_type i1 = 0 ; i1 < n1 ; ++i1 ) { - for ( size_type i2 = 0 ; i2 < n2 ; ++i2 ) { - for ( size_type i3 = 0 ; i3 < n3 ; ++i3 ) { - for ( size_type i4 = 0 ; i4 < n4 ; ++i4 ) { - for ( size_type i5 = 0 ; i5 < n5 ; ++i5 ) { - for ( size_type i6 = 0 ; i6 < n6 ; ++i6 ) { - for ( size_type i7 = 0 ; i7 < n7 ; ++i7 ) { - output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input.at(i0,i1,i2,i3,i4,i5,i6,i7); - }}}}}}} - } -}; - -template< class OutputView , class InputView > -struct ViewRemap< OutputView , InputView , 0 > -{ - typedef typename OutputView::value_type value_type ; - typedef typename OutputView::memory_space dst_space ; - typedef typename 
InputView ::memory_space src_space ; - - ViewRemap( const OutputView & arg_out , const InputView & arg_in ) - { - DeepCopy< dst_space , src_space >( arg_out.ptr_on_device() , - arg_in.ptr_on_device() , - sizeof(value_type) ); - } -}; - -//---------------------------------------------------------------------------- - -template< class ExecSpace , class Type > -struct ViewDefaultConstruct< ExecSpace , Type , true > -{ - Type * const m_ptr ; - - KOKKOS_FORCEINLINE_FUNCTION - void operator()( const typename ExecSpace::size_type& i ) const - { m_ptr[i] = Type(); } - - ViewDefaultConstruct( Type * pointer , size_t capacity ) - : m_ptr( pointer ) - { - Kokkos::RangePolicy< ExecSpace > range( 0 , capacity ); - parallel_for( range , *this ); - ExecSpace::fence(); - } -}; - -template< class OutputView , unsigned Rank = OutputView::Rank , - class Enabled = void > -struct ViewFill -{ - typedef typename OutputView::const_value_type const_value_type ; - typedef typename OutputView::size_type size_type ; - - const OutputView output ; - const_value_type input ; - - ViewFill( const OutputView & arg_out , const_value_type & arg_in ) - : output( arg_out ), input( arg_in ) - { - typedef typename OutputView::execution_space execution_space ; - Kokkos::RangePolicy< execution_space > range( 0 , output.dimension_0() ); - parallel_for( range , *this ); - execution_space::fence(); - } - - KOKKOS_INLINE_FUNCTION - void operator()( const size_type i0 ) const - { - for ( size_type i1 = 0 ; i1 < output.dimension_1() ; ++i1 ) { - for ( size_type i2 = 0 ; i2 < output.dimension_2() ; ++i2 ) { - for ( size_type i3 = 0 ; i3 < output.dimension_3() ; ++i3 ) { - for ( size_type i4 = 0 ; i4 < output.dimension_4() ; ++i4 ) { - for ( size_type i5 = 0 ; i5 < output.dimension_5() ; ++i5 ) { - for ( size_type i6 = 0 ; i6 < output.dimension_6() ; ++i6 ) { - for ( size_type i7 = 0 ; i7 < output.dimension_7() ; ++i7 ) { - output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input ; - }}}}}}} - } -}; - -template< class 
OutputView > -struct ViewFill< OutputView , 0 > -{ - typedef typename OutputView::const_value_type const_value_type ; - typedef typename OutputView::memory_space dst_space ; - - ViewFill( const OutputView & arg_out , const_value_type & arg_in ) - { - DeepCopy< dst_space , dst_space >( arg_out.ptr_on_device() , & arg_in , - sizeof(const_value_type) ); - } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -struct ViewAllocateWithoutInitializing { - - const std::string label ; - - ViewAllocateWithoutInitializing() : label() {} - explicit ViewAllocateWithoutInitializing( const std::string & arg_label ) : label( arg_label ) {} - explicit ViewAllocateWithoutInitializing( const char * const arg_label ) : label( arg_label ) {} -}; - -struct ViewAllocate { - - const std::string label ; - - ViewAllocate() : label() {} - ViewAllocate( const std::string & arg_label ) : label( arg_label ) {} - ViewAllocate( const char * const arg_label ) : label( arg_label ) {} -}; - -} - -namespace Kokkos { -namespace Impl { - -template< class Traits , class AllocationProperties , class Enable = void > -struct ViewAllocProp : public Kokkos::Impl::false_type {}; - -template< class Traits > -struct ViewAllocProp< Traits , Kokkos::ViewAllocate - , typename Kokkos::Impl::enable_if<( - Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value - )>::type > - : public Kokkos::Impl::true_type -{ - typedef size_t size_type ; - typedef const ViewAllocate & property_type ; - - enum { Initialize = true }; - enum { AllowPadding = false }; - - inline - static const std::string & label( property_type p ) { return p.label ; } -}; - -template< class Traits > -struct ViewAllocProp< Traits , std::string - , typename Kokkos::Impl::enable_if<( - Traits::is_managed && ! 
Kokkos::Impl::is_const< typename Traits::value_type >::value - )>::type > - : public Kokkos::Impl::true_type -{ - typedef size_t size_type ; - typedef const std::string & property_type ; - - enum { Initialize = true }; - enum { AllowPadding = false }; - - inline - static const std::string & label( property_type s ) { return s ; } -}; - -template< class Traits , unsigned N > -struct ViewAllocProp< Traits , char[N] - , typename Kokkos::Impl::enable_if<( - Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value - )>::type > - : public Kokkos::Impl::true_type -{ -private: - typedef char label_type[N] ; -public: - - typedef size_t size_type ; - typedef const label_type & property_type ; - - enum { Initialize = true }; - enum { AllowPadding = false }; - - inline - static std::string label( property_type s ) { return std::string(s) ; } -}; - -template< class Traits > -struct ViewAllocProp< Traits , Kokkos::ViewAllocateWithoutInitializing - , typename Kokkos::Impl::enable_if<( - Traits::is_managed && ! 
Kokkos::Impl::is_const< typename Traits::value_type >::value - )>::type > - : public Kokkos::Impl::true_type -{ - typedef size_t size_type ; - typedef const Kokkos::ViewAllocateWithoutInitializing & property_type ; - - enum { Initialize = false }; - enum { AllowPadding = false }; - - inline - static std::string label( property_type s ) { return s.label ; } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class Traits , class PointerProperties , class Enable = void > -struct ViewRawPointerProp : public Kokkos::Impl::false_type {}; - -template< class Traits , typename T > -struct ViewRawPointerProp< Traits , T , - typename Kokkos::Impl::enable_if<( - Impl::is_same< T , typename Traits::value_type >::value || - Impl::is_same< T , typename Traits::non_const_value_type >::value - )>::type > - : public Kokkos::Impl::true_type -{ - typedef size_t size_type ; -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_VIEWSUPPORT_HPP */ - - diff --git a/lib/kokkos/core/src/impl/KokkosExp_ViewTile.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp similarity index 92% rename from lib/kokkos/core/src/impl/KokkosExp_ViewTile.hpp rename to lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp index 8b3749e853a85eea341c4ce8462aec755de4bb11..ecbcf72fe0b6ad92b6ec074f7a1b6b5dcca3322a 100644 --- a/lib/kokkos/core/src/impl/KokkosExp_ViewTile.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp @@ -145,9 +145,9 @@ public: //---------------------------------------- ~ViewOffset() = default ; - ViewOffset() = default ; - ViewOffset( const ViewOffset & ) = default ; - ViewOffset & operator = ( const 
ViewOffset & ) = default ; + KOKKOS_INLINE_FUNCTION ViewOffset() = default ; + KOKKOS_INLINE_FUNCTION ViewOffset( const ViewOffset & ) = default ; + KOKKOS_INLINE_FUNCTION ViewOffset & operator = ( const ViewOffset & ) = default ; template< unsigned TrivialScalarSize > KOKKOS_INLINE_FUNCTION @@ -163,15 +163,15 @@ template< typename T , unsigned N0 , unsigned N1 , class ... P > struct ViewMapping < void - , Kokkos::Experimental::ViewTraits<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...> + , Kokkos::ViewTraits<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...> , Kokkos::LayoutTileLeft<N0,N1,true> , iType0 , iType1 > { typedef Kokkos::LayoutTileLeft<N0,N1,true> src_layout ; - typedef Kokkos::Experimental::ViewTraits< T** , src_layout , P... > src_traits ; - typedef Kokkos::Experimental::ViewTraits< T[N0][N1] , LayoutLeft , P ... > traits ; - typedef Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P ... > type ; + typedef Kokkos::ViewTraits< T** , src_layout , P... > src_traits ; + typedef Kokkos::ViewTraits< T[N0][N1] , LayoutLeft , P ... > traits ; + typedef Kokkos::View< T[N0][N1] , LayoutLeft , P ... > type ; KOKKOS_INLINE_FUNCTION static void assign( ViewMapping< traits , void > & dst @@ -203,8 +203,8 @@ namespace Experimental { template< typename T , unsigned N0 , unsigned N1 , class ... P > KOKKOS_INLINE_FUNCTION -Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P... > -tile_subview( const Kokkos::Experimental::View<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...> & src +Kokkos::View< T[N0][N1] , LayoutLeft , P... > +tile_subview( const Kokkos::View<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...> & src , const size_t i_tile0 , const size_t i_tile1 ) @@ -213,7 +213,7 @@ tile_subview( const Kokkos::Experimental::View<T**,Kokkos::LayoutTileLeft<N0,N1, // by using the first subview argument as the layout. typedef Kokkos::LayoutTileLeft<N0,N1,true> SrcLayout ; - return Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P... 
> + return Kokkos::View< T[N0][N1] , LayoutLeft , P... > ( src , SrcLayout() , i_tile0 , i_tile1 ); } diff --git a/lib/kokkos/core/unit_test/CMakeLists.txt b/lib/kokkos/core/unit_test/CMakeLists.txt index 5bb2b672e124f3b282d760562514afb1719fd957..795657fe876233c8ef7f962bdce12be4d0452e2f 100644 --- a/lib/kokkos/core/unit_test/CMakeLists.txt +++ b/lib/kokkos/core/unit_test/CMakeLists.txt @@ -17,12 +17,33 @@ TRIBITS_ADD_LIBRARY( # INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) IF(Kokkos_ENABLE_Serial) TRIBITS_ADD_EXECUTABLE_AND_TEST( UnitTest_Serial - SOURCES UnitTestMain.cpp TestSerial.cpp + SOURCES + UnitTestMain.cpp + serial/TestSerial_Atomics.cpp + serial/TestSerial_Other.cpp + serial/TestSerial_Reductions.cpp + serial/TestSerial_SubView_a.cpp + serial/TestSerial_SubView_b.cpp + serial/TestSerial_SubView_c01.cpp + serial/TestSerial_SubView_c02.cpp + serial/TestSerial_SubView_c03.cpp + serial/TestSerial_SubView_c04.cpp + serial/TestSerial_SubView_c05.cpp + serial/TestSerial_SubView_c06.cpp + serial/TestSerial_SubView_c07.cpp + serial/TestSerial_SubView_c08.cpp + serial/TestSerial_SubView_c09.cpp + serial/TestSerial_SubView_c10.cpp + serial/TestSerial_SubView_c11.cpp + serial/TestSerial_SubView_c12.cpp + serial/TestSerial_Team.cpp + serial/TestSerial_ViewAPI_a.cpp + serial/TestSerial_ViewAPI_b.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -33,7 +54,28 @@ ENDIF() IF(Kokkos_ENABLE_Pthread) TRIBITS_ADD_EXECUTABLE_AND_TEST( UnitTest_Threads - SOURCES UnitTestMain.cpp TestThreads.cpp + SOURCES + UnitTestMain.cpp + threads/TestThreads_Atomics.cpp + threads/TestThreads_Other.cpp + threads/TestThreads_Reductions.cpp + threads/TestThreads_SubView_a.cpp + threads/TestThreads_SubView_b.cpp + threads/TestThreads_SubView_c01.cpp + threads/TestThreads_SubView_c02.cpp + threads/TestThreads_SubView_c03.cpp + 
threads/TestThreads_SubView_c04.cpp + threads/TestThreads_SubView_c05.cpp + threads/TestThreads_SubView_c06.cpp + threads/TestThreads_SubView_c07.cpp + threads/TestThreads_SubView_c08.cpp + threads/TestThreads_SubView_c09.cpp + threads/TestThreads_SubView_c10.cpp + threads/TestThreads_SubView_c11.cpp + threads/TestThreads_SubView_c12.cpp + threads/TestThreads_Team.cpp + threads/TestThreads_ViewAPI_a.cpp + threads/TestThreads_ViewAPI_b.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -44,7 +86,28 @@ ENDIF() IF(Kokkos_ENABLE_OpenMP) TRIBITS_ADD_EXECUTABLE_AND_TEST( UnitTest_OpenMP - SOURCES UnitTestMain.cpp TestOpenMP.cpp TestOpenMP_a.cpp TestOpenMP_b.cpp TestOpenMP_c.cpp + SOURCES + UnitTestMain.cpp + openmp/TestOpenMP_Atomics.cpp + openmp/TestOpenMP_Other.cpp + openmp/TestOpenMP_Reductions.cpp + openmp/TestOpenMP_SubView_a.cpp + openmp/TestOpenMP_SubView_b.cpp + openmp/TestOpenMP_SubView_c01.cpp + openmp/TestOpenMP_SubView_c02.cpp + openmp/TestOpenMP_SubView_c03.cpp + openmp/TestOpenMP_SubView_c04.cpp + openmp/TestOpenMP_SubView_c05.cpp + openmp/TestOpenMP_SubView_c06.cpp + openmp/TestOpenMP_SubView_c07.cpp + openmp/TestOpenMP_SubView_c08.cpp + openmp/TestOpenMP_SubView_c09.cpp + openmp/TestOpenMP_SubView_c10.cpp + openmp/TestOpenMP_SubView_c11.cpp + openmp/TestOpenMP_SubView_c12.cpp + openmp/TestOpenMP_Team.cpp + openmp/TestOpenMP_ViewAPI_a.cpp + openmp/TestOpenMP_ViewAPI_b.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -66,7 +129,36 @@ ENDIF() IF(Kokkos_ENABLE_Cuda) TRIBITS_ADD_EXECUTABLE_AND_TEST( UnitTest_Cuda - SOURCES UnitTestMain.cpp TestCuda.cpp TestCuda_a.cpp TestCuda_b.cpp TestCuda_c.cpp + SOURCES + UnitTestMain.cpp + cuda/TestCuda_Atomics.cpp + cuda/TestCuda_Other.cpp + cuda/TestCuda_Reductions_a.cpp + cuda/TestCuda_Reductions_b.cpp + cuda/TestCuda_Spaces.cpp + cuda/TestCuda_SubView_a.cpp + cuda/TestCuda_SubView_b.cpp + cuda/TestCuda_SubView_c01.cpp + cuda/TestCuda_SubView_c02.cpp + 
cuda/TestCuda_SubView_c03.cpp + cuda/TestCuda_SubView_c04.cpp + cuda/TestCuda_SubView_c05.cpp + cuda/TestCuda_SubView_c06.cpp + cuda/TestCuda_SubView_c07.cpp + cuda/TestCuda_SubView_c08.cpp + cuda/TestCuda_SubView_c09.cpp + cuda/TestCuda_SubView_c10.cpp + cuda/TestCuda_SubView_c11.cpp + cuda/TestCuda_SubView_c12.cpp + cuda/TestCuda_Team.cpp + cuda/TestCuda_ViewAPI_a.cpp + cuda/TestCuda_ViewAPI_b.cpp + cuda/TestCuda_ViewAPI_c.cpp + cuda/TestCuda_ViewAPI_d.cpp + cuda/TestCuda_ViewAPI_e.cpp + cuda/TestCuda_ViewAPI_f.cpp + cuda/TestCuda_ViewAPI_g.cpp + cuda/TestCuda_ViewAPI_h.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " diff --git a/lib/kokkos/core/unit_test/Makefile b/lib/kokkos/core/unit_test/Makefile index 3d9d212c1ecdef658fdb9cf7d30fc542a6fb72d3..3203dec28c114ee4b7b0ca457dc3cb5cf2787708 100644 --- a/lib/kokkos/core/unit_test/Makefile +++ b/lib/kokkos/core/unit_test/Makefile @@ -3,50 +3,92 @@ KOKKOS_PATH = ../.. GTEST_PATH = ../../tpls/gtest vpath %.cpp ${KOKKOS_PATH}/core/unit_test +vpath %.cpp ${KOKKOS_PATH}/core/unit_test/serial +vpath %.cpp ${KOKKOS_PATH}/core/unit_test/threads +vpath %.cpp ${KOKKOS_PATH}/core/unit_test/openmp +vpath %.cpp ${KOKKOS_PATH}/core/unit_test/cuda + TEST_HEADERS = $(wildcard $(KOKKOS_PATH)/core/unit_test/*.hpp) +TEST_HEADERS += $(wildcard $(KOKKOS_PATH)/core/unit_test/*/*.hpp) default: build_all echo "End Build" -include $(KOKKOS_PATH)/Makefile.kokkos - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - CXX = $(NVCC_WRAPPER) - CXXFLAGS ?= -O3 - LINK = $(CXX) - LDFLAGS ?= -lpthread +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) + CXX = $(KOKKOS_PATH)/config/nvcc_wrapper else - CXX ?= g++ - CXXFLAGS ?= -O3 - LINK ?= $(CXX) - LDFLAGS ?= -lpthread + CXX = g++ endif +CXXFLAGS = -O3 +LINK ?= $(CXX) +LDFLAGS ?= -lpthread + +include $(KOKKOS_PATH)/Makefile.kokkos + KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/core/unit_test TEST_TARGETS = TARGETS = ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - OBJ_CUDA = TestCuda_c.o 
TestCuda_b.o TestCuda_a.o TestCuda.o UnitTestMain.o gtest-all.o + OBJ_CUDA = TestCuda_Other.o TestCuda_Reductions_a.o TestCuda_Reductions_b.o TestCuda_Atomics.o TestCuda_Team.o TestCuda_Spaces.o + OBJ_CUDA += TestCuda_SubView_a.o TestCuda_SubView_b.o +ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) + OBJ_CUDA += TestCuda_SubView_c_all.o +else + OBJ_CUDA += TestCuda_SubView_c01.o TestCuda_SubView_c02.o TestCuda_SubView_c03.o + OBJ_CUDA += TestCuda_SubView_c04.o TestCuda_SubView_c05.o TestCuda_SubView_c06.o + OBJ_CUDA += TestCuda_SubView_c07.o TestCuda_SubView_c08.o TestCuda_SubView_c09.o + OBJ_CUDA += TestCuda_SubView_c10.o TestCuda_SubView_c11.o TestCuda_SubView_c12.o +endif + OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o + OBJ_CUDA += TestCuda_ViewAPI_e.o TestCuda_ViewAPI_f.o TestCuda_ViewAPI_g.o TestCuda_ViewAPI_h.o + OBJ_CUDA += UnitTestMain.o gtest-all.o TARGETS += KokkosCore_UnitTest_Cuda TEST_TARGETS += test-cuda endif ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) - OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o + OBJ_THREADS = TestThreads_Other.o TestThreads_Reductions.o TestThreads_Atomics.o TestThreads_Team.o + OBJ_THREADS += TestThreads_SubView_a.o TestThreads_SubView_b.o + OBJ_THREADS += TestThreads_SubView_c01.o TestThreads_SubView_c02.o TestThreads_SubView_c03.o + OBJ_THREADS += TestThreads_SubView_c04.o TestThreads_SubView_c05.o TestThreads_SubView_c06.o + OBJ_THREADS += TestThreads_SubView_c07.o TestThreads_SubView_c08.o TestThreads_SubView_c09.o + OBJ_THREADS += TestThreads_SubView_c10.o TestThreads_SubView_c11.o TestThreads_SubView_c12.o + OBJ_THREADS += TestThreads_ViewAPI_a.o TestThreads_ViewAPI_b.o UnitTestMain.o gtest-all.o TARGETS += KokkosCore_UnitTest_Threads TEST_TARGETS += test-threads endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - OBJ_OPENMP = TestOpenMP_c.o TestOpenMP_b.o TestOpenMP_a.o TestOpenMP.o UnitTestMain.o gtest-all.o + OBJ_OPENMP = TestOpenMP_Other.o TestOpenMP_Reductions.o 
TestOpenMP_Atomics.o TestOpenMP_Team.o + OBJ_OPENMP += TestOpenMP_SubView_a.o TestOpenMP_SubView_b.o +ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) + OBJ_OPENMP += TestOpenMP_SubView_c_all.o +else + OBJ_OPENMP += TestOpenMP_SubView_c01.o TestOpenMP_SubView_c02.o TestOpenMP_SubView_c03.o + OBJ_OPENMP += TestOpenMP_SubView_c04.o TestOpenMP_SubView_c05.o TestOpenMP_SubView_c06.o + OBJ_OPENMP += TestOpenMP_SubView_c07.o TestOpenMP_SubView_c08.o TestOpenMP_SubView_c09.o + OBJ_OPENMP += TestOpenMP_SubView_c10.o TestOpenMP_SubView_c11.o TestOpenMP_SubView_c12.o +endif + OBJ_OPENMP += TestOpenMP_ViewAPI_a.o TestOpenMP_ViewAPI_b.o UnitTestMain.o gtest-all.o TARGETS += KokkosCore_UnitTest_OpenMP TEST_TARGETS += test-openmp endif ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o + OBJ_SERIAL = TestSerial_Other.o TestSerial_Reductions.o TestSerial_Atomics.o TestSerial_Team.o + OBJ_SERIAL += TestSerial_SubView_a.o TestSerial_SubView_b.o +ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) + OBJ_SERIAL += TestSerial_SubView_c_all.o +else + OBJ_SERIAL += TestSerial_SubView_c01.o TestSerial_SubView_c02.o TestSerial_SubView_c03.o + OBJ_SERIAL += TestSerial_SubView_c04.o TestSerial_SubView_c05.o TestSerial_SubView_c06.o + OBJ_SERIAL += TestSerial_SubView_c07.o TestSerial_SubView_c08.o TestSerial_SubView_c09.o + OBJ_SERIAL += TestSerial_SubView_c10.o TestSerial_SubView_c11.o TestSerial_SubView_c12.o +endif + OBJ_SERIAL += TestSerial_ViewAPI_a.o TestSerial_ViewAPI_b.o UnitTestMain.o gtest-all.o TARGETS += KokkosCore_UnitTest_Serial TEST_TARGETS += test-serial endif @@ -61,7 +103,7 @@ OBJ_HWLOC = TestHWLOC.o UnitTestMain.o gtest-all.o TARGETS += KokkosCore_UnitTest_HWLOC TEST_TARGETS += test-hwloc -OBJ_DEFAULT = TestDefaultDeviceType.o TestDefaultDeviceType_a.o UnitTestMain.o gtest-all.o +OBJ_DEFAULT = TestDefaultDeviceType.o TestDefaultDeviceType_a.o TestDefaultDeviceType_b.o TestDefaultDeviceType_c.o TestDefaultDeviceType_d.o UnitTestMain.o gtest-all.o
TARGETS += KokkosCore_UnitTest_Default TEST_TARGETS += test-default diff --git a/lib/kokkos/core/unit_test/TestAggregate.hpp b/lib/kokkos/core/unit_test/TestAggregate.hpp index 5388a60787cb9217a4436798d826dcc53f55d3f2..d22837f3ed7b67bccecfbe11ba4d71266a094616 100644 --- a/lib/kokkos/core/unit_test/TestAggregate.hpp +++ b/lib/kokkos/core/unit_test/TestAggregate.hpp @@ -52,7 +52,7 @@ /*--------------------------------------------------------------------------*/ -#include <impl/KokkosExp_ViewArray.hpp> +#include <impl/Kokkos_ViewArray.hpp> namespace Test { diff --git a/lib/kokkos/core/unit_test/TestAggregateReduction.hpp b/lib/kokkos/core/unit_test/TestAggregateReduction.hpp deleted file mode 100644 index bd05cd347b979e305becead88a898d27b0a7d4f8..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/unit_test/TestAggregateReduction.hpp +++ /dev/null @@ -1,191 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef TEST_AGGREGATE_REDUCTION_HPP -#define TEST_AGGREGATE_REDUCTION_HPP - -#include <gtest/gtest.h> - -#include <stdexcept> -#include <sstream> -#include <iostream> - -namespace Test { - -template< typename T , unsigned N > -struct StaticArray { - T value[N] ; - - KOKKOS_INLINE_FUNCTION - StaticArray() = default; - - KOKKOS_INLINE_FUNCTION - StaticArray( const StaticArray & rhs ) = default; - - KOKKOS_INLINE_FUNCTION - operator T () { return value[0]; } - - KOKKOS_INLINE_FUNCTION - StaticArray & operator = ( const T & rhs ) - { - for ( unsigned i = 0 ; i < N ; ++i ) value[i] = rhs ; - return *this ; - } - - KOKKOS_INLINE_FUNCTION - StaticArray & operator = ( const StaticArray & rhs ) = default; - - KOKKOS_INLINE_FUNCTION - StaticArray operator * ( const StaticArray & rhs ) - { - StaticArray tmp ; - for ( unsigned i = 0 ; i < N ; ++i ) tmp.value[i] = value[i] * rhs.value[i] ; - return tmp ; - } - - KOKKOS_INLINE_FUNCTION - StaticArray operator + ( const StaticArray & rhs ) - { - StaticArray tmp ; - for ( unsigned i = 0 ; i < N ; 
++i ) tmp.value[i] = value[i] + rhs.value[i] ; - return tmp ; - } - - KOKKOS_INLINE_FUNCTION - StaticArray & operator += ( const StaticArray & rhs ) - { - for ( unsigned i = 0 ; i < N ; ++i ) value[i] += rhs.value[i] ; - return *this ; - } - - KOKKOS_INLINE_FUNCTION - void operator += ( const volatile StaticArray & rhs ) volatile - { - for ( unsigned i = 0 ; i < N ; ++i ) value[i] += rhs.value[i] ; - } -}; - -static_assert(std::is_trivial<StaticArray<int, 4>>::value, "Not trivial"); - -template< typename T , class Space > -struct DOT { - typedef T value_type ; - typedef Space execution_space ; - - Kokkos::View< value_type * , Space > a ; - Kokkos::View< value_type * , Space > b ; - - DOT( const Kokkos::View< value_type * , Space > arg_a - , const Kokkos::View< value_type * , Space > arg_b - ) - : a( arg_a ), b( arg_b ) {} - - KOKKOS_INLINE_FUNCTION - void operator()( const int i , value_type & update ) const - { - update += a(i) * b(i); - } -}; - -template< typename T , class Space > -struct FILL { - typedef T value_type ; - typedef Space execution_space ; - - Kokkos::View< value_type * , Space > a ; - Kokkos::View< value_type * , Space > b ; - - FILL( const Kokkos::View< value_type * , Space > & arg_a - , const Kokkos::View< value_type * , Space > & arg_b - ) - : a( arg_a ), b( arg_b ) {} - - KOKKOS_INLINE_FUNCTION - void operator()( const int i ) const - { - a(i) = i % 2 ? i + 1 : 1 ; - b(i) = i % 2 ? 1 : i + 1 ; - } -}; - -template< class Space > -void TestViewAggregateReduction() -{ - -#if ! KOKKOS_USING_EXP_VIEW - - const int count = 2 ; - const long result = count % 2 ? 
( count * ( ( count + 1 ) / 2 ) ) - : ( ( count / 2 ) * ( count + 1 ) ); - - Kokkos::View< long * , Space > a("a",count); - Kokkos::View< long * , Space > b("b",count); - Kokkos::View< StaticArray<long,4> * , Space > a4("a4",count); - Kokkos::View< StaticArray<long,4> * , Space > b4("b4",count); - Kokkos::View< StaticArray<long,10> * , Space > a10("a10",count); - Kokkos::View< StaticArray<long,10> * , Space > b10("b10",count); - - Kokkos::parallel_for( count , FILL<long,Space>(a,b) ); - Kokkos::parallel_for( count , FILL< StaticArray<long,4> , Space >(a4,b4) ); - Kokkos::parallel_for( count , FILL< StaticArray<long,10> , Space >(a10,b10) ); - - long r = 0; - StaticArray<long,4> r4 ; - StaticArray<long,10> r10 ; - - Kokkos::parallel_reduce( count , DOT<long,Space>(a,b) , r ); - Kokkos::parallel_reduce( count , DOT< StaticArray<long,4> , Space >(a4,b4) , r4 ); - Kokkos::parallel_reduce( count , DOT< StaticArray<long,10> , Space >(a10,b10) , r10 ); - - ASSERT_EQ( result , r ); - for ( int i = 0 ; i < 10 ; ++i ) { ASSERT_EQ( result , r10.value[i] ); } - for ( int i = 0 ; i < 4 ; ++i ) { ASSERT_EQ( result , r4.value[i] ); } - -#endif - -} - -} - -#endif /* #ifndef TEST_AGGREGATE_REDUCTION_HPP */ - diff --git a/lib/kokkos/core/unit_test/TestAtomicOperations.hpp b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp index aee4bda06cea276e12fca664a48c81a428445bcd..7f1519045187c535c586659e757eeb24609ccb50 100644 --- a/lib/kokkos/core/unit_test/TestAtomicOperations.hpp +++ b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp @@ -226,6 +226,148 @@ bool MinAtomicTest(T i0, T i1) return passed ; } +//--------------------------------------------------- +//--------------atomic_increment--------------------- +//--------------------------------------------------- + +template<class T,class DEVICE_TYPE> +struct IncFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data; + T i0; + + KOKKOS_INLINE_FUNCTION + void operator()(int) const { 
+ Kokkos::atomic_increment(&data()); + } + IncFunctor( T _i0 ) : i0(_i0) {} +}; + +template<class T, class execution_space > +T IncAtomic(T i0) { + struct InitFunctor<T,execution_space> f_init(i0); + typename InitFunctor<T,execution_space>::type data("Data"); + typename InitFunctor<T,execution_space>::h_type h_data("HData"); + f_init.data = data; + Kokkos::parallel_for(1,f_init); + execution_space::fence(); + + struct IncFunctor<T,execution_space> f(i0); + f.data = data; + Kokkos::parallel_for(1,f); + execution_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + return val; +} + +template<class T> +T IncAtomicCheck(T i0) { + T* data = new T[1]; + data[0] = 0; + + *data = i0 + 1; + + T val = *data; + delete [] data; + return val; +} + +template<class T,class DeviceType> +bool IncAtomicTest(T i0) +{ + T res = IncAtomic<T,DeviceType>(i0); + T resSerial = IncAtomicCheck<T>(i0); + + bool passed = true; + + if ( resSerial != res ) { + passed = false; + + std::cout << "Loop<" + << typeid(T).name() + << ">( test = IncAtomicTest" + << " FAILED : " + << resSerial << " != " << res + << std::endl ; + } + + return passed ; +} + +//--------------------------------------------------- +//--------------atomic_decrement--------------------- +//--------------------------------------------------- + +template<class T,class DEVICE_TYPE> +struct DecFunctor{ + typedef DEVICE_TYPE execution_space; + typedef Kokkos::View<T,execution_space> type; + type data; + T i0; + + KOKKOS_INLINE_FUNCTION + void operator()(int) const { + Kokkos::atomic_decrement(&data()); + } + DecFunctor( T _i0 ) : i0(_i0) {} +}; + +template<class T, class execution_space > +T DecAtomic(T i0) { + struct InitFunctor<T,execution_space> f_init(i0); + typename InitFunctor<T,execution_space>::type data("Data"); + typename InitFunctor<T,execution_space>::h_type h_data("HData"); + f_init.data = data; + Kokkos::parallel_for(1,f_init); + execution_space::fence(); + + struct DecFunctor<T,execution_space> 
f(i0); + f.data = data; + Kokkos::parallel_for(1,f); + execution_space::fence(); + + Kokkos::deep_copy(h_data,data); + T val = h_data(); + return val; +} + +template<class T> +T DecAtomicCheck(T i0) { + T* data = new T[1]; + data[0] = 0; + + *data = i0 - 1; + + T val = *data; + delete [] data; + return val; +} + +template<class T,class DeviceType> +bool DecAtomicTest(T i0) +{ + T res = DecAtomic<T,DeviceType>(i0); + T resSerial = DecAtomicCheck<T>(i0); + + bool passed = true; + + if ( resSerial != res ) { + passed = false; + + std::cout << "Loop<" + << typeid(T).name() + << ">( test = DecAtomicTest" + << " FAILED : " + << resSerial << " != " << res + << std::endl ; + } + + return passed ; +} + //--------------------------------------------------- //--------------atomic_fetch_mul--------------------- //--------------------------------------------------- @@ -821,6 +963,8 @@ bool AtomicOperationsTestIntegralType( int i0 , int i1 , int test ) case 8: return XorAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); case 9: return LShiftAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); case 10: return RShiftAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); + case 11: return IncAtomicTest<T,DeviceType>( (T)i0 ); + case 12: return DecAtomicTest<T,DeviceType>( (T)i0 ); } return 0; } diff --git a/lib/kokkos/core/unit_test/TestCompilerMacros.hpp b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp index dfa2250c04ae8cc785383b1f64a127ad40279f57..71c22144896627cb8886a716acd214830fa53af0 100644 --- a/lib/kokkos/core/unit_test/TestCompilerMacros.hpp +++ b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp @@ -70,8 +70,10 @@ struct AddFunctor { #ifdef KOKKOS_HAVE_PRAGMA_LOOPCOUNT #pragma loop count(128) #endif +#ifndef KOKKOS_HAVE_DEBUG #ifdef KOKKOS_HAVE_PRAGMA_SIMD #pragma simd +#endif #endif for(int j=0;j<length;j++) a(i,j) += b(i,j); diff --git a/lib/kokkos/core/unit_test/TestCuda.cpp b/lib/kokkos/core/unit_test/TestCuda.cpp deleted file mode 100644 index 
e6155662525f08fd718e02a40243e942dd77104d..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/unit_test/TestCuda.cpp +++ /dev/null @@ -1,290 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include <gtest/gtest.h> - -#include <iostream> - -#include <Kokkos_Core.hpp> - -//---------------------------------------------------------------------------- - -#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp> -#include <impl/Kokkos_ViewTileLeft.hpp> -#include <TestTile.hpp> - -//---------------------------------------------------------------------------- - -#include <TestSharedAlloc.hpp> -#include <TestViewMapping.hpp> - -#include <TestViewImpl.hpp> -#include <TestAtomic.hpp> - -#include <TestViewAPI.hpp> -#include <TestViewSubview.hpp> -#include <TestViewOfClass.hpp> - -#include <TestReduce.hpp> -#include <TestScan.hpp> -#include <TestRange.hpp> -#include <TestTeam.hpp> -#include <TestAggregate.hpp> -#include <TestAggregateReduction.hpp> -#include <TestCompilerMacros.hpp> -#include <TestMemorySpaceTracking.hpp> -#include <TestMemoryPool.hpp> -#include <TestTeamVector.hpp> -#include <TestTemplateMetaFunctions.hpp> -#include <TestCXX11Deduction.hpp> - -#include <TestTaskPolicy.hpp> -#include <TestPolicyConstruction.hpp> - -#include <TestMDRange.hpp> - -//---------------------------------------------------------------------------- - -class cuda : public ::testing::Test { -protected: - static void SetUpTestCase(); - static void TearDownTestCase(); -}; - -void cuda::SetUpTestCase() - { - Kokkos::Cuda::print_configuration( std::cout ); - Kokkos::HostSpace::execution_space::initialize(); - Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) ); - } - -void cuda::TearDownTestCase() - { - Kokkos::Cuda::finalize(); - Kokkos::HostSpace::execution_space::finalize(); - } - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Test { - -__global__ -void test_abort() -{ - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< - 
Kokkos::CudaSpace , - Kokkos::HostSpace >::verify(); -} - -__global__ -void test_cuda_spaces_int_value( int * ptr ) -{ - if ( *ptr == 42 ) { *ptr = 2 * 42 ; } -} - -TEST_F( cuda , md_range ) { - TestMDRange_2D< Kokkos::Cuda >::test_for2(100,100); - - TestMDRange_3D< Kokkos::Cuda >::test_for3(100,100,100); -} - -TEST_F( cuda , compiler_macros ) -{ - ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Cuda >() ) ); -} - -TEST_F( cuda , memory_space ) -{ - TestMemorySpace< Kokkos::Cuda >(); -} - -TEST_F( cuda, uvm ) -{ - if ( Kokkos::CudaUVMSpace::available() ) { - - int * uvm_ptr = (int*) Kokkos::kokkos_malloc< Kokkos::CudaUVMSpace >("uvm_ptr",sizeof(int)); - - *uvm_ptr = 42 ; - - Kokkos::Cuda::fence(); - test_cuda_spaces_int_value<<<1,1>>>(uvm_ptr); - Kokkos::Cuda::fence(); - - EXPECT_EQ( *uvm_ptr, int(2*42) ); - - Kokkos::kokkos_free< Kokkos::CudaUVMSpace >(uvm_ptr ); - } -} - -//---------------------------------------------------------------------------- - -TEST_F( cuda , impl_shared_alloc ) -{ - test_shared_alloc< Kokkos::CudaSpace , Kokkos::HostSpace::execution_space >(); - test_shared_alloc< Kokkos::CudaUVMSpace , Kokkos::HostSpace::execution_space >(); - test_shared_alloc< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace::execution_space >(); -} - -TEST_F( cuda, policy_construction) { - TestRangePolicyConstruction< Kokkos::Cuda >(); - TestTeamPolicyConstruction< Kokkos::Cuda >(); -} - -TEST_F( cuda , impl_view_mapping ) -{ - test_view_mapping< Kokkos::Cuda >(); - test_view_mapping< Kokkos::CudaUVMSpace >(); - test_view_mapping_subview< Kokkos::Cuda >(); - test_view_mapping_subview< Kokkos::CudaUVMSpace >(); - test_view_mapping_operator< Kokkos::Cuda >(); - test_view_mapping_operator< Kokkos::CudaUVMSpace >(); - TestViewMappingAtomic< Kokkos::Cuda >::run(); -} - -TEST_F( cuda , view_of_class ) -{ - TestViewMappingClassValue< Kokkos::CudaSpace >::run(); - TestViewMappingClassValue< Kokkos::CudaUVMSpace >::run(); -} - -template< class MemSpace > -struct 
TestViewCudaTexture { - - enum { N = 1000 }; - - using V = Kokkos::Experimental::View<double*,MemSpace> ; - using T = Kokkos::Experimental::View<const double*, MemSpace, Kokkos::MemoryRandomAccess > ; - - V m_base ; - T m_tex ; - - struct TagInit {}; - struct TagTest {}; - - KOKKOS_INLINE_FUNCTION - void operator()( const TagInit & , const int i ) const { m_base[i] = i + 1 ; } - - KOKKOS_INLINE_FUNCTION - void operator()( const TagTest & , const int i , long & error_count ) const - { if ( m_tex[i] != i + 1 ) ++error_count ; } - - TestViewCudaTexture() - : m_base("base",N) - , m_tex( m_base ) - {} - - static void run() - { - EXPECT_TRUE( ( std::is_same< typename V::reference_type - , double & - >::value ) ); - - EXPECT_TRUE( ( std::is_same< typename T::reference_type - , const double - >::value ) ); - - EXPECT_TRUE( V::reference_type_is_lvalue_reference ); // An ordinary view - EXPECT_FALSE( T::reference_type_is_lvalue_reference ); // Texture fetch returns by value - - TestViewCudaTexture self ; - Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda , TagInit >(0,N) , self ); - long error_count = -1 ; - Kokkos::parallel_reduce( Kokkos::RangePolicy< Kokkos::Cuda , TagTest >(0,N) , self , error_count ); - EXPECT_EQ( error_count , 0 ); - } -}; - -TEST_F( cuda , impl_view_texture ) -{ - TestViewCudaTexture< Kokkos::CudaSpace >::run(); - TestViewCudaTexture< Kokkos::CudaUVMSpace >::run(); -} - -template< class MemSpace , class ExecSpace > -struct TestViewCudaAccessible { - - enum { N = 1000 }; - - using V = Kokkos::Experimental::View<double*,MemSpace> ; - - V m_base ; - - struct TagInit {}; - struct TagTest {}; - - KOKKOS_INLINE_FUNCTION - void operator()( const TagInit & , const int i ) const { m_base[i] = i + 1 ; } - - KOKKOS_INLINE_FUNCTION - void operator()( const TagTest & , const int i , long & error_count ) const - { if ( m_base[i] != i + 1 ) ++error_count ; } - - TestViewCudaAccessible() - : m_base("base",N) - {} - - static void run() - { - 
TestViewCudaAccessible self ; - Kokkos::parallel_for( Kokkos::RangePolicy< typename MemSpace::execution_space , TagInit >(0,N) , self ); - MemSpace::execution_space::fence(); - // Next access is a different execution space, must complete prior kernel. - long error_count = -1 ; - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , TagTest >(0,N) , self , error_count ); - EXPECT_EQ( error_count , 0 ); - } -}; - -TEST_F( cuda , impl_view_accessible ) -{ - TestViewCudaAccessible< Kokkos::CudaSpace , Kokkos::Cuda >::run(); - - TestViewCudaAccessible< Kokkos::CudaUVMSpace , Kokkos::Cuda >::run(); - TestViewCudaAccessible< Kokkos::CudaUVMSpace , Kokkos::HostSpace::execution_space >::run(); - - TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace , Kokkos::Cuda >::run(); - TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace::execution_space >::run(); -} - -} diff --git a/lib/kokkos/core/unit_test/TestCuda_a.cpp b/lib/kokkos/core/unit_test/TestCuda_a.cpp deleted file mode 100644 index 4680c333867ff0e68f572121a654f8f23d09fcfb..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/unit_test/TestCuda_a.cpp +++ /dev/null @@ -1,182 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include <gtest/gtest.h> - -#include <iostream> - -#include <Kokkos_Core.hpp> - -//---------------------------------------------------------------------------- - -#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp> -#include <impl/Kokkos_ViewTileLeft.hpp> -#include <TestTile.hpp> - -//---------------------------------------------------------------------------- - -#include <TestSharedAlloc.hpp> -#include <TestViewMapping.hpp> - -#include <TestViewImpl.hpp> -#include <TestAtomic.hpp> - -#include <TestViewAPI.hpp> -#include <TestViewSubview.hpp> -#include <TestViewOfClass.hpp> - -#include <TestReduce.hpp> -#include <TestScan.hpp> -#include <TestRange.hpp> -#include <TestTeam.hpp> -#include <TestAggregate.hpp> -#include <TestAggregateReduction.hpp> -#include <TestCompilerMacros.hpp> -#include <TestMemorySpaceTracking.hpp> -#include <TestMemoryPool.hpp> -#include <TestTeamVector.hpp> -#include <TestTemplateMetaFunctions.hpp> -#include <TestCXX11Deduction.hpp> - -#include <TestTaskPolicy.hpp> -#include <TestPolicyConstruction.hpp> - -//---------------------------------------------------------------------------- - -class cuda : public ::testing::Test { -protected: - static void SetUpTestCase(); - static void TearDownTestCase(); -}; - -//---------------------------------------------------------------------------- - -namespace Test { - -TEST_F( cuda, view_impl ) -{ - // test_abort<<<32,32>>>(); // Aborts the kernel with CUDA version 4.1 or greater - - test_view_impl< Kokkos::Cuda >(); -} - -TEST_F( cuda, view_api ) -{ - typedef Kokkos::View< const int * , Kokkos::Cuda , Kokkos::MemoryTraits< Kokkos::RandomAccess > > view_texture_managed ; - typedef Kokkos::View< const int * , Kokkos::Cuda , Kokkos::MemoryTraits< Kokkos::RandomAccess | Kokkos::Unmanaged > > view_texture_unmanaged ; - - TestViewAPI< double , Kokkos::Cuda >(); - TestViewAPI< double , 
Kokkos::CudaUVMSpace >(); - -#if 0 - Kokkos::View<double, Kokkos::Cuda > x("x"); - Kokkos::View<double[1], Kokkos::Cuda > y("y"); - // *x = 10 ; - // x() = 10 ; - // y[0] = 10 ; - // y(0) = 10 ; -#endif -} - -TEST_F( cuda , view_nested_view ) -{ - ::Test::view_nested_view< Kokkos::Cuda >(); -} - -TEST_F( cuda, view_subview_auto_1d_left ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Cuda >(); -} - -TEST_F( cuda, view_subview_auto_1d_right ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Cuda >(); -} - -TEST_F( cuda, view_subview_auto_1d_stride ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Cuda >(); -} - -TEST_F( cuda, view_subview_assign_strided ) { - TestViewSubview::test_1d_strided_assignment< Kokkos::Cuda >(); -} - -TEST_F( cuda, view_subview_left_0 ) { - TestViewSubview::test_left_0< Kokkos::CudaUVMSpace >(); -} - -TEST_F( cuda, view_subview_left_1 ) { - TestViewSubview::test_left_1< Kokkos::CudaUVMSpace >(); -} - -TEST_F( cuda, view_subview_left_2 ) { - TestViewSubview::test_left_2< Kokkos::CudaUVMSpace >(); -} - -TEST_F( cuda, view_subview_left_3 ) { - TestViewSubview::test_left_3< Kokkos::CudaUVMSpace >(); -} - -TEST_F( cuda, view_subview_right_0 ) { - TestViewSubview::test_right_0< Kokkos::CudaUVMSpace >(); -} - -TEST_F( cuda, view_subview_right_1 ) { - TestViewSubview::test_right_1< Kokkos::CudaUVMSpace >(); -} - -TEST_F( cuda, view_subview_right_3 ) { - TestViewSubview::test_right_3< Kokkos::CudaUVMSpace >(); -} - -TEST_F( cuda, view_subview_1d_assign ) { - TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace >(); -} - -TEST_F( cuda, view_subview_2d_from_3d ) { - TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace >(); -} - -TEST_F( cuda, view_subview_2d_from_5d ) { - TestViewSubview::test_2d_subview_5d< Kokkos::CudaUVMSpace >(); -} - -} diff --git a/lib/kokkos/core/unit_test/TestCuda_b.cpp b/lib/kokkos/core/unit_test/TestCuda_b.cpp deleted file mode 100644 index 
d4ca949e57cb02d15444ec7f3e48b123003b6a68..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/unit_test/TestCuda_b.cpp +++ /dev/null @@ -1,191 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include <gtest/gtest.h> - -#include <iostream> - -#include <Kokkos_Core.hpp> - -//---------------------------------------------------------------------------- - -#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp> -#include <impl/Kokkos_ViewTileLeft.hpp> -#include <TestTile.hpp> - -//---------------------------------------------------------------------------- - -#include <TestSharedAlloc.hpp> -#include <TestViewMapping.hpp> - -#include <TestViewImpl.hpp> -#include <TestAtomic.hpp> - -#include <TestViewAPI.hpp> -#include <TestViewSubview.hpp> -#include <TestViewOfClass.hpp> - -#include <TestReduce.hpp> -#include <TestScan.hpp> -#include <TestRange.hpp> -#include <TestTeam.hpp> -#include <TestAggregate.hpp> -#include <TestAggregateReduction.hpp> -#include <TestCompilerMacros.hpp> -#include <TestMemorySpaceTracking.hpp> -#include <TestMemoryPool.hpp> -#include <TestTeamVector.hpp> -#include <TestTemplateMetaFunctions.hpp> -#include <TestCXX11Deduction.hpp> - -#include <TestTaskPolicy.hpp> -#include <TestPolicyConstruction.hpp> - -//---------------------------------------------------------------------------- - -class cuda : public ::testing::Test { -protected: - static void SetUpTestCase(); - static void TearDownTestCase(); -}; - -//---------------------------------------------------------------------------- - -namespace Test { - -TEST_F( cuda, range_tag ) -{ - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(3); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(3); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_scan(3); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(3); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(3); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(3); - TestRange< 
Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001); - //TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000); -} - -TEST_F( cuda, team_tag ) -{ - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(3); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(3); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(3); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(3); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000); -} - -TEST_F( cuda, reduce ) -{ - TestReduce< long , Kokkos::Cuda >( 10000000 ); - TestReduce< double , Kokkos::Cuda >( 1000000 ); - TestReduce< int , Kokkos::Cuda >( 0 ); -} - -TEST_F( cuda , reducers ) -{ - TestReducers<int, Kokkos::Cuda>::execute_integer(); - TestReducers<size_t, Kokkos::Cuda>::execute_integer(); - TestReducers<double, Kokkos::Cuda>::execute_float(); - TestReducers<Kokkos::complex<double>, Kokkos::Cuda>::execute_basic(); -} - -TEST_F( cuda, reduce_team ) -{ - TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< long , Kokkos::Cuda , 
Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); - TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); -} - -TEST_F( cuda, shared_team ) -{ - TestSharedTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >(); - TestSharedTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >(); -} - -#if defined (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) -TEST_F( cuda, lambda_shared_team ) -{ - TestLambdaSharedTeam< Kokkos::CudaSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >(); - TestLambdaSharedTeam< Kokkos::CudaUVMSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >(); - TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >(); - TestLambdaSharedTeam< Kokkos::CudaSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >(); - TestLambdaSharedTeam< Kokkos::CudaUVMSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >(); - TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >(); -} -#endif - -TEST_F( cuda, shmem_size) { - TestShmemSize< Kokkos::Cuda >(); -} - -TEST_F( cuda, multi_level_scratch) { - TestMultiLevelScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >(); - TestMultiLevelScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >(); -} - -TEST_F( cuda, reduce_dynamic ) -{ - TestReduceDynamic< long , Kokkos::Cuda >( 10000000 ); - TestReduceDynamic< double , Kokkos::Cuda >( 1000000 ); -} - -TEST_F( cuda, reduce_dynamic_view ) -{ - TestReduceDynamicView< long , Kokkos::Cuda >( 10000000 ); - TestReduceDynamicView< double , Kokkos::Cuda >( 1000000 ); -} - -} diff --git 
a/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp index 1b1e0e67365fa28778cb848cbd52d0a2399c97e6..87a534f11dfc29913abb5f36eeba63da6d817c2e 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -48,7 +48,6 @@ #if !defined(KOKKOS_HAVE_CUDA) || defined(__CUDACC__) //---------------------------------------------------------------------------- -#include <TestViewImpl.hpp> #include <TestAtomic.hpp> #include <TestViewAPI.hpp> @@ -60,6 +59,7 @@ #include <TestCompilerMacros.hpp> #include <TestCXX11.hpp> #include <TestTeamVector.hpp> +#include <TestUtilities.hpp> namespace Test { @@ -76,165 +76,24 @@ protected: } }; - -TEST_F( defaultdevicetype, view_impl) { - test_view_impl< Kokkos::DefaultExecutionSpace >(); -} - -TEST_F( defaultdevicetype, view_api) { - TestViewAPI< double , Kokkos::DefaultExecutionSpace >(); -} - -TEST_F( defaultdevicetype, long_reduce) { - TestReduce< long , Kokkos::DefaultExecutionSpace >( 100000 ); -} - -TEST_F( defaultdevicetype, double_reduce) { - TestReduce< double , Kokkos::DefaultExecutionSpace >( 100000 ); -} - -TEST_F( defaultdevicetype, long_reduce_dynamic ) { - TestReduceDynamic< long , 
Kokkos::DefaultExecutionSpace >( 100000 ); -} - -TEST_F( defaultdevicetype, double_reduce_dynamic ) { - TestReduceDynamic< double , Kokkos::DefaultExecutionSpace >( 100000 ); -} - -TEST_F( defaultdevicetype, long_reduce_dynamic_view ) { - TestReduceDynamicView< long , Kokkos::DefaultExecutionSpace >( 100000 ); -} - - -TEST_F( defaultdevicetype , atomics ) +TEST_F( defaultdevicetype, host_space_access ) { - const int loop_count = 1e4 ; + typedef Kokkos::HostSpace::execution_space host_exec_space ; + typedef Kokkos::Device< host_exec_space , Kokkos::HostSpace > device_space ; + typedef Kokkos::Impl::HostMirror< Kokkos::DefaultExecutionSpace >::Space mirror_space ; - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + static_assert( + Kokkos::Impl::SpaceAccessibility< host_exec_space , Kokkos::HostSpace >::accessible , "" ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + static_assert( + Kokkos::Impl::SpaceAccessibility< device_space , Kokkos::HostSpace >::accessible , "" ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long 
int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,3) ) ); + static_assert( + Kokkos::Impl::SpaceAccessibility< mirror_space , Kokkos::HostSpace >::accessible , "" ); } -/*TEST_F( defaultdevicetype , view_remap ) -{ - enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; - - typedef Kokkos::View< double*[N1][N2][N3] , - Kokkos::LayoutRight , - Kokkos::DefaultExecutionSpace > output_type ; - - typedef Kokkos::View< int**[N2][N3] , - Kokkos::LayoutLeft , - Kokkos::DefaultExecutionSpace > input_type ; - - typedef Kokkos::View< int*[N0][N2][N3] , - Kokkos::LayoutLeft , - Kokkos::DefaultExecutionSpace > diff_type ; - - output_type output( "output" , N0 ); - input_type input ( "input" , N0 , N1 ); - diff_type diff ( "diff" , N0 ); - - int value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - input(i0,i1,i2,i3) = ++value ; - }}}} - - // Kokkos::deep_copy( diff , input ); // throw with incompatible shape - Kokkos::deep_copy( output , input ); - - value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( 
size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - ++value ; - ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); - }}}} -}*/ - -//---------------------------------------------------------------------------- - - -TEST_F( defaultdevicetype , view_aggregate ) -{ - TestViewAggregate< Kokkos::DefaultExecutionSpace >(); -} - -//---------------------------------------------------------------------------- - -TEST_F( defaultdevicetype , scan ) -{ - TestScan< Kokkos::DefaultExecutionSpace >::test_range( 1 , 1000 ); - TestScan< Kokkos::DefaultExecutionSpace >( 1000000 ); - TestScan< Kokkos::DefaultExecutionSpace >( 10000000 ); - Kokkos::DefaultExecutionSpace::fence(); -} - - -//---------------------------------------------------------------------------- - -TEST_F( defaultdevicetype , compiler_macros ) -{ - ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::DefaultExecutionSpace >() ) ); -} - - -//---------------------------------------------------------------------------- -TEST_F( defaultdevicetype , cxx11 ) -{ - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(1) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(2) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(3) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(4) ) ); -} - -TEST_F( defaultdevicetype , team_vector ) -{ - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(0) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(1) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(2) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(3) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(4) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(5) ) ); -} - -TEST_F( defaultdevicetype , malloc ) -{ - int* data = (int*) Kokkos::kokkos_malloc(100*sizeof(int)); - ASSERT_NO_THROW(data = 
(int*) Kokkos::kokkos_realloc(data,120*sizeof(int))); - Kokkos::kokkos_free(data); - - int* data2 = (int*) Kokkos::kokkos_malloc(0); - ASSERT_TRUE(data2==NULL); - Kokkos::kokkos_free(data2); +TEST_F( defaultdevicetype, view_api) { + TestViewAPI< double , Kokkos::DefaultExecutionSpace >(); } } // namespace test diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp index a17ed97a9ff4130a2ca2ea087b400e9595c69dd9..caeb56c9e179416ec23a8d17582fa013e0896e0b 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp @@ -84,8 +84,8 @@ namespace Impl { } #ifdef KOKKOS_HAVE_SERIAL - if(Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value || - Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) { + if(std::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value || + std::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) { nthreads = 1; } #endif @@ -98,8 +98,8 @@ namespace Impl { if(Kokkos::hwloc::available()) numa = Kokkos::hwloc::get_available_numa_count(); #ifdef KOKKOS_HAVE_SERIAL - if(Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value || - Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) { + if(std::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value || + std::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) { numa = 1; } #endif @@ -142,8 +142,8 @@ namespace Impl { * Kokkos::hwloc::get_available_numa_count(); } #ifdef KOKKOS_HAVE_SERIAL - if(Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value || - Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) { + if(std::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value || + std::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) { nthreads = 1; } #endif @@ 
-156,8 +156,8 @@ namespace Impl { if(Kokkos::hwloc::available()) numa = Kokkos::hwloc::get_available_numa_count(); #ifdef KOKKOS_HAVE_SERIAL - if(Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value || - Kokkos::Impl::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) { + if(std::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value || + std::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) { numa = 1; } #endif @@ -184,7 +184,7 @@ namespace Impl { * Kokkos::hwloc::get_available_threads_per_core(); } else { #ifdef KOKKOS_HAVE_OPENMP - if(Kokkos::Impl::is_same<Kokkos::HostSpace::execution_space,Kokkos::OpenMP>::value) { + if(std::is_same<Kokkos::HostSpace::execution_space,Kokkos::OpenMP>::value) { expected_nthreads = omp_get_max_threads(); } else #endif @@ -192,8 +192,8 @@ namespace Impl { } #ifdef KOKKOS_HAVE_SERIAL - if(Kokkos::Impl::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Serial>::value || - Kokkos::Impl::is_same<Kokkos::DefaultHostExecutionSpace,Kokkos::Serial>::value ) + if(std::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Serial>::value || + std::is_same<Kokkos::DefaultHostExecutionSpace,Kokkos::Serial>::value ) expected_nthreads = 1; #endif } @@ -206,15 +206,15 @@ namespace Impl { expected_numa = 1; } #ifdef KOKKOS_HAVE_SERIAL - if(Kokkos::Impl::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Serial>::value || - Kokkos::Impl::is_same<Kokkos::DefaultHostExecutionSpace,Kokkos::Serial>::value ) + if(std::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Serial>::value || + std::is_same<Kokkos::DefaultHostExecutionSpace,Kokkos::Serial>::value ) expected_numa = 1; #endif } ASSERT_EQ(Kokkos::HostSpace::execution_space::thread_pool_size(),expected_nthreads); #ifdef KOKKOS_HAVE_CUDA - if(Kokkos::Impl::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Cuda>::value) { + if(std::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Cuda>::value) { int device; cudaGetDevice( &device ); int expected_device = 
argstruct.device_id; diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp index c15f81223329eaa749d84fbef28340638fd3c835..185c1b791800867f9e9c2113dce40a714c3ac60a 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp @@ -67,8 +67,8 @@ protected: }; -TEST_F( defaultdevicetype, reduce_instantiation) { - TestReduceCombinatoricalInstantiation<>::execute(); +TEST_F( defaultdevicetype, reduce_instantiation_a) { + TestReduceCombinatoricalInstantiation<>::execute_a(); } } // namespace test diff --git a/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.hpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_b.cpp similarity index 78% rename from lib/kokkos/core/src/impl/Kokkos_HBWAllocators.hpp rename to lib/kokkos/core/unit_test/TestDefaultDeviceType_b.cpp index be0134460b279f0cbb5f0bc1efda36863c0342ca..9aa5401871c5f0c1208c83a2370e958f4e7e1115 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.hpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType_b.cpp @@ -41,35 +41,36 @@ //@HEADER */ -#ifndef KOKKOS_HBW_ALLOCATORS_HPP -#define KOKKOS_HBW_ALLOCATORS_HPP +#include <gtest/gtest.h> -#ifdef KOKKOS_HAVE_HBWSPACE +#include <Kokkos_Core.hpp> -namespace Kokkos { -namespace Experimental { -namespace Impl { +#if !defined(KOKKOS_HAVE_CUDA) || defined(__CUDACC__) +//---------------------------------------------------------------------------- + +#include <TestReduce.hpp> -/// class MallocAllocator -class HBWMallocAllocator -{ -public: - static const char * name() - { - return "HBW Malloc Allocator"; - } - static void* allocate(size_t size); +namespace Test { - static void deallocate(void * ptr, size_t size); +class defaultdevicetype : public ::testing::Test { +protected: + static void SetUpTestCase() + { + Kokkos::initialize(); + } - static void * reallocate(void * old_ptr, size_t old_size, size_t new_size); + static void 
TearDownTestCase() + { + Kokkos::finalize(); + } }; + +TEST_F( defaultdevicetype, reduce_instantiation_b) { + TestReduceCombinatoricalInstantiation<>::execute_b(); } -} -} // namespace Kokkos::Impl -#endif //KOKKOS_HAVE_HBWSPACE -#endif //KOKKOS_HBW_ALLOCATORS_HPP +} // namespace test +#endif diff --git a/lib/kokkos/core/unit_test/TestMemorySpaceTracking.hpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_c.cpp similarity index 67% rename from lib/kokkos/core/unit_test/TestMemorySpaceTracking.hpp rename to lib/kokkos/core/unit_test/TestDefaultDeviceType_c.cpp index 575f2f2c254ecae81132c8e5f714e4fe6e71c14f..585658909225e25ea1f74646626923ffe2150920 100644 --- a/lib/kokkos/core/unit_test/TestMemorySpaceTracking.hpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType_c.cpp @@ -43,58 +43,34 @@ #include <gtest/gtest.h> -#include <iostream> #include <Kokkos_Core.hpp> -/*--------------------------------------------------------------------------*/ +#if !defined(KOKKOS_HAVE_CUDA) || defined(__CUDACC__) +//---------------------------------------------------------------------------- -namespace { +#include <TestReduce.hpp> -template<class Arg1> -class TestMemorySpace { -public: - typedef typename Arg1::memory_space MemorySpace; - TestMemorySpace() { run_test(); } +namespace Test { - void run_test() +class defaultdevicetype : public ::testing::Test { +protected: + static void SetUpTestCase() { + Kokkos::initialize(); + } -#if ! 
KOKKOS_USING_EXP_VIEW - - Kokkos::View<int* ,Arg1> invalid; - ASSERT_EQ(0u, invalid.tracker().ref_count() ); - - { - Kokkos::View<int* ,Arg1> a("A",10); - - ASSERT_EQ(1u, a.tracker().ref_count() ); - - { - Kokkos::View<int* ,Arg1> b = a; - ASSERT_EQ(2u, b.tracker().ref_count() ); - - Kokkos::View<int* ,Arg1> D("D",10); - ASSERT_EQ(1u, D.tracker().ref_count() ); - - { - Kokkos::View<int* ,Arg1> E("E",10); - ASSERT_EQ(1u, E.tracker().ref_count() ); - } - - ASSERT_EQ(2u, b.tracker().ref_count() ); - } - ASSERT_EQ(1u, a.tracker().ref_count() ); - } - -#endif - + static void TearDownTestCase() + { + Kokkos::finalize(); } }; -} - -/*--------------------------------------------------------------------------*/ +TEST_F( defaultdevicetype, reduce_instantiation_c) { + TestReduceCombinatoricalInstantiation<>::execute_c(); +} +} // namespace test +#endif diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2659b5c380f8ea79ba99c93b09104c70652dfae9 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp @@ -0,0 +1,237 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +#if !defined(KOKKOS_HAVE_CUDA) || defined(__CUDACC__) +//---------------------------------------------------------------------------- + +#include <TestAtomic.hpp> + +#include <TestViewAPI.hpp> + +#include <TestReduce.hpp> +#include <TestScan.hpp> +#include <TestTeam.hpp> +#include <TestAggregate.hpp> +#include <TestCompilerMacros.hpp> +#include <TestCXX11.hpp> +#include <TestTeamVector.hpp> +#include <TestUtilities.hpp> + +namespace Test { + +class defaultdevicetype : public ::testing::Test { +protected: + static void SetUpTestCase() + { + Kokkos::initialize(); + } + + static void TearDownTestCase() + { + Kokkos::finalize(); + } +}; + +TEST_F( defaultdevicetype, test_utilities) { + test_utilities(); +} + +TEST_F( defaultdevicetype, long_reduce) { + TestReduce< long , Kokkos::DefaultExecutionSpace >( 100000 ); +} + +TEST_F( defaultdevicetype, double_reduce) { + TestReduce< double , Kokkos::DefaultExecutionSpace >( 100000 ); +} + +TEST_F( defaultdevicetype, long_reduce_dynamic ) { + TestReduceDynamic< long , Kokkos::DefaultExecutionSpace >( 100000 ); +} + +TEST_F( defaultdevicetype, double_reduce_dynamic ) { + TestReduceDynamic< double , Kokkos::DefaultExecutionSpace >( 100000 ); +} + +TEST_F( defaultdevicetype, long_reduce_dynamic_view ) { + TestReduceDynamicView< long , Kokkos::DefaultExecutionSpace >( 100000 ); +} + + +TEST_F( defaultdevicetype , atomics ) +{ + const int loop_count = 1e4 ; + + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); + ASSERT_TRUE( ( 
TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,3) ) ); +} + +/*TEST_F( defaultdevicetype , view_remap ) +{ + enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; + + typedef Kokkos::View< double*[N1][N2][N3] , + Kokkos::LayoutRight , + Kokkos::DefaultExecutionSpace > output_type ; + + typedef Kokkos::View< int**[N2][N3] , + Kokkos::LayoutLeft , + Kokkos::DefaultExecutionSpace > input_type ; + + typedef Kokkos::View< int*[N0][N2][N3] , + Kokkos::LayoutLeft , + 
Kokkos::DefaultExecutionSpace > diff_type ; + + output_type output( "output" , N0 ); + input_type input ( "input" , N0 , N1 ); + diff_type diff ( "diff" , N0 ); + + int value = 0 ; + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { + input(i0,i1,i2,i3) = ++value ; + }}}} + + // Kokkos::deep_copy( diff , input ); // throw with incompatible shape + Kokkos::deep_copy( output , input ); + + value = 0 ; + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { + ++value ; + ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); + }}}} +}*/ + +//---------------------------------------------------------------------------- + + +TEST_F( defaultdevicetype , view_aggregate ) +{ + TestViewAggregate< Kokkos::DefaultExecutionSpace >(); +} + +//---------------------------------------------------------------------------- + +TEST_F( defaultdevicetype , scan ) +{ + TestScan< Kokkos::DefaultExecutionSpace >::test_range( 1 , 1000 ); + TestScan< Kokkos::DefaultExecutionSpace >( 1000000 ); + TestScan< Kokkos::DefaultExecutionSpace >( 10000000 ); + Kokkos::DefaultExecutionSpace::fence(); +} + + +//---------------------------------------------------------------------------- + +TEST_F( defaultdevicetype , compiler_macros ) +{ + ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::DefaultExecutionSpace >() ) ); +} + + +//---------------------------------------------------------------------------- +TEST_F( defaultdevicetype , cxx11 ) +{ + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(1) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(2) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(3) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(4) ) ); +} + +TEST_F( defaultdevicetype , 
team_vector ) +{ + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(0) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(1) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(2) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(3) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(4) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(5) ) ); +} + +TEST_F( defaultdevicetype , malloc ) +{ + int* data = (int*) Kokkos::kokkos_malloc(100*sizeof(int)); + ASSERT_NO_THROW(data = (int*) Kokkos::kokkos_realloc(data,120*sizeof(int))); + Kokkos::kokkos_free(data); + + int* data2 = (int*) Kokkos::kokkos_malloc(0); + ASSERT_TRUE(data2==NULL); + Kokkos::kokkos_free(data2); +} + +} // namespace test + +#endif diff --git a/lib/kokkos/core/unit_test/TestMemoryPool.hpp b/lib/kokkos/core/unit_test/TestMemoryPool.hpp index cf650b0bc8baa1949643a57ffff808c83f406286..f83f390ac63b57269270c7317f76392c31d2165b 100644 --- a/lib/kokkos/core/unit_test/TestMemoryPool.hpp +++ b/lib/kokkos/core/unit_test/TestMemoryPool.hpp @@ -55,21 +55,28 @@ //#define TESTMEMORYPOOL_PRINT //#define TESTMEMORYPOOL_PRINT_STATUS +#define STRIDE 1 #ifdef KOKKOS_HAVE_CUDA -#define STRIDE 32 +#define STRIDE_ALLOC 32 #else -#define STRIDE 1 +#define STRIDE_ALLOC 1 #endif namespace TestMemoryPool { struct pointer_obj { uint64_t * ptr; + + KOKKOS_INLINE_FUNCTION + pointer_obj() : ptr( 0 ) {} }; struct pointer_obj2 { void * ptr; size_t size; + + KOKKOS_INLINE_FUNCTION + pointer_obj2() : ptr( 0 ), size( 0 ) {} }; template < typename PointerView, typename Allocator > @@ -86,14 +93,14 @@ struct allocate_memory { : m_pointers( ptrs ), m_chunk_size( cs ), m_mempool( m ) { // Initialize the view with the out degree of each vertex. 
- Kokkos::parallel_for( num_ptrs * STRIDE, *this ); + Kokkos::parallel_for( num_ptrs * STRIDE_ALLOC, *this ); } KOKKOS_INLINE_FUNCTION void operator()( size_type i ) const { - if ( i % STRIDE == 0 ) { - m_pointers[i / STRIDE].ptr = + if ( i % STRIDE_ALLOC == 0 ) { + m_pointers[i / STRIDE_ALLOC].ptr = static_cast< uint64_t * >( m_mempool.allocate( m_chunk_size ) ); } } @@ -231,14 +238,14 @@ struct allocate_deallocate_memory { m_mempool( m ) { // Initialize the view with the out degree of each vertex. - Kokkos::parallel_for( work_size * STRIDE, *this ); + Kokkos::parallel_for( work_size * STRIDE_ALLOC, *this ); } KOKKOS_INLINE_FUNCTION void operator()( size_type i ) const { - if ( i % STRIDE == 0 ) { - unsigned my_work = m_work[i / STRIDE]; + if ( i % STRIDE_ALLOC == 0 ) { + unsigned my_work = m_work[i / STRIDE_ALLOC]; if ( ( my_work & 1 ) == 0 ) { // Allocation. @@ -303,7 +310,7 @@ bool test_mempool( size_t chunk_size, size_t total_size ) typedef Kokkos::View< pointer_obj *, device_type > pointer_view; typedef Kokkos::Experimental::MemoryPool< device_type > pool_memory_space; - uint64_t result; + uint64_t result = 0; size_t num_chunks = total_size / chunk_size; bool return_val = true; @@ -805,16 +812,9 @@ void test_memory_exhaustion() } -#ifdef TESTMEMORYPOOL_PRINT #undef TESTMEMORYPOOL_PRINT -#endif - -#ifdef TESTMEMORYPOOL_PRINT_STATUS #undef TESTMEMORYPOOL_PRINT_STATUS -#endif - -#ifdef STRIDE #undef STRIDE -#endif +#undef STRIDE_ALLOC #endif diff --git a/lib/kokkos/core/unit_test/TestOpenMP_c.cpp b/lib/kokkos/core/unit_test/TestOpenMP_c.cpp deleted file mode 100644 index f0cdabe913b8a4125fc5a1541823328d749759bf..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/unit_test/TestOpenMP_c.cpp +++ /dev/null @@ -1,262 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include <gtest/gtest.h> - -#include <Kokkos_Macros.hpp> -#ifdef KOKKOS_LAMBDA -#undef KOKKOS_LAMBDA -#endif -#define KOKKOS_LAMBDA [=] - -#include <Kokkos_Core.hpp> - -//---------------------------------------------------------------------------- - -#include <TestViewImpl.hpp> -#include <TestAtomic.hpp> - -#include <TestViewAPI.hpp> -#include <TestViewSubview.hpp> -#include <TestViewOfClass.hpp> - -#include <TestSharedAlloc.hpp> -#include <TestViewMapping.hpp> - -#include <TestRange.hpp> -#include <TestTeam.hpp> -#include <TestReduce.hpp> -#include <TestScan.hpp> -#include <TestAggregate.hpp> -#include <TestAggregateReduction.hpp> -#include <TestCompilerMacros.hpp> -#include <TestMemoryPool.hpp> -#include <TestTaskPolicy.hpp> - - -#include <TestCXX11.hpp> -#include <TestCXX11Deduction.hpp> -#include <TestTeamVector.hpp> -#include <TestMemorySpaceTracking.hpp> -#include <TestTemplateMetaFunctions.hpp> - -#include <TestPolicyConstruction.hpp> - - -namespace Test { - -class openmp : public ::testing::Test { -protected: - static void SetUpTestCase(); - static void TearDownTestCase(); -}; - -TEST_F( openmp , view_remap ) -{ - enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; - - typedef Kokkos::View< double*[N1][N2][N3] , - Kokkos::LayoutRight , - Kokkos::OpenMP > output_type ; - - typedef Kokkos::View< int**[N2][N3] , - Kokkos::LayoutLeft , - Kokkos::OpenMP > input_type ; - - typedef Kokkos::View< int*[N0][N2][N3] , - Kokkos::LayoutLeft , - Kokkos::OpenMP > diff_type ; - - output_type output( "output" , N0 ); - input_type input ( "input" , N0 , N1 ); - diff_type diff ( "diff" , N0 ); - - int value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - input(i0,i1,i2,i3) = ++value ; - }}}} - - // Kokkos::deep_copy( diff 
, input ); // throw with incompatible shape - Kokkos::deep_copy( output , input ); - - value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - ++value ; - ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); - }}}} -} - -//---------------------------------------------------------------------------- - - -TEST_F( openmp , view_aggregate ) -{ - TestViewAggregate< Kokkos::OpenMP >(); - TestViewAggregateReduction< Kokkos::OpenMP >(); -} - -//---------------------------------------------------------------------------- - -TEST_F( openmp , scan ) -{ - TestScan< Kokkos::OpenMP >::test_range( 1 , 1000 ); - TestScan< Kokkos::OpenMP >( 1000000 ); - TestScan< Kokkos::OpenMP >( 10000000 ); - Kokkos::OpenMP::fence(); -} - - -TEST_F( openmp , team_scan ) -{ - TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 10 ); - TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); - TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 10000 ); - TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); -} - -//---------------------------------------------------------------------------- - -TEST_F( openmp , compiler_macros ) -{ - ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::OpenMP >() ) ); -} - -//---------------------------------------------------------------------------- - -TEST_F( openmp , memory_space ) -{ - TestMemorySpace< Kokkos::OpenMP >(); -} - -TEST_F( openmp , memory_pool ) -{ - bool val = TestMemoryPool::test_mempool< Kokkos::OpenMP >( 128, 128000000 ); - ASSERT_TRUE( val ); - - TestMemoryPool::test_mempool2< Kokkos::OpenMP >( 64, 4, 1000000, 2000000 ); - - TestMemoryPool::test_memory_exhaustion< Kokkos::OpenMP >(); -} - -//---------------------------------------------------------------------------- - -TEST_F( openmp , template_meta_functions ) -{ - TestTemplateMetaFunctions<int, 
Kokkos::OpenMP >(); -} - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) -TEST_F( openmp , cxx11 ) -{ - if ( Kokkos::Impl::is_same< Kokkos::DefaultExecutionSpace , Kokkos::OpenMP >::value ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(1) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(2) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(3) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(4) ) ); - } -} -#endif - -TEST_F( openmp , reduction_deduction ) -{ - TestCXX11::test_reduction_deduction< Kokkos::OpenMP >(); -} - -TEST_F( openmp , team_vector ) -{ - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(0) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(1) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(2) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(3) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(4) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(5) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(6) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(7) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(8) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(9) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(10) ) ); -} - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_ENABLE_TASKPOLICY ) - -TEST_F( openmp , task_fib ) -{ - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskPolicy::TestFib< Kokkos::OpenMP >::run(i, (i+1)*1000000 ); - } -} - -TEST_F( openmp , task_depend ) -{ - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskPolicy::TestTaskDependence< Kokkos::OpenMP >::run(i); - } -} - -TEST_F( openmp , task_team ) -{ - TestTaskPolicy::TestTaskTeam< Kokkos::OpenMP >::run(1000); - //TestTaskPolicy::TestTaskTeamValue< Kokkos::OpenMP >::run(1000); //TODO put back after testing -} - 
- -#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ - - -} // namespace test - - - - - - diff --git a/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp b/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp index 049138eb07cd402140f1d509a3590eb8e3eb6104..1bb45481c9b76d6dde29ff9e9d192d5ae4531829 100644 --- a/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp +++ b/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp @@ -58,6 +58,10 @@ public: } private: void test_compile_time_parameters() { + { + Kokkos::Impl::expand_variadic(); + Kokkos::Impl::expand_variadic(1,2,3); + } { typedef Kokkos::RangePolicy<> policy_t; typedef typename policy_t::execution_space execution_space; diff --git a/lib/kokkos/core/unit_test/TestQthread.cpp b/lib/kokkos/core/unit_test/TestQthread.cpp index 431b844c9f4e60030f546fba320088f5eecf89c5..a465f39ca8ab428b72b68c103ec3989c92fb670f 100644 --- a/lib/kokkos/core/unit_test/TestQthread.cpp +++ b/lib/kokkos/core/unit_test/TestQthread.cpp @@ -46,11 +46,8 @@ #include <Kokkos_Core.hpp> #include <Kokkos_Qthread.hpp> -#include <Qthread/Kokkos_Qthread_TaskPolicy.hpp> - //---------------------------------------------------------------------------- -#include <TestViewImpl.hpp> #include <TestAtomic.hpp> #include <TestViewAPI.hpp> @@ -62,7 +59,7 @@ #include <TestScan.hpp> #include <TestAggregate.hpp> #include <TestCompilerMacros.hpp> -#include <TestTaskPolicy.hpp> +#include <TestTaskScheduler.hpp> // #include <TestTeamVector.hpp> namespace Test { @@ -274,14 +271,14 @@ TEST_F( qthread , team_vector ) TEST_F( qthread , task_policy ) { - TestTaskPolicy::test_task_dep< Kokkos::Qthread >( 10 ); - for ( long i = 0 ; i < 25 ; ++i ) TestTaskPolicy::test_fib< Kokkos::Qthread >(i); - for ( long i = 0 ; i < 35 ; ++i ) TestTaskPolicy::test_fib2< Kokkos::Qthread >(i); + TestTaskScheduler::test_task_dep< Kokkos::Qthread >( 10 ); + for ( long i = 0 ; i < 25 ; ++i ) TestTaskScheduler::test_fib< Kokkos::Qthread >(i); + for ( long i = 0 ; i < 35 ; ++i ) 
TestTaskScheduler::test_fib2< Kokkos::Qthread >(i); } TEST_F( qthread , task_team ) { - TestTaskPolicy::test_task_team< Kokkos::Qthread >(1000); + TestTaskScheduler::test_task_team< Kokkos::Qthread >(1000); } //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/unit_test/TestRange.hpp b/lib/kokkos/core/unit_test/TestRange.hpp index be8b4f90a32d96ad12ff4bf3baafd4ab8dec11ca..e342e844c7665650732a38e49063abee626a4a8c 100644 --- a/lib/kokkos/core/unit_test/TestRange.hpp +++ b/lib/kokkos/core/unit_test/TestRange.hpp @@ -185,7 +185,7 @@ struct TestRange { },error); ASSERT_EQ(error,0); - if( ( ExecSpace::concurrency()>(int)1) && (N>static_cast<const size_t>(4*ExecSpace::concurrency())) ) { + if( ( ExecSpace::concurrency()>(int)1) && (N>static_cast<size_t>(4*ExecSpace::concurrency())) ) { size_t min = N; size_t max = 0; for(int t=0; t<ExecSpace::concurrency(); t++) { @@ -219,7 +219,7 @@ struct TestRange { },error); ASSERT_EQ(error,0); - if( ( ExecSpace::concurrency()>(int)1) && (N>static_cast<const size_t>(4*ExecSpace::concurrency())) ) { + if( ( ExecSpace::concurrency()>(int)1) && (N>static_cast<size_t>(4*ExecSpace::concurrency())) ) { size_t min = N; size_t max = 0; for(int t=0; t<ExecSpace::concurrency(); t++) { diff --git a/lib/kokkos/core/unit_test/TestReduce.hpp b/lib/kokkos/core/unit_test/TestReduce.hpp index 53fc393bcc29e6133e4d71ffab87815b935ec9f9..a15fab17a62f2a12b8e4284a8f07aa7a2e4f1faa 100644 --- a/lib/kokkos/core/unit_test/TestReduce.hpp +++ b/lib/kokkos/core/unit_test/TestReduce.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -373,8 +373,16 @@ public: for ( unsigned i = 0 ; i < Repeat ; ++i ) { for ( unsigned j = 0 ; j < Count ; ++j ) { - const unsigned long correct = j % 2 ? 1 : nwork ; - ASSERT_EQ( (ScalarType) correct , result[i][j] ); + if ( nwork == 0 ) + { + ScalarType amin( std::numeric_limits<ScalarType>::min() ); + ScalarType amax( std::numeric_limits<ScalarType>::max() ); + const ScalarType correct = (j%2) ? amax : amin; + ASSERT_EQ( (ScalarType) correct , result[i][j] ); + } else { + const unsigned long correct = j % 2 ? 1 : nwork ; + ASSERT_EQ( (ScalarType) correct , result[i][j] ); + } } } } @@ -473,13 +481,13 @@ public: //------------------------------------ - TestTripleNestedReduce( const size_type & nrows , const size_type & ncols + TestTripleNestedReduce( const size_type & nrows , const size_type & ncols , const size_type & team_size , const size_type & vector_length ) { run_test( nrows , ncols , team_size, vector_length ); } - void run_test( const size_type & nrows , const size_type & ncols + void run_test( const size_type & nrows , const size_type & ncols , const size_type & team_size, const size_type & vector_length ) { //typedef Kokkos::LayoutLeft Layout; @@ -510,7 +518,7 @@ public: } ); } ); - // Three level parallelism kernel to force caching of vector x + // Three level parallelism kernel to force caching of vector x ScalarType result = 0.0; int chunk_size = 128; Kokkos::parallel_reduce( team_policy( nrows/chunk_size , team_size , vector_length ) , KOKKOS_LAMBDA ( const member_type& teamMember , double &update ) { @@ -541,7 +549,7 @@ public: typedef DeviceType 
execution_space ; typedef typename execution_space::size_type size_type ; - TestTripleNestedReduce( const size_type & , const size_type + TestTripleNestedReduce( const size_type & , const size_type , const size_type & , const size_type ) { } }; @@ -1059,16 +1067,19 @@ struct TestReduceCombinatoricalInstantiation { } - static void AddLabel() { - std::string s("Std::String"); + static void execute_a() { AddPolicy(); - AddPolicy("Char Constant"); + } + + static void execute_b() { + std::string s("Std::String"); AddPolicy(s.c_str()); - AddPolicy(s); + AddPolicy("Char Constant"); } - static void execute() { - AddLabel(); + static void execute_c() { + std::string s("Std::String"); + AddPolicy(s); } }; @@ -1420,6 +1431,9 @@ struct TestReducers { if(h_values(i)<reference_min) { reference_min = h_values(i); reference_loc = i; + } else if (h_values(i) == reference_min) { + // make min unique + h_values(i) += std::numeric_limits<Scalar>::epsilon(); } } Kokkos::deep_copy(values,h_values); @@ -1484,6 +1498,9 @@ struct TestReducers { if(h_values(i)>reference_max) { reference_max = h_values(i); reference_loc = i; + } else if (h_values(i) == reference_max) { + // make max unique + h_values(i) -= std::numeric_limits<Scalar>::epsilon(); } } Kokkos::deep_copy(values,h_values); @@ -1547,13 +1564,23 @@ struct TestReducers { int reference_maxloc = -1; for(int i=0; i<N; i++) { h_values(i) = (Scalar)(rand()%100000); + } + for(int i=0; i<N; i++) { if(h_values(i)>reference_max) { reference_max = h_values(i); reference_maxloc = i; + } else if (h_values(i) == reference_max) { + // make max unique + h_values(i) -= std::numeric_limits<Scalar>::epsilon(); } + } + for(int i=0; i<N; i++) { if(h_values(i)<reference_min) { reference_min = h_values(i); reference_minloc = i; + } else if (h_values(i) == reference_min) { + // make min unique + h_values(i) += std::numeric_limits<Scalar>::epsilon(); } } Kokkos::deep_copy(values,h_values); @@ -1570,8 +1597,16 @@ struct TestReducers { 
Kokkos::Experimental::MinMaxLoc<Scalar,int> reducer_scalar(minmax_scalar); Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); ASSERT_EQ(minmax_scalar.min_val,reference_min); + for(int i=0; i<N; i++) { + if((i == minmax_scalar.min_loc) && (h_values(i)==reference_min)) + reference_minloc = i; + } ASSERT_EQ(minmax_scalar.min_loc,reference_minloc); ASSERT_EQ(minmax_scalar.max_val,reference_max); + for(int i=0; i<N; i++) { + if((i == minmax_scalar.max_loc) && (h_values(i)==reference_max)) + reference_maxloc = i; + } ASSERT_EQ(minmax_scalar.max_loc,reference_maxloc); value_type minmax_scalar_view = reducer_scalar.result_view()(); ASSERT_EQ(minmax_scalar_view.min_val,reference_min); diff --git a/lib/kokkos/core/unit_test/TestSerial.cpp b/lib/kokkos/core/unit_test/TestSerial.cpp deleted file mode 100644 index d85614e66e67af2ccae9979d7f3869cbf5165c1d..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/unit_test/TestSerial.cpp +++ /dev/null @@ -1,571 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. 
Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ -#include <gtest/gtest.h> - -#include <Kokkos_Macros.hpp> -#ifdef KOKKOS_LAMBDA -#undef KOKKOS_LAMBDA -#endif -#define KOKKOS_LAMBDA [=] - -#include <Kokkos_Core.hpp> - -#include <impl/Kokkos_ViewTileLeft.hpp> -#include <TestTile.hpp> - -#include <impl/Kokkos_Serial_TaskPolicy.hpp> - -//---------------------------------------------------------------------------- - -#include <TestSharedAlloc.hpp> -#include <TestViewMapping.hpp> - -#include <TestViewImpl.hpp> - -#include <TestViewAPI.hpp> -#include <TestViewOfClass.hpp> -#include <TestViewSubview.hpp> -#include <TestAtomic.hpp> -#include <TestAtomicOperations.hpp> -#include <TestRange.hpp> -#include <TestTeam.hpp> -#include <TestReduce.hpp> -#include <TestScan.hpp> -#include <TestAggregate.hpp> -#include <TestAggregateReduction.hpp> -#include <TestCompilerMacros.hpp> -#include <TestTaskPolicy.hpp> -#include <TestMemoryPool.hpp> - - -#include <TestCXX11.hpp> -#include <TestCXX11Deduction.hpp> -#include <TestTeamVector.hpp> -#include <TestMemorySpaceTracking.hpp> -#include <TestTemplateMetaFunctions.hpp> - -#include <TestPolicyConstruction.hpp> - -#include <TestMDRange.hpp> - -namespace Test { - -class serial : public ::testing::Test { -protected: - static void SetUpTestCase() - { - Kokkos::HostSpace::execution_space::initialize(); - } - static void TearDownTestCase() - { - Kokkos::HostSpace::execution_space::finalize(); - } -}; - -TEST_F( serial , md_range ) { - TestMDRange_2D< Kokkos::Serial >::test_for2(100,100); - - TestMDRange_3D< Kokkos::Serial >::test_for3(100,100,100); -} - -TEST_F( serial , impl_shared_alloc ) { - test_shared_alloc< Kokkos::HostSpace , Kokkos::Serial >(); -} - -TEST_F( serial, policy_construction) { - TestRangePolicyConstruction< Kokkos::Serial >(); - TestTeamPolicyConstruction< Kokkos::Serial >(); -} - -TEST_F( serial , impl_view_mapping ) { - test_view_mapping< 
Kokkos::Serial >(); - test_view_mapping_subview< Kokkos::Serial >(); - test_view_mapping_operator< Kokkos::Serial >(); - TestViewMappingAtomic< Kokkos::Serial >::run(); -} - -TEST_F( serial, view_impl) { - test_view_impl< Kokkos::Serial >(); -} - -TEST_F( serial, view_api) { - TestViewAPI< double , Kokkos::Serial >(); -} - -TEST_F( serial , view_nested_view ) -{ - ::Test::view_nested_view< Kokkos::Serial >(); -} - -TEST_F( serial, view_subview_auto_1d_left ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Serial >(); -} - -TEST_F( serial, view_subview_auto_1d_right ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Serial >(); -} - -TEST_F( serial, view_subview_auto_1d_stride ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Serial >(); -} - -TEST_F( serial, view_subview_assign_strided ) { - TestViewSubview::test_1d_strided_assignment< Kokkos::Serial >(); -} - -TEST_F( serial, view_subview_left_0 ) { - TestViewSubview::test_left_0< Kokkos::Serial >(); -} - -TEST_F( serial, view_subview_left_1 ) { - TestViewSubview::test_left_1< Kokkos::Serial >(); -} - -TEST_F( serial, view_subview_left_2 ) { - TestViewSubview::test_left_2< Kokkos::Serial >(); -} - -TEST_F( serial, view_subview_left_3 ) { - TestViewSubview::test_left_3< Kokkos::Serial >(); -} - -TEST_F( serial, view_subview_right_0 ) { - TestViewSubview::test_right_0< Kokkos::Serial >(); -} - -TEST_F( serial, view_subview_right_1 ) { - TestViewSubview::test_right_1< Kokkos::Serial >(); -} - -TEST_F( serial, view_subview_right_3 ) { - TestViewSubview::test_right_3< Kokkos::Serial >(); -} - -TEST_F( serial , range_tag ) -{ - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000); - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001); - TestRange< 
Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001); - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001); - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000); -} - -TEST_F( serial , team_tag ) -{ - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000); - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000); -} - -TEST_F( serial, long_reduce) { - TestReduce< long , Kokkos::Serial >( 1000000 ); -} - -TEST_F( serial, double_reduce) { - TestReduce< double , Kokkos::Serial >( 1000000 ); -} - -TEST_F( serial , reducers ) -{ - TestReducers<int, Kokkos::Serial>::execute_integer(); - TestReducers<size_t, Kokkos::Serial>::execute_integer(); - TestReducers<double, Kokkos::Serial>::execute_float(); - TestReducers<Kokkos::complex<double>, Kokkos::Serial>::execute_basic(); -} - -TEST_F( serial, long_reduce_dynamic ) { - TestReduceDynamic< long , Kokkos::Serial >( 1000000 ); -} - -TEST_F( serial, double_reduce_dynamic ) { - TestReduceDynamic< double , Kokkos::Serial >( 1000000 ); -} - -TEST_F( serial, long_reduce_dynamic_view ) { - TestReduceDynamicView< long , Kokkos::Serial >( 1000000 ); -} - -TEST_F( serial , scan ) -{ - TestScan< Kokkos::Serial >::test_range( 1 , 1000 ); - TestScan< Kokkos::Serial >( 10 ); - TestScan< Kokkos::Serial >( 10000 ); -} - -TEST_F( serial , team_long_reduce) { - TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); -} - -TEST_F( serial , 
team_double_reduce) { - TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); -} - -TEST_F( serial , team_shared_request) { - TestSharedTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >(); - TestSharedTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >(); -} - -#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) -TEST_F( serial , team_lambda_shared_request) { - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >(); - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >(); -} -#endif - -TEST_F( serial, shmem_size) { - TestShmemSize< Kokkos::Serial >(); -} - -TEST_F( serial , team_scan ) -{ - TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 10 ); - TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); - TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 10000 ); - TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); -} - - -TEST_F( serial , view_remap ) -{ - enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; - - typedef Kokkos::View< double*[N1][N2][N3] , - Kokkos::LayoutRight , - Kokkos::Serial > output_type ; - - typedef Kokkos::View< int**[N2][N3] , - Kokkos::LayoutLeft , - Kokkos::Serial > input_type ; - - typedef Kokkos::View< int*[N0][N2][N3] , - Kokkos::LayoutLeft , - Kokkos::Serial > diff_type ; - - output_type output( "output" , N0 ); - input_type input ( "input" , N0 , N1 ); - diff_type diff ( "diff" , N0 ); - - int value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - 
input(i0,i1,i2,i3) = ++value ; - }}}} - - // Kokkos::deep_copy( diff , input ); // throw with incompatible shape - Kokkos::deep_copy( output , input ); - - value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - ++value ; - ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); - }}}} -} - -//---------------------------------------------------------------------------- - -TEST_F( serial , view_aggregate ) -{ - TestViewAggregate< Kokkos::Serial >(); - TestViewAggregateReduction< Kokkos::Serial >(); -} - -//---------------------------------------------------------------------------- - -TEST_F( serial , atomics ) -{ - const int loop_count = 1e6 ; - - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,3) ) ); - - 
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,3) ) ); -} - -TEST_F( serial , atomic_operations ) -{ - const int start = 1; //Avoid zero for division - const int end = 11; - for (int i = start; i < end; ++i) - { - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 9 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 9 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 9 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 9 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 3 ) ) 
); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 9 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 4 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 4 ) ) ); - } - -} -//---------------------------------------------------------------------------- - -TEST_F( serial, tile_layout ) -{ - TestTile::test< Kokkos::Serial , 1 , 1 >( 1 , 1 ); - TestTile::test< Kokkos::Serial , 1 , 1 >( 2 , 3 ); - TestTile::test< Kokkos::Serial , 1 , 1 >( 9 , 
10 ); - - TestTile::test< Kokkos::Serial , 2 , 2 >( 1 , 1 ); - TestTile::test< Kokkos::Serial , 2 , 2 >( 2 , 3 ); - TestTile::test< Kokkos::Serial , 2 , 2 >( 4 , 4 ); - TestTile::test< Kokkos::Serial , 2 , 2 >( 9 , 9 ); - - TestTile::test< Kokkos::Serial , 2 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::Serial , 4 , 2 >( 9 , 9 ); - - TestTile::test< Kokkos::Serial , 4 , 4 >( 1 , 1 ); - TestTile::test< Kokkos::Serial , 4 , 4 >( 4 , 4 ); - TestTile::test< Kokkos::Serial , 4 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::Serial , 4 , 4 >( 9 , 11 ); - - TestTile::test< Kokkos::Serial , 8 , 8 >( 1 , 1 ); - TestTile::test< Kokkos::Serial , 8 , 8 >( 4 , 4 ); - TestTile::test< Kokkos::Serial , 8 , 8 >( 9 , 9 ); - TestTile::test< Kokkos::Serial , 8 , 8 >( 9 , 11 ); -} - -//---------------------------------------------------------------------------- - -TEST_F( serial , compiler_macros ) -{ - ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Serial >() ) ); -} - -//---------------------------------------------------------------------------- - -TEST_F( serial , memory_space ) -{ - TestMemorySpace< Kokkos::Serial >(); -} - -TEST_F( serial , memory_pool ) -{ - bool val = TestMemoryPool::test_mempool< Kokkos::Serial >( 128, 128000000 ); - ASSERT_TRUE( val ); - - TestMemoryPool::test_mempool2< Kokkos::Serial >( 64, 4, 1000000, 2000000 ); - - TestMemoryPool::test_memory_exhaustion< Kokkos::Serial >(); -} - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_ENABLE_TASKPOLICY ) - -TEST_F( serial , task_fib ) -{ - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskPolicy::TestFib< Kokkos::Serial >::run(i); - } -} - -TEST_F( serial , task_depend ) -{ - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskPolicy::TestTaskDependence< Kokkos::Serial >::run(i); - } -} - -TEST_F( serial , task_team ) -{ - TestTaskPolicy::TestTaskTeam< Kokkos::Serial >::run(1000); - //TestTaskPolicy::TestTaskTeamValue< Kokkos::Serial >::run(1000); //put back after testing -} - 
-TEST_F( serial , old_task_policy ) -{ - TestTaskPolicy::test_task_dep< Kokkos::Serial >( 10 ); - // TestTaskPolicy::test_norm2< Kokkos::Serial >( 1000 ); - // for ( long i = 0 ; i < 30 ; ++i ) TestTaskPolicy::test_fib< Kokkos::Serial >(i); - // for ( long i = 0 ; i < 40 ; ++i ) TestTaskPolicy::test_fib2< Kokkos::Serial >(i); - for ( long i = 0 ; i < 20 ; ++i ) TestTaskPolicy::test_fib< Kokkos::Serial >(i); - for ( long i = 0 ; i < 25 ; ++i ) TestTaskPolicy::test_fib2< Kokkos::Serial >(i); -} - -TEST_F( serial , old_task_team ) -{ - TestTaskPolicy::test_task_team< Kokkos::Serial >(1000); -} - -#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ - -//---------------------------------------------------------------------------- - -TEST_F( serial , template_meta_functions ) -{ - TestTemplateMetaFunctions<int, Kokkos::Serial >(); -} - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL ) -TEST_F( serial , cxx11 ) -{ - if ( Kokkos::Impl::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Serial >::value ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(1) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(2) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(3) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(4) ) ); - } -} -#endif - -TEST_F( serial , reduction_deduction ) -{ - TestCXX11::test_reduction_deduction< Kokkos::Serial >(); -} - -TEST_F( serial , team_vector ) -{ - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(0) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(1) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(2) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(3) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(4) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(5) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(6) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial 
>(7) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(8) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(9) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(10) ) ); -} - -} // namespace test - diff --git a/lib/kokkos/core/unit_test/TestSharedAlloc.hpp b/lib/kokkos/core/unit_test/TestSharedAlloc.hpp index 61166888142e7f666b303dc1c837daa34c07a00c..291f9f60e4b8050e11b653f3f3ae975f1d1e8c91 100644 --- a/lib/kokkos/core/unit_test/TestSharedAlloc.hpp +++ b/lib/kokkos/core/unit_test/TestSharedAlloc.hpp @@ -62,7 +62,7 @@ struct SharedAllocDestroy { void destroy_shared_allocation() { - Kokkos::atomic_fetch_add( count , 1 ); + Kokkos::atomic_increment( count ); } }; @@ -72,11 +72,11 @@ void test_shared_alloc() { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - typedef const Kokkos::Experimental::Impl::SharedAllocationHeader Header ; - typedef Kokkos::Experimental::Impl::SharedAllocationTracker Tracker ; - typedef Kokkos::Experimental::Impl::SharedAllocationRecord< void , void > RecordBase ; - typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , void > RecordMemS ; - typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , SharedAllocDestroy > RecordFull ; + typedef const Kokkos::Impl::SharedAllocationHeader Header ; + typedef Kokkos::Impl::SharedAllocationTracker Tracker ; + typedef Kokkos::Impl::SharedAllocationRecord< void , void > RecordBase ; + typedef Kokkos::Impl::SharedAllocationRecord< MemorySpace , void > RecordMemS ; + typedef Kokkos::Impl::SharedAllocationRecord< MemorySpace , SharedAllocDestroy > RecordFull ; static_assert( sizeof(Tracker) == sizeof(int*), "SharedAllocationTracker has wrong size!" 
); diff --git a/lib/kokkos/core/unit_test/TestSynchronic.cpp b/lib/kokkos/core/unit_test/TestSynchronic.cpp index 9121dc15a17ecead1895ce1df660c1d25a2deda2..f6a3f38e3f9f3c5cd40145777364cf0e2bc3cf57 100644 --- a/lib/kokkos/core/unit_test/TestSynchronic.cpp +++ b/lib/kokkos/core/unit_test/TestSynchronic.cpp @@ -29,7 +29,7 @@ OF THE POSSIBILITY OF SUCH DAMAGE. //#undef _WIN32_WINNT //#define _WIN32_WINNT 0x0602 -#if defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__APPLE__) +#if defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__APPLE__) || defined(__ARM_ARCH_8A) // Skip for now diff --git a/lib/kokkos/core/unit_test/TestSynchronic.hpp b/lib/kokkos/core/unit_test/TestSynchronic.hpp index d820129e8b571fa5eac2dc7f8d5016c47cd589f4..f4341b97815b8d70956dfb85cf0d41a4f07bab4d 100644 --- a/lib/kokkos/core/unit_test/TestSynchronic.hpp +++ b/lib/kokkos/core/unit_test/TestSynchronic.hpp @@ -31,6 +31,7 @@ OF THE POSSIBILITY OF SUCH DAMAGE. #include <impl/Kokkos_Synchronic.hpp> #include <mutex> +#include <cmath> namespace Test { diff --git a/lib/kokkos/core/unit_test/TestTaskPolicy.hpp b/lib/kokkos/core/unit_test/TestTaskPolicy.hpp deleted file mode 100644 index 71790f6def82d50a12d37d88e0b0e7d17f28799f..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/unit_test/TestTaskPolicy.hpp +++ /dev/null @@ -1,1145 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - - -#ifndef KOKKOS_UNITTEST_TASKPOLICY_HPP -#define KOKKOS_UNITTEST_TASKPOLICY_HPP - -#include <stdio.h> -#include <iostream> -#include <cmath> -#include <Kokkos_TaskPolicy.hpp> - -#if defined( KOKKOS_ENABLE_TASKPOLICY ) - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace TestTaskPolicy { - -namespace { - -long eval_fib( long n ) -{ - constexpr long mask = 0x03 ; - - long fib[4] = { 0 , 1 , 1 , 2 }; - - for ( long i = 2 ; i <= n ; ++i ) { - fib[ i & mask ] = fib[ ( i - 1 ) & mask ] + fib[ ( i - 2 ) & mask ]; - } - - return fib[ n & mask ]; -} - -} - -template< typename Space > -struct TestFib -{ - typedef Kokkos::TaskPolicy<Space> policy_type ; - typedef Kokkos::Future<long,Space> future_type ; - typedef long value_type ; - - policy_type policy ; - future_type fib_m1 ; - future_type fib_m2 ; - const value_type n ; - - KOKKOS_INLINE_FUNCTION - TestFib( const policy_type & arg_policy , const value_type arg_n ) - : policy(arg_policy) - , fib_m1() , fib_m2() - , n( arg_n ) - {} - - KOKKOS_INLINE_FUNCTION - void operator()( typename policy_type::member_type & , value_type & result ) - { -#if 0 - printf( "\nTestFib(%ld) %d %d\n" - , n - , int( ! fib_m1.is_null() ) - , int( ! fib_m2.is_null() ) - ); -#endif - - if ( n < 2 ) { - result = n ; - } - else if ( ! fib_m2.is_null() && ! fib_m1.is_null() ) { - result = fib_m1.get() + fib_m2.get(); - } - else { - - // Spawn new children and respawn myself to sum their results: - // Spawn lower value at higher priority as it has a shorter - // path to completion. 
- - fib_m2 = policy.task_spawn( TestFib(policy,n-2) - , Kokkos::TaskSingle - , Kokkos::TaskHighPriority ); - - fib_m1 = policy.task_spawn( TestFib(policy,n-1) - , Kokkos::TaskSingle ); - - Kokkos::Future<Space> dep[] = { fib_m1 , fib_m2 }; - - Kokkos::Future<Space> fib_all = policy.when_all( 2 , dep ); - - if ( ! fib_m2.is_null() && ! fib_m1.is_null() && ! fib_all.is_null() ) { - // High priority to retire this branch - policy.respawn( this , Kokkos::TaskHighPriority , fib_all ); - } - else { -#if 0 - printf( "TestFib(%ld) insufficient memory alloc_capacity(%d) task_max(%d) task_accum(%ld)\n" - , n - , policy.allocation_capacity() - , policy.allocated_task_count_max() - , policy.allocated_task_count_accum() - ); -#endif - Kokkos::abort("TestFib insufficient memory"); - - } - } - } - - static void run( int i , size_t MemoryCapacity = 16000 ) - { - typedef typename policy_type::memory_space memory_space ; - - enum { Log2_SuperBlockSize = 12 }; - - policy_type root_policy( memory_space() , MemoryCapacity , Log2_SuperBlockSize ); - - future_type f = root_policy.host_spawn( TestFib(root_policy,i) , Kokkos::TaskSingle ); - Kokkos::wait( root_policy ); - ASSERT_EQ( eval_fib(i) , f.get() ); - -#if 0 - fprintf( stdout , "\nTestFib::run(%d) spawn_size(%d) when_all_size(%d) alloc_capacity(%d) task_max(%d) task_accum(%ld)\n" - , i - , int(root_policy.template spawn_allocation_size<TestFib>()) - , int(root_policy.when_all_allocation_size(2)) - , root_policy.allocation_capacity() - , root_policy.allocated_task_count_max() - , root_policy.allocated_task_count_accum() - ); - fflush( stdout ); -#endif - } - -}; - -} // namespace TestTaskPolicy - -//---------------------------------------------------------------------------- - -namespace TestTaskPolicy { - -template< class Space > -struct TestTaskDependence { - - typedef Kokkos::TaskPolicy<Space> policy_type ; - typedef Kokkos::Future<Space> future_type ; - typedef Kokkos::View<long,Space> accum_type ; - typedef void value_type ; - 
- policy_type m_policy ; - accum_type m_accum ; - long m_count ; - - KOKKOS_INLINE_FUNCTION - TestTaskDependence( long n - , const policy_type & arg_policy - , const accum_type & arg_accum ) - : m_policy( arg_policy ) - , m_accum( arg_accum ) - , m_count( n ) - {} - - KOKKOS_INLINE_FUNCTION - void operator()( typename policy_type::member_type & ) - { - enum { CHUNK = 8 }; - const int n = CHUNK < m_count ? CHUNK : m_count ; - - if ( 1 < m_count ) { - future_type f[ CHUNK ] ; - - const int inc = ( m_count + n - 1 ) / n ; - - for ( int i = 0 ; i < n ; ++i ) { - long begin = i * inc ; - long count = begin + inc < m_count ? inc : m_count - begin ; - f[i] = m_policy.task_spawn( TestTaskDependence(count,m_policy,m_accum) , Kokkos::TaskSingle ); - } - - m_count = 0 ; - - m_policy.respawn( this , m_policy.when_all( n , f ) ); - } - else if ( 1 == m_count ) { - Kokkos::atomic_increment( & m_accum() ); - } - } - - static void run( int n ) - { - typedef typename policy_type::memory_space memory_space ; - - // enum { MemoryCapacity = 4000 }; // Triggers infinite loop in memory pool - enum { MemoryCapacity = 16000 }; - enum { Log2_SuperBlockSize = 12 }; - policy_type policy( memory_space() , MemoryCapacity , Log2_SuperBlockSize ); - - accum_type accum("accum"); - - typename accum_type::HostMirror host_accum = - Kokkos::create_mirror_view( accum ); - - policy.host_spawn( TestTaskDependence(n,policy,accum) , Kokkos::TaskSingle ); - - Kokkos::wait( policy ); - - Kokkos::deep_copy( host_accum , accum ); - - ASSERT_EQ( host_accum() , n ); - } -}; - -} // namespace TestTaskPolicy - -//---------------------------------------------------------------------------- - -namespace TestTaskPolicy { - -template< class ExecSpace > -struct TestTaskTeam { - - //enum { SPAN = 8 }; - enum { SPAN = 33 }; - //enum { SPAN = 1 }; - - typedef void value_type ; - typedef Kokkos::TaskPolicy<ExecSpace> policy_type ; - typedef Kokkos::Future<ExecSpace> future_type ; - typedef Kokkos::View<long*,ExecSpace> 
view_type ; - - policy_type policy ; - future_type future ; - - view_type parfor_result ; - view_type parreduce_check ; - view_type parscan_result ; - view_type parscan_check ; - const long nvalue ; - - KOKKOS_INLINE_FUNCTION - TestTaskTeam( const policy_type & arg_policy - , const view_type & arg_parfor_result - , const view_type & arg_parreduce_check - , const view_type & arg_parscan_result - , const view_type & arg_parscan_check - , const long arg_nvalue ) - : policy(arg_policy) - , future() - , parfor_result( arg_parfor_result ) - , parreduce_check( arg_parreduce_check ) - , parscan_result( arg_parscan_result ) - , parscan_check( arg_parscan_check ) - , nvalue( arg_nvalue ) - {} - - KOKKOS_INLINE_FUNCTION - void operator()( typename policy_type::member_type & member ) - { - const long end = nvalue + 1 ; - const long begin = 0 < end - SPAN ? end - SPAN : 0 ; - - if ( 0 < begin && future.is_null() ) { - if ( member.team_rank() == 0 ) { - future = policy.task_spawn - ( TestTaskTeam( policy , - parfor_result , - parreduce_check, - parscan_result, - parscan_check, - begin - 1 ) - , Kokkos::TaskTeam ); - - assert( ! 
future.is_null() ); - - policy.respawn( this , future ); - } - return ; - } - - Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i ) { parfor_result[i] = i ; } - ); - - // test parallel_reduce without join - - long tot = 0; - long expected = (begin+end-1)*(end-begin)*0.5; - - Kokkos::parallel_reduce( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i, long &res) { res += parfor_result[i]; } - , tot); - Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i ) { parreduce_check[i] = expected-tot ; } - ); - - // test parallel_reduce with join - - tot = 0; - Kokkos::parallel_reduce( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i, long &res) { res += parfor_result[i]; } - , [&]( long& val1, const long& val2) { val1 += val2; } - , tot); - Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i ) { parreduce_check[i] += expected-tot ; } - ); - -#if 0 - // test parallel_scan - - // Exclusive scan - Kokkos::parallel_scan<long>( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i, long &val , const bool final ) { - if ( final ) { parscan_result[i] = val; } - val += i; - } - ); - - if ( member.team_rank() == 0 ) { - for ( long i = begin ; i < end ; ++i ) { - parscan_check[i] = (i*(i-1)-begin*(begin-1))*0.5-parscan_result[i]; - } - } - - // Inclusive scan - Kokkos::parallel_scan<long>( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i, long &val , const bool final ) { - val += i; - if ( final ) { parscan_result[i] = val; } - } - ); - - if ( member.team_rank() == 0 ) { - for ( long i = begin ; i < end ; ++i ) { - parscan_check[i] += (i*(i+1)-begin*(begin-1))*0.5-parscan_result[i]; - } - } -#endif - - } - - static void run( long n ) - { - // const unsigned memory_capacity = 10000 ; // causes memory pool infinite loop - // const unsigned memory_capacity = 100000 ; // fails with SPAN=1 for serial and OMP - const unsigned memory_capacity = 400000 ; - - policy_type root_policy( 
typename policy_type::memory_space() - , memory_capacity ); - - view_type root_parfor_result("parfor_result",n+1); - view_type root_parreduce_check("parreduce_check",n+1); - view_type root_parscan_result("parscan_result",n+1); - view_type root_parscan_check("parscan_check",n+1); - - typename view_type::HostMirror - host_parfor_result = Kokkos::create_mirror_view( root_parfor_result ); - typename view_type::HostMirror - host_parreduce_check = Kokkos::create_mirror_view( root_parreduce_check ); - typename view_type::HostMirror - host_parscan_result = Kokkos::create_mirror_view( root_parscan_result ); - typename view_type::HostMirror - host_parscan_check = Kokkos::create_mirror_view( root_parscan_check ); - - future_type f = root_policy.host_spawn( - TestTaskTeam( root_policy , - root_parfor_result , - root_parreduce_check , - root_parscan_result, - root_parscan_check, - n ) , - Kokkos::TaskTeam ); - - Kokkos::wait( root_policy ); - - Kokkos::deep_copy( host_parfor_result , root_parfor_result ); - Kokkos::deep_copy( host_parreduce_check , root_parreduce_check ); - Kokkos::deep_copy( host_parscan_result , root_parscan_result ); - Kokkos::deep_copy( host_parscan_check , root_parscan_check ); - - for ( long i = 0 ; i <= n ; ++i ) { - const long answer = i ; - if ( host_parfor_result(i) != answer ) { - std::cerr << "TestTaskTeam::run ERROR parallel_for result(" << i << ") = " - << host_parfor_result(i) << " != " << answer << std::endl ; - } - if ( host_parreduce_check(i) != 0 ) { - std::cerr << "TestTaskTeam::run ERROR parallel_reduce check(" << i << ") = " - << host_parreduce_check(i) << " != 0" << std::endl ; - } //TODO - if ( host_parscan_check(i) != 0 ) { - std::cerr << "TestTaskTeam::run ERROR parallel_scan check(" << i << ") = " - << host_parscan_check(i) << " != 0" << std::endl ; - } - } - } -}; - -template< class ExecSpace > -struct TestTaskTeamValue { - - enum { SPAN = 8 }; - - typedef long value_type ; - typedef Kokkos::TaskPolicy<ExecSpace> policy_type ; - 
typedef Kokkos::Future<value_type,ExecSpace> future_type ; - typedef Kokkos::View<long*,ExecSpace> view_type ; - - policy_type policy ; - future_type future ; - - view_type result ; - const long nvalue ; - - KOKKOS_INLINE_FUNCTION - TestTaskTeamValue( const policy_type & arg_policy - , const view_type & arg_result - , const long arg_nvalue ) - : policy(arg_policy) - , future() - , result( arg_result ) - , nvalue( arg_nvalue ) - {} - - KOKKOS_INLINE_FUNCTION - void operator()( typename policy_type::member_type const & member - , value_type & final ) - { - const long end = nvalue + 1 ; - const long begin = 0 < end - SPAN ? end - SPAN : 0 ; - - if ( 0 < begin && future.is_null() ) { - if ( member.team_rank() == 0 ) { - - future = policy.task_spawn - ( TestTaskTeamValue( policy , result , begin - 1 ) - , Kokkos::TaskTeam ); - - assert( ! future.is_null() ); - - policy.respawn( this , future ); - } - return ; - } - - Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i ) { result[i] = i + 1 ; } - ); - - if ( member.team_rank() == 0 ) { - final = result[nvalue] ; - } - - Kokkos::memory_fence(); - } - - static void run( long n ) - { - // const unsigned memory_capacity = 10000 ; // causes memory pool infinite loop - const unsigned memory_capacity = 100000 ; - - policy_type root_policy( typename policy_type::memory_space() - , memory_capacity ); - - view_type root_result("result",n+1); - - typename view_type::HostMirror - host_result = Kokkos::create_mirror_view( root_result ); - - future_type fv = root_policy.host_spawn - ( TestTaskTeamValue( root_policy, root_result, n ) , Kokkos::TaskTeam ); - - Kokkos::wait( root_policy ); - - Kokkos::deep_copy( host_result , root_result ); - - if ( fv.get() != n + 1 ) { - std::cerr << "TestTaskTeamValue ERROR future = " - << fv.get() << " != " << n + 1 << std::endl ; - } - for ( long i = 0 ; i <= n ; ++i ) { - const long answer = i + 1 ; - if ( host_result(i) != answer ) { - std::cerr << "TestTaskTeamValue 
ERROR result(" << i << ") = " - << host_result(i) << " != " << answer << std::endl ; - } - } - } -}; -} // namespace TestTaskPolicy - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace TestTaskPolicy { - -template< class ExecSpace > -struct FibChild { - - typedef long value_type ; - - Kokkos::Experimental::TaskPolicy<ExecSpace> policy ; - Kokkos::Experimental::Future<long,ExecSpace> fib_1 ; - Kokkos::Experimental::Future<long,ExecSpace> fib_2 ; - const value_type n ; - int has_nested ; - - KOKKOS_INLINE_FUNCTION - FibChild( const Kokkos::Experimental::TaskPolicy<ExecSpace> & arg_policy - , const value_type arg_n ) - : policy(arg_policy) - , fib_1() , fib_2() - , n( arg_n ), has_nested(0) {} - - KOKKOS_INLINE_FUNCTION - void apply( value_type & result ) - { - typedef Kokkos::Experimental::Future<long,ExecSpace> future_type ; - - if ( n < 2 ) { - - has_nested = -1 ; - - result = n ; - } - else { - if ( has_nested == 0 ) { - // Spawn new children and respawn myself to sum their results: - // Spawn lower value at higher priority as it has a shorter - // path to completion. - if ( fib_2.is_null() ) { - fib_2 = policy.task_create( FibChild(policy,n-2) ); - } - - if ( ! fib_2.is_null() && fib_1.is_null() ) { - fib_1 = policy.task_create( FibChild(policy,n-1) ); - } - - if ( ! fib_1.is_null() ) { - has_nested = 2 ; - - policy.spawn( fib_2 , true /* high priority */ ); - policy.spawn( fib_1 ); - policy.add_dependence( this , fib_1 ); - policy.add_dependence( this , fib_2 ); - policy.respawn( this ); - } - else { - // Release task memory before spawning the task, - // after spawning memory cannot be released. 
- fib_2 = future_type(); - // Respawn when more memory is available - policy.respawn_needing_memory( this ); - } - } - else if ( has_nested == 2 ) { - - has_nested = -1 ; - - result = fib_1.get() + fib_2.get(); - -if ( false ) { - printf("FibChild %ld = fib(%ld), task_count(%d)\n" - , long(n), long(result), policy.allocated_task_count()); -} - - } - else { - printf("FibChild(%ld) execution error\n",(long)n); - Kokkos::abort("FibChild execution error"); - } - } - } -}; - -template< class ExecSpace > -struct FibChild2 { - - typedef long value_type ; - - Kokkos::Experimental::TaskPolicy<ExecSpace> policy ; - Kokkos::Experimental::Future<long,ExecSpace> fib_a ; - Kokkos::Experimental::Future<long,ExecSpace> fib_b ; - const value_type n ; - int has_nested ; - - KOKKOS_INLINE_FUNCTION - FibChild2( const Kokkos::Experimental::TaskPolicy<ExecSpace> & arg_policy - , const value_type arg_n ) - : policy(arg_policy) - , n( arg_n ), has_nested(0) {} - - KOKKOS_INLINE_FUNCTION - void apply( value_type & result ) - { - if ( 0 == has_nested ) { - if ( n < 2 ) { - - has_nested = -1 ; - - result = n ; - } - else if ( n < 4 ) { - // Spawn new children and respawn myself to sum their results: - // result = Fib(n-1) + Fib(n-2) - has_nested = 2 ; - - // Spawn lower value at higher priority as it has a shorter - // path to completion. 
- - policy.clear_dependence( this ); - fib_a = policy.spawn( policy.task_create( FibChild2(policy,n-1) ) ); - fib_b = policy.spawn( policy.task_create( FibChild2(policy,n-2) ) , true ); - policy.add_dependence( this , fib_a ); - policy.add_dependence( this , fib_b ); - policy.respawn( this ); - } - else { - // Spawn new children and respawn myself to sum their results: - // result = Fib(n-1) + Fib(n-2) - // result = ( Fib(n-2) + Fib(n-3) ) + ( Fib(n-3) + Fib(n-4) ) - // result = ( ( Fib(n-3) + Fib(n-4) ) + Fib(n-3) ) + ( Fib(n-3) + Fib(n-4) ) - // result = 3 * Fib(n-3) + 2 * Fib(n-4) - has_nested = 4 ; - - // Spawn lower value at higher priority as it has a shorter - // path to completion. - - policy.clear_dependence( this ); - fib_a = policy.spawn( policy.task_create( FibChild2(policy,n-3) ) ); - fib_b = policy.spawn( policy.task_create( FibChild2(policy,n-4) ) , true ); - policy.add_dependence( this , fib_a ); - policy.add_dependence( this , fib_b ); - policy.respawn( this ); - } - } - else if ( 2 == has_nested || 4 == has_nested ) { - result = ( has_nested == 2 ) ? 
fib_a.get() + fib_b.get() - : 3 * fib_a.get() + 2 * fib_b.get() ; - - has_nested = -1 ; - } - else { - printf("FibChild2(%ld) execution error\n",(long)n); - Kokkos::abort("FibChild2 execution error"); - } - } -}; - -template< class ExecSpace > -void test_fib( long n , const unsigned task_max_count = 4096 ) -{ - const unsigned task_max_size = 256 ; - const unsigned task_dependence = 4 ; - - Kokkos::Experimental::TaskPolicy<ExecSpace> - policy( task_max_count - , task_max_size - , task_dependence ); - - Kokkos::Experimental::Future<long,ExecSpace> f = - policy.spawn( policy.proc_create( FibChild<ExecSpace>(policy,n) ) ); - - Kokkos::Experimental::wait( policy ); - - if ( f.get() != eval_fib(n) ) { - std::cout << "Fib(" << n << ") = " << f.get(); - std::cout << " != " << eval_fib(n); - std::cout << std::endl ; - } -} - -template< class ExecSpace > -void test_fib2( long n , const unsigned task_max_count = 1024 ) -{ - const unsigned task_max_size = 256 ; - const unsigned task_dependence = 4 ; - - Kokkos::Experimental::TaskPolicy<ExecSpace> - policy( task_max_count - , task_max_size - , task_dependence ); - - Kokkos::Experimental::Future<long,ExecSpace> f = - policy.spawn( policy.proc_create( FibChild2<ExecSpace>(policy,n) ) ); - - Kokkos::Experimental::wait( policy ); - - if ( f.get() != eval_fib(n) ) { - std::cout << "Fib2(" << n << ") = " << f.get(); - std::cout << " != " << eval_fib(n); - std::cout << std::endl ; - } -} - -//---------------------------------------------------------------------------- - -template< class ExecSpace > -struct Norm2 { - - typedef double value_type ; - - const double * const m_x ; - - Norm2( const double * x ) : m_x(x) {} - - inline - void init( double & val ) const { val = 0 ; } - - KOKKOS_INLINE_FUNCTION - void operator()( int i , double & val ) const { val += m_x[i] * m_x[i] ; } - - void apply( double & dst ) const { dst = std::sqrt( dst ); } -}; - -template< class ExecSpace > -void test_norm2( const int n ) -{ - const unsigned 
task_max_count = 1024 ; - const unsigned task_max_size = 256 ; - const unsigned task_dependence = 4 ; - - Kokkos::Experimental::TaskPolicy<ExecSpace> - policy( task_max_count - , task_max_size - , task_dependence ); - - double * const x = new double[n]; - - for ( int i = 0 ; i < n ; ++i ) x[i] = 1 ; - - Kokkos::RangePolicy<ExecSpace> r(0,n); - - Kokkos::Experimental::Future<double,ExecSpace> f = - Kokkos::Experimental::spawn_reduce( policy , r , Norm2<ExecSpace>(x) ); - - Kokkos::Experimental::wait( policy ); - -#if defined(PRINT) - std::cout << "Norm2: " << f.get() << std::endl ; -#endif - - delete[] x ; -} - -//---------------------------------------------------------------------------- - -template< class Space > -struct TaskDep { - - typedef int value_type ; - typedef Kokkos::Experimental::TaskPolicy< Space > policy_type ; - - const policy_type policy ; - const int input ; - - TaskDep( const policy_type & arg_p , const int arg_i ) - : policy( arg_p ), input( arg_i ) {} - - KOKKOS_INLINE_FUNCTION - void apply( int & val ) - { - val = input ; - const int num = policy.get_dependence( this ); - - for ( int i = 0 ; i < num ; ++i ) { - Kokkos::Experimental::Future<int,Space> f = policy.get_dependence( this , i ); - val += f.get(); - } - } -}; - - -template< class Space > -void test_task_dep( const int n ) -{ - enum { NTEST = 64 }; - - const unsigned task_max_count = 1024 ; - const unsigned task_max_size = 64 ; - const unsigned task_dependence = 4 ; - - Kokkos::Experimental::TaskPolicy<Space> - policy( task_max_count - , task_max_size - , task_dependence ); - - Kokkos::Experimental::Future<int,Space> f[ NTEST ]; - - for ( int i = 0 ; i < NTEST ; ++i ) { - // Create task in the "constructing" state with capacity for 'n+1' dependences - f[i] = policy.proc_create( TaskDep<Space>(policy,0) , n + 1 ); - - if ( f[i].get_task_state() != Kokkos::Experimental::TASK_STATE_CONSTRUCTING ) { - Kokkos::Impl::throw_runtime_exception("get_task_state() != 
Kokkos::Experimental::TASK_STATE_CONSTRUCTING"); - } - - // Only use 'n' dependences - - for ( int j = 0 ; j < n ; ++j ) { - - Kokkos::Experimental::Future<int,Space> nested = - policy.proc_create( TaskDep<Space>(policy,j+1) ); - - policy.spawn( nested ); - - // Add dependence to a "constructing" task - policy.add_dependence( f[i] , nested ); - } - - // Spawn task from the "constructing" to the "waiting" state - policy.spawn( f[i] ); - } - - const int answer = n % 2 ? n * ( ( n + 1 ) / 2 ) : ( n / 2 ) * ( n + 1 ); - - Kokkos::Experimental::wait( policy ); - - int error = 0 ; - for ( int i = 0 ; i < NTEST ; ++i ) { - if ( f[i].get_task_state() != Kokkos::Experimental::TASK_STATE_COMPLETE ) { - Kokkos::Impl::throw_runtime_exception("get_task_state() != Kokkos::Experimental::TASK_STATE_COMPLETE"); - } - if ( answer != f[i].get() && 0 == error ) { - std::cout << "test_task_dep(" << n << ") ERROR at[" << i << "]" - << " answer(" << answer << ") != result(" << f[i].get() << ")" << std::endl ; - } - } -} - -//---------------------------------------------------------------------------- - -template< class ExecSpace > -struct TaskTeam { - - enum { SPAN = 8 }; - - typedef void value_type ; - typedef Kokkos::Experimental::TaskPolicy<ExecSpace> policy_type ; - typedef Kokkos::Experimental::Future<void,ExecSpace> future_type ; - typedef Kokkos::View<long*,ExecSpace> view_type ; - - policy_type policy ; - future_type future ; - - view_type result ; - const long nvalue ; - - KOKKOS_INLINE_FUNCTION - TaskTeam( const policy_type & arg_policy - , const view_type & arg_result - , const long arg_nvalue ) - : policy(arg_policy) - , future() - , result( arg_result ) - , nvalue( arg_nvalue ) - {} - - KOKKOS_INLINE_FUNCTION - void apply( const typename policy_type::member_type & member ) - { - const long end = nvalue + 1 ; - const long begin = 0 < end - SPAN ? 
end - SPAN : 0 ; - - if ( 0 < begin && future.get_task_state() == Kokkos::Experimental::TASK_STATE_NULL ) { - if ( member.team_rank() == 0 ) { - future = policy.spawn( policy.task_create_team( TaskTeam( policy , result , begin - 1 ) ) ); - policy.clear_dependence( this ); - policy.add_dependence( this , future ); - policy.respawn( this ); - } - return ; - } - - Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i ) { result[i] = i + 1 ; } - ); - } -}; - -template< class ExecSpace > -struct TaskTeamValue { - - enum { SPAN = 8 }; - - typedef long value_type ; - typedef Kokkos::Experimental::TaskPolicy<ExecSpace> policy_type ; - typedef Kokkos::Experimental::Future<value_type,ExecSpace> future_type ; - typedef Kokkos::View<long*,ExecSpace> view_type ; - - policy_type policy ; - future_type future ; - - view_type result ; - const long nvalue ; - - KOKKOS_INLINE_FUNCTION - TaskTeamValue( const policy_type & arg_policy - , const view_type & arg_result - , const long arg_nvalue ) - : policy(arg_policy) - , future() - , result( arg_result ) - , nvalue( arg_nvalue ) - {} - - KOKKOS_INLINE_FUNCTION - void apply( const typename policy_type::member_type & member , value_type & final ) - { - const long end = nvalue + 1 ; - const long begin = 0 < end - SPAN ? 
end - SPAN : 0 ; - - if ( 0 < begin && future.is_null() ) { - if ( member.team_rank() == 0 ) { - - future = policy.task_create_team( TaskTeamValue( policy , result , begin - 1 ) ); - - policy.spawn( future ); - policy.add_dependence( this , future ); - policy.respawn( this ); - } - return ; - } - - Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i ) { result[i] = i + 1 ; } - ); - - if ( member.team_rank() == 0 ) { - final = result[nvalue] ; - } - - Kokkos::memory_fence(); - } -}; - -template< class ExecSpace > -void test_task_team( long n ) -{ - typedef TaskTeam< ExecSpace > task_type ; - typedef TaskTeamValue< ExecSpace > task_value_type ; - typedef typename task_type::view_type view_type ; - typedef typename task_type::policy_type policy_type ; - - typedef typename task_type::future_type future_type ; - typedef typename task_value_type::future_type future_value_type ; - - const unsigned task_max_count = 1024 ; - const unsigned task_max_size = 256 ; - const unsigned task_dependence = 4 ; - - policy_type - policy( task_max_count - , task_max_size - , task_dependence ); - - view_type result("result",n+1); - - typename view_type::HostMirror - host_result = Kokkos::create_mirror_view( result ); - - future_type f = policy.proc_create_team( task_type( policy , result , n ) ); - - ASSERT_FALSE( f.is_null() ); - - policy.spawn( f ); - - Kokkos::Experimental::wait( policy ); - - Kokkos::deep_copy( host_result , result ); - - for ( long i = 0 ; i <= n ; ++i ) { - const long answer = i + 1 ; - if ( host_result(i) != answer ) { - std::cerr << "test_task_team void ERROR result(" << i << ") = " - << host_result(i) << " != " << answer << std::endl ; - } - } - - future_value_type fv = policy.proc_create_team( task_value_type( policy , result , n ) ); - - ASSERT_FALSE( fv.is_null() ); - - policy.spawn( fv ); - - Kokkos::Experimental::wait( policy ); - - Kokkos::deep_copy( host_result , result ); - - if ( fv.get() != n + 1 ) { - std::cerr << 
"test_task_team value ERROR future = " - << fv.get() << " != " << n + 1 << std::endl ; - } - for ( long i = 0 ; i <= n ; ++i ) { - const long answer = i + 1 ; - if ( host_result(i) != answer ) { - std::cerr << "test_task_team value ERROR result(" << i << ") = " - << host_result(i) << " != " << answer << std::endl ; - } - } -} - -//---------------------------------------------------------------------------- - -template< class ExecSpace > -struct TaskLatchAdd { - - typedef void value_type ; - typedef Kokkos::Experimental::Future< Kokkos::Experimental::Latch , ExecSpace > future_type ; - - future_type latch ; - volatile int * count ; - - KOKKOS_INLINE_FUNCTION - TaskLatchAdd( const future_type & arg_latch - , volatile int * const arg_count ) - : latch( arg_latch ) - , count( arg_count ) - {} - - KOKKOS_INLINE_FUNCTION - void apply() - { - Kokkos::atomic_fetch_add( count , 1 ); - latch.add(1); - } -}; - -template< class ExecSpace > -struct TaskLatchRun { - - typedef void value_type ; - typedef Kokkos::Experimental::TaskPolicy< ExecSpace > policy_type ; - typedef Kokkos::Experimental::Future< Kokkos::Experimental::Latch , ExecSpace > future_type ; - - policy_type policy ; - int total ; - volatile int count ; - - KOKKOS_INLINE_FUNCTION - TaskLatchRun( const policy_type & arg_policy , const int arg_total ) - : policy(arg_policy), total(arg_total), count(0) {} - - KOKKOS_INLINE_FUNCTION - void apply() - { - if ( 0 == count && 0 < total ) { - future_type latch = policy.create_latch( total ); - - for ( int i = 0 ; i < total ; ++i ) { - auto f = policy.task_create( TaskLatchAdd<ExecSpace>(latch,&count) , 0 ); - if ( f.is_null() ) { - Kokkos::abort("TaskLatchAdd allocation FAILED" ); - } - - if ( policy.spawn( f ).is_null() ) { - Kokkos::abort("TaskLatcAdd spawning FAILED" ); - } - } - - policy.add_dependence( this , latch ); - policy.respawn( this ); - } - else if ( count != total ) { - printf("TaskLatchRun FAILED %d != %d\n",count,total); - } - } -}; - - -template< class 
ExecSpace > -void test_latch( int n ) -{ - typedef TaskLatchRun< ExecSpace > task_type ; - typedef typename task_type::policy_type policy_type ; - - // Primary + latch + n * LatchAdd - // - // This test uses several two different block sizes for allocation from the - // memory pool, so the memory size requested must be big enough to cause two - // or more superblocks to be used. Currently, the superblock size in the - // task policy is 2^16, so make the minimum requested memory size greater - // than this. - const unsigned task_max_count = n + 2 < 256 ? 256 : n + 2; - const unsigned task_max_size = 256; - const unsigned task_dependence = 4 ; - - policy_type - policy( task_max_count - , task_max_size - , task_dependence ); - - policy.spawn( policy.proc_create( TaskLatchRun<ExecSpace>(policy,n) ) ); - - wait( policy ); -} - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -} // namespace TestTaskPolicy - -#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ -#endif /* #ifndef KOKKOS_UNITTEST_TASKPOLICY_HPP */ - - diff --git a/lib/kokkos/core/unit_test/TestTaskScheduler.hpp b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1134553980f8a63351f85a86b33537a35d52644c --- /dev/null +++ b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp @@ -0,0 +1,551 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#ifndef KOKKOS_UNITTEST_TASKSCHEDULER_HPP +#define KOKKOS_UNITTEST_TASKSCHEDULER_HPP + +#include <stdio.h> +#include <iostream> +#include <cmath> + +#if defined( KOKKOS_ENABLE_TASKDAG ) + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace TestTaskScheduler { + +namespace { + +inline +long eval_fib( long n ) +{ + constexpr long mask = 0x03 ; + + long fib[4] = { 0 , 1 , 1 , 2 }; + + for ( long i = 2 ; i <= n ; ++i ) { + fib[ i & mask ] = fib[ ( i - 1 ) & mask ] + fib[ ( i - 2 ) & mask ]; + } + + return fib[ n & mask ]; +} + +} + +template< typename Space > +struct TestFib +{ + typedef Kokkos::TaskScheduler<Space> policy_type ; + typedef Kokkos::Future<long,Space> future_type ; + typedef long value_type ; + + policy_type policy ; + future_type fib_m1 ; + future_type fib_m2 ; + const value_type n ; + + KOKKOS_INLINE_FUNCTION + TestFib( const policy_type & arg_policy , const value_type arg_n ) + : policy(arg_policy) + , fib_m1() , fib_m2() + , n( arg_n ) + {} + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type & , value_type & result ) + { +#if 0 + printf( "\nTestFib(%ld) %d %d\n" + , n + , int( ! fib_m1.is_null() ) + , int( ! fib_m2.is_null() ) + ); +#endif + + if ( n < 2 ) { + result = n ; + } + else if ( ! fib_m2.is_null() && ! fib_m1.is_null() ) { + result = fib_m1.get() + fib_m2.get(); + } + else { + + // Spawn new children and respawn myself to sum their results: + // Spawn lower value at higher priority as it has a shorter + // path to completion. 
+ + fib_m2 = policy.task_spawn( TestFib(policy,n-2) + , Kokkos::TaskSingle + , Kokkos::TaskHighPriority ); + + fib_m1 = policy.task_spawn( TestFib(policy,n-1) + , Kokkos::TaskSingle ); + + Kokkos::Future<Space> dep[] = { fib_m1 , fib_m2 }; + + Kokkos::Future<Space> fib_all = policy.when_all( 2 , dep ); + + if ( ! fib_m2.is_null() && ! fib_m1.is_null() && ! fib_all.is_null() ) { + // High priority to retire this branch + policy.respawn( this , Kokkos::TaskHighPriority , fib_all ); + } + else { +#if 1 + printf( "TestFib(%ld) insufficient memory alloc_capacity(%d) task_max(%d) task_accum(%ld)\n" + , n + , policy.allocation_capacity() + , policy.allocated_task_count_max() + , policy.allocated_task_count_accum() + ); +#endif + Kokkos::abort("TestFib insufficient memory"); + + } + } + } + + static void run( int i , size_t MemoryCapacity = 16000 ) + { + typedef typename policy_type::memory_space memory_space ; + + enum { Log2_SuperBlockSize = 12 }; + + policy_type root_policy( memory_space() , MemoryCapacity , Log2_SuperBlockSize ); + + future_type f = root_policy.host_spawn( TestFib(root_policy,i) , Kokkos::TaskSingle ); + Kokkos::wait( root_policy ); + ASSERT_EQ( eval_fib(i) , f.get() ); + +#if 0 + fprintf( stdout , "\nTestFib::run(%d) spawn_size(%d) when_all_size(%d) alloc_capacity(%d) task_max(%d) task_accum(%ld)\n" + , i + , int(root_policy.template spawn_allocation_size<TestFib>()) + , int(root_policy.when_all_allocation_size(2)) + , root_policy.allocation_capacity() + , root_policy.allocated_task_count_max() + , root_policy.allocated_task_count_accum() + ); + fflush( stdout ); +#endif + } + +}; + +} // namespace TestTaskScheduler + +//---------------------------------------------------------------------------- + +namespace TestTaskScheduler { + +template< class Space > +struct TestTaskDependence { + + typedef Kokkos::TaskScheduler<Space> policy_type ; + typedef Kokkos::Future<Space> future_type ; + typedef Kokkos::View<long,Space> accum_type ; + typedef void 
value_type ; + + policy_type m_policy ; + accum_type m_accum ; + long m_count ; + + KOKKOS_INLINE_FUNCTION + TestTaskDependence( long n + , const policy_type & arg_policy + , const accum_type & arg_accum ) + : m_policy( arg_policy ) + , m_accum( arg_accum ) + , m_count( n ) + {} + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type & ) + { + enum { CHUNK = 8 }; + const int n = CHUNK < m_count ? CHUNK : m_count ; + + if ( 1 < m_count ) { + future_type f[ CHUNK ] ; + + const int inc = ( m_count + n - 1 ) / n ; + + for ( int i = 0 ; i < n ; ++i ) { + long begin = i * inc ; + long count = begin + inc < m_count ? inc : m_count - begin ; + f[i] = m_policy.task_spawn( TestTaskDependence(count,m_policy,m_accum) , Kokkos::TaskSingle ); + } + + m_count = 0 ; + + m_policy.respawn( this , m_policy.when_all( n , f ) ); + } + else if ( 1 == m_count ) { + Kokkos::atomic_increment( & m_accum() ); + } + } + + static void run( int n ) + { + typedef typename policy_type::memory_space memory_space ; + + // enum { MemoryCapacity = 4000 }; // Triggers infinite loop in memory pool + enum { MemoryCapacity = 16000 }; + enum { Log2_SuperBlockSize = 12 }; + policy_type policy( memory_space() , MemoryCapacity , Log2_SuperBlockSize ); + + accum_type accum("accum"); + + typename accum_type::HostMirror host_accum = + Kokkos::create_mirror_view( accum ); + + policy.host_spawn( TestTaskDependence(n,policy,accum) , Kokkos::TaskSingle ); + + Kokkos::wait( policy ); + + Kokkos::deep_copy( host_accum , accum ); + + ASSERT_EQ( host_accum() , n ); + } +}; + +} // namespace TestTaskScheduler + +//---------------------------------------------------------------------------- + +namespace TestTaskScheduler { + +template< class ExecSpace > +struct TestTaskTeam { + + //enum { SPAN = 8 }; + enum { SPAN = 33 }; + //enum { SPAN = 1 }; + + typedef void value_type ; + typedef Kokkos::TaskScheduler<ExecSpace> policy_type ; + typedef Kokkos::Future<ExecSpace> future_type ; + typedef 
Kokkos::View<long*,ExecSpace> view_type ; + + policy_type policy ; + future_type future ; + + view_type parfor_result ; + view_type parreduce_check ; + view_type parscan_result ; + view_type parscan_check ; + const long nvalue ; + + KOKKOS_INLINE_FUNCTION + TestTaskTeam( const policy_type & arg_policy + , const view_type & arg_parfor_result + , const view_type & arg_parreduce_check + , const view_type & arg_parscan_result + , const view_type & arg_parscan_check + , const long arg_nvalue ) + : policy(arg_policy) + , future() + , parfor_result( arg_parfor_result ) + , parreduce_check( arg_parreduce_check ) + , parscan_result( arg_parscan_result ) + , parscan_check( arg_parscan_check ) + , nvalue( arg_nvalue ) + {} + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type & member ) + { + const long end = nvalue + 1 ; + const long begin = 0 < end - SPAN ? end - SPAN : 0 ; + + if ( 0 < begin && future.is_null() ) { + if ( member.team_rank() == 0 ) { + future = policy.task_spawn + ( TestTaskTeam( policy , + parfor_result , + parreduce_check, + parscan_result, + parscan_check, + begin - 1 ) + , Kokkos::TaskTeam ); + + assert( ! 
future.is_null() ); + + policy.respawn( this , future ); + } + return ; + } + + Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i ) { parfor_result[i] = i ; } + ); + + // test parallel_reduce without join + + long tot = 0; + long expected = (begin+end-1)*(end-begin)*0.5; + + Kokkos::parallel_reduce( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i, long &res) { res += parfor_result[i]; } + , tot); + Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i ) { parreduce_check[i] = expected-tot ; } + ); + + // test parallel_reduce with join + + tot = 0; + Kokkos::parallel_reduce( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i, long &res) { res += parfor_result[i]; } + , [&]( long& val1, const long& val2) { val1 += val2; } + , tot); + Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i ) { parreduce_check[i] += expected-tot ; } + ); + + // test parallel_scan + + // Exclusive scan + Kokkos::parallel_scan<long>( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i, long &val , const bool final ) { + if ( final ) { parscan_result[i] = val; } + val += i; + } + ); + if ( member.team_rank() == 0 ) { + for ( long i = begin ; i < end ; ++i ) { + parscan_check[i] = (i*(i-1)-begin*(begin-1))*0.5-parscan_result[i]; + } + } + + // Inclusive scan + Kokkos::parallel_scan<long>( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i, long &val , const bool final ) { + val += i; + if ( final ) { parscan_result[i] = val; } + } + ); + if ( member.team_rank() == 0 ) { + for ( long i = begin ; i < end ; ++i ) { + parscan_check[i] += (i*(i+1)-begin*(begin-1))*0.5-parscan_result[i]; + } + } + // ThreadVectorRange check + /* + long result = 0; + expected = (begin+end-1)*(end-begin)*0.5; + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member , 0 , 1 ) + , [&] ( const int i , long & outerUpdate ) { + long sum_j = 0.0; + Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( member , end - 
begin ) + , [&] ( const int j , long &innerUpdate ) { + innerUpdate += begin+j; + } , sum_j ); + outerUpdate += sum_j ; + } , result ); + Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i ) { + parreduce_check[i] += result-expected ; + } + ); + */ + } + + static void run( long n ) + { + // const unsigned memory_capacity = 10000 ; // causes memory pool infinite loop + // const unsigned memory_capacity = 100000 ; // fails with SPAN=1 for serial and OMP + const unsigned memory_capacity = 400000 ; + + policy_type root_policy( typename policy_type::memory_space() + , memory_capacity ); + + view_type root_parfor_result("parfor_result",n+1); + view_type root_parreduce_check("parreduce_check",n+1); + view_type root_parscan_result("parscan_result",n+1); + view_type root_parscan_check("parscan_check",n+1); + + typename view_type::HostMirror + host_parfor_result = Kokkos::create_mirror_view( root_parfor_result ); + typename view_type::HostMirror + host_parreduce_check = Kokkos::create_mirror_view( root_parreduce_check ); + typename view_type::HostMirror + host_parscan_result = Kokkos::create_mirror_view( root_parscan_result ); + typename view_type::HostMirror + host_parscan_check = Kokkos::create_mirror_view( root_parscan_check ); + + future_type f = root_policy.host_spawn( + TestTaskTeam( root_policy , + root_parfor_result , + root_parreduce_check , + root_parscan_result, + root_parscan_check, + n ) , + Kokkos::TaskTeam ); + + Kokkos::wait( root_policy ); + + Kokkos::deep_copy( host_parfor_result , root_parfor_result ); + Kokkos::deep_copy( host_parreduce_check , root_parreduce_check ); + Kokkos::deep_copy( host_parscan_result , root_parscan_result ); + Kokkos::deep_copy( host_parscan_check , root_parscan_check ); + + for ( long i = 0 ; i <= n ; ++i ) { + const long answer = i ; + if ( host_parfor_result(i) != answer ) { + std::cerr << "TestTaskTeam::run ERROR parallel_for result(" << i << ") = " + << host_parfor_result(i) << " != " << answer << 
std::endl ; + } + if ( host_parreduce_check(i) != 0 ) { + std::cerr << "TestTaskTeam::run ERROR parallel_reduce check(" << i << ") = " + << host_parreduce_check(i) << " != 0" << std::endl ; + } + if ( host_parscan_check(i) != 0 ) { + std::cerr << "TestTaskTeam::run ERROR parallel_scan check(" << i << ") = " + << host_parscan_check(i) << " != 0" << std::endl ; + } + } + } +}; + +template< class ExecSpace > +struct TestTaskTeamValue { + + enum { SPAN = 8 }; + + typedef long value_type ; + typedef Kokkos::TaskScheduler<ExecSpace> policy_type ; + typedef Kokkos::Future<value_type,ExecSpace> future_type ; + typedef Kokkos::View<long*,ExecSpace> view_type ; + + policy_type policy ; + future_type future ; + + view_type result ; + const long nvalue ; + + KOKKOS_INLINE_FUNCTION + TestTaskTeamValue( const policy_type & arg_policy + , const view_type & arg_result + , const long arg_nvalue ) + : policy(arg_policy) + , future() + , result( arg_result ) + , nvalue( arg_nvalue ) + {} + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type const & member + , value_type & final ) + { + const long end = nvalue + 1 ; + const long begin = 0 < end - SPAN ? end - SPAN : 0 ; + + if ( 0 < begin && future.is_null() ) { + if ( member.team_rank() == 0 ) { + + future = policy.task_spawn + ( TestTaskTeamValue( policy , result , begin - 1 ) + , Kokkos::TaskTeam ); + + assert( ! 
future.is_null() ); + + policy.respawn( this , future ); + } + return ; + } + + Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) + , [&]( int i ) { result[i] = i + 1 ; } + ); + + if ( member.team_rank() == 0 ) { + final = result[nvalue] ; + } + + Kokkos::memory_fence(); + } + + static void run( long n ) + { + // const unsigned memory_capacity = 10000 ; // causes memory pool infinite loop + const unsigned memory_capacity = 100000 ; + + policy_type root_policy( typename policy_type::memory_space() + , memory_capacity ); + + view_type root_result("result",n+1); + + typename view_type::HostMirror + host_result = Kokkos::create_mirror_view( root_result ); + + future_type fv = root_policy.host_spawn + ( TestTaskTeamValue( root_policy, root_result, n ) , Kokkos::TaskTeam ); + + Kokkos::wait( root_policy ); + + Kokkos::deep_copy( host_result , root_result ); + + if ( fv.get() != n + 1 ) { + std::cerr << "TestTaskTeamValue ERROR future = " + << fv.get() << " != " << n + 1 << std::endl ; + } + for ( long i = 0 ; i <= n ; ++i ) { + const long answer = i + 1 ; + if ( host_result(i) != answer ) { + std::cerr << "TestTaskTeamValue ERROR result(" << i << ") = " + << host_result(i) << " != " << answer << std::endl ; + } + } + } +}; +} // namespace TestTaskScheduler + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_UNITTEST_TASKSCHEDULER_HPP */ + + diff --git a/lib/kokkos/core/unit_test/TestTeam.hpp b/lib/kokkos/core/unit_test/TestTeam.hpp index db6b0cff7e21654f7ba17b531e63fbc63deb2b06..23ad2be3f053f522d6d8b9308ba881711dd7d63a 100644 --- a/lib/kokkos/core/unit_test/TestTeam.hpp +++ b/lib/kokkos/core/unit_test/TestTeam.hpp @@ -376,9 +376,14 @@ public: void run_test( const size_t nteam ) { typedef Kokkos::View< long int , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ; 
- const unsigned REPEAT = 100000 ; - const unsigned Repeat = ( REPEAT + nteam - 1 ) / nteam ; + unsigned Repeat; + if ( nteam == 0 ) + { + Repeat = 1; + } else { + Repeat = ( REPEAT + nteam - 1 ) / nteam ; //error here + } functor_type functor ; @@ -581,7 +586,7 @@ struct ScratchTeamFunctor { KOKKOS_INLINE_FUNCTION void operator()( const typename policy_type::member_type & ind , value_type & update ) const { - const shared_int_array_type scratch_ptr( ind.team_scratch(1) , 2*ind.team_size() ); + const shared_int_array_type scratch_ptr( ind.team_scratch(1) , 3*ind.team_size() ); const shared_int_array_type scratch_A( ind.team_scratch(1) , SHARED_TEAM_COUNT ); const shared_int_array_type scratch_B( ind.thread_scratch(1) , SHARED_THREAD_COUNT ); @@ -645,7 +650,7 @@ struct TestScratchTeam { typename Functor::value_type error_count = 0 ; int team_scratch_size = Functor::shared_int_array_type::shmem_size(Functor::SHARED_TEAM_COUNT) + - Functor::shared_int_array_type::shmem_size(2*team_size); + Functor::shared_int_array_type::shmem_size(3*team_size); int thread_scratch_size = Functor::shared_int_array_type::shmem_size(Functor::SHARED_THREAD_COUNT); Kokkos::parallel_reduce( team_exec.set_scratch_size(0,Kokkos::PerTeam(team_scratch_size), Kokkos::PerThread(thread_scratch_size)) , @@ -660,76 +665,84 @@ namespace Test { template< class ExecSpace> KOKKOS_INLINE_FUNCTION int test_team_mulit_level_scratch_loop_body(const typename Kokkos::TeamPolicy<ExecSpace>::member_type& team) { - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team1(team.team_scratch(0),128); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread1(team.thread_scratch(0),16); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team2(team.team_scratch(0),128); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread2(team.thread_scratch(0),16); - - 
Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team1(team.team_scratch(1),128000); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread1(team.thread_scratch(1),16000); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team2(team.team_scratch(1),128000); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread2(team.thread_scratch(1),16000); - - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team3(team.team_scratch(0),128); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread3(team.thread_scratch(0),16); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team3(team.team_scratch(1),128000); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread3(team.thread_scratch(1),16000); - - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128), [&] (const int& i) { - a_team1(i) = 1000000 + i; - a_team2(i) = 2000000 + i; - a_team3(i) = 3000000 + i; - }); - team.team_barrier(); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i){ - a_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i; - a_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i; - a_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i; - }); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team1(team.team_scratch(0),128); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread1(team.thread_scratch(0),16); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team2(team.team_scratch(0),128); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread2(team.thread_scratch(0),16); + + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team1(team.team_scratch(1),128000); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> 
b_thread1(team.thread_scratch(1),16000); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team2(team.team_scratch(1),128000); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread2(team.thread_scratch(1),16000); + + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team3(team.team_scratch(0),128); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread3(team.thread_scratch(0),16); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team3(team.team_scratch(1),128000); + Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread3(team.thread_scratch(1),16000); + + // The explicit types for 0 and 128 are here to test TeamThreadRange accepting different + // types for begin and end. + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,int(0),unsigned(128)), [&] (const int& i) + { + a_team1(i) = 1000000 + i; + a_team2(i) = 2000000 + i; + a_team3(i) = 3000000 + i; + }); + team.team_barrier(); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i) + { + a_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i; + a_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i; + a_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i; + }); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i) { - b_team1(i) = 1000000 + i; - b_team2(i) = 2000000 + i; - b_team3(i) = 3000000 + i; - }); - team.team_barrier(); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i){ - b_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i; - b_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i; - b_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i; - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i) + { + b_team1(i) = 1000000 + i; + b_team2(i) = 2000000 + i; + b_team3(i) = 3000000 + i; + }); + team.team_barrier(); + 
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i) + { + b_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i; + b_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i; + b_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i; + }); - team.team_barrier(); - int error = 0; - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128), [&] (const int& i) { - if(a_team1(i) != 1000000 + i) error++; - if(a_team2(i) != 2000000 + i) error++; - if(a_team3(i) != 3000000 + i) error++; - }); - team.team_barrier(); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i){ - if(a_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++; - if(a_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++; - if(a_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++; - }); + team.team_barrier(); + int error = 0; + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128), [&] (const int& i) + { + if(a_team1(i) != 1000000 + i) error++; + if(a_team2(i) != 2000000 + i) error++; + if(a_team3(i) != 3000000 + i) error++; + }); + team.team_barrier(); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i) + { + if(a_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++; + if(a_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++; + if(a_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++; + }); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i) { - if(b_team1(i) != 1000000 + i) error++; - if(b_team2(i) != 2000000 + i) error++; - if(b_team3(i) != 3000000 + i) error++; - }); - team.team_barrier(); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i){ - if(b_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++; - if(b_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++; - if( b_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++; - }); + 
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i) + { + if(b_team1(i) != 1000000 + i) error++; + if(b_team2(i) != 2000000 + i) error++; + if(b_team3(i) != 3000000 + i) error++; + }); + team.team_barrier(); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i) + { + if(b_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++; + if(b_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++; + if( b_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++; + }); return error; } - struct TagReduce {}; struct TagFor {}; diff --git a/lib/kokkos/core/unit_test/TestTeamVector.hpp b/lib/kokkos/core/unit_test/TestTeamVector.hpp index 48187f036844ccfda2d186f245b1673c7ffe5fd4..d9b06c29e49d0362226168861b0d5e818d1d82f9 100644 --- a/lib/kokkos/core/unit_test/TestTeamVector.hpp +++ b/lib/kokkos/core/unit_test/TestTeamVector.hpp @@ -173,13 +173,15 @@ struct functor_team_for { // Accumulate value into per thread shared memory // This is non blocking - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,131),[&] (int i) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,131),[&] (int i) + { values(team.team_rank ()) += i - team.league_rank () + team.league_size () + team.team_size (); }); // Wait for all memory to be written team.team_barrier (); // One thread per team executes the comparison - Kokkos::single(Kokkos::PerTeam(team),[&]() { + Kokkos::single(Kokkos::PerTeam(team),[&]() + { Scalar test = 0; Scalar value = 0; for (int i = 0; i < 131; ++i) { @@ -213,12 +215,14 @@ struct functor_team_reduce { void operator() (typename policy_type::member_type team) const { Scalar value = Scalar(); - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131),[&] (int i, Scalar& val) { + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131),[&] (int i, Scalar& val) + { val += i - team.league_rank () + team.league_size () + team.team_size (); },value); team.team_barrier (); - 
Kokkos::single(Kokkos::PerTeam(team),[&]() { + Kokkos::single(Kokkos::PerTeam(team),[&]() + { Scalar test = 0; for (int i = 0; i < 131; ++i) { test += i - team.league_rank () + team.league_size () + team.team_size (); @@ -250,15 +254,18 @@ struct functor_team_reduce_join { Scalar value = 0; Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131) - , [&] (int i, Scalar& val) { + , [&] (int i, Scalar& val) + { val += i - team.league_rank () + team.league_size () + team.team_size (); } - , [&] (volatile Scalar& val, const volatile Scalar& src) {val+=src;} + , [&] (volatile Scalar& val, const volatile Scalar& src) + {val+=src;} , value ); team.team_barrier (); - Kokkos::single(Kokkos::PerTeam(team),[&]() { + Kokkos::single(Kokkos::PerTeam(team),[&]() + { Scalar test = 0; for (int i = 0; i < 131; ++i) { test += i - team.league_rank () + team.league_size () + team.team_size (); @@ -298,18 +305,22 @@ struct functor_team_vector_for { static_cast<unsigned int> (shmemSize)); } else { - Kokkos::single(Kokkos::PerThread(team),[&] () { + Kokkos::single(Kokkos::PerThread(team),[&] () + { values(team.team_rank ()) = 0; }); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,131),[&] (int i) { - Kokkos::single(Kokkos::PerThread(team),[&] () { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,131),[&] (int i) + { + Kokkos::single(Kokkos::PerThread(team),[&] () + { values(team.team_rank ()) += i - team.league_rank () + team.league_size () + team.team_size (); }); }); team.team_barrier (); - Kokkos::single(Kokkos::PerTeam(team),[&]() { + Kokkos::single(Kokkos::PerTeam(team),[&]() + { Scalar test = 0; Scalar value = 0; for (int i = 0; i < 131; ++i) { @@ -343,12 +354,14 @@ struct functor_team_vector_reduce { void operator() (typename policy_type::member_type team) const { Scalar value = Scalar(); - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131),[&] (int i, Scalar& val) { + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131),[&] (int i, Scalar& val) + { val += i - 
team.league_rank () + team.league_size () + team.team_size (); },value); team.team_barrier (); - Kokkos::single(Kokkos::PerTeam(team),[&]() { + Kokkos::single(Kokkos::PerTeam(team),[&]() + { Scalar test = 0; for (int i = 0; i < 131; ++i) { test += i - team.league_rank () + team.league_size () + team.team_size (); @@ -379,15 +392,18 @@ struct functor_team_vector_reduce_join { Scalar value = 0; Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131) - , [&] (int i, Scalar& val) { + , [&] (int i, Scalar& val) + { val += i - team.league_rank () + team.league_size () + team.team_size (); } - , [&] (volatile Scalar& val, const volatile Scalar& src) {val+=src;} + , [&] (volatile Scalar& val, const volatile Scalar& src) + {val+=src;} , value ); team.team_barrier (); - Kokkos::single(Kokkos::PerTeam(team),[&]() { + Kokkos::single(Kokkos::PerTeam(team),[&]() + { Scalar test = 0; for (int i = 0; i < 131; ++i) { test += i - team.league_rank () + team.league_size () + team.team_size (); @@ -418,16 +434,19 @@ struct functor_vec_single { // inside a parallel_for and write to it. 
Scalar value = 0; - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,13),[&] (int i) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,13),[&] (int i) + { value = i; // This write is violating Kokkos semantics for nested parallelism }); - Kokkos::single(Kokkos::PerThread(team),[&] (Scalar& val) { + Kokkos::single(Kokkos::PerThread(team),[&] (Scalar& val) + { val = 1; },value); Scalar value2 = 0; - Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13), [&] (int i, Scalar& val) { + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13), [&] (int i, Scalar& val) + { val += value; },value2); @@ -462,11 +481,13 @@ struct functor_vec_for { flag() = 1; } else { - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,13), [&] (int i) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,13), [&] (int i) + { values(13*team.team_rank() + i) = i - team.team_rank() - team.league_rank() + team.league_size() + team.team_size(); }); - Kokkos::single(Kokkos::PerThread(team),[&] () { + Kokkos::single(Kokkos::PerThread(team),[&] () + { Scalar test = 0; Scalar value = 0; for (int i = 0; i < 13; ++i) { @@ -496,11 +517,13 @@ struct functor_vec_red { void operator() (typename policy_type::member_type team) const { Scalar value = 0; - Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13),[&] (int i, Scalar& val) { + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13),[&] (int i, Scalar& val) + { val += i; }, value); - Kokkos::single(Kokkos::PerThread(team),[&] () { + Kokkos::single(Kokkos::PerThread(team),[&] () + { Scalar test = 0; for(int i = 0; i < 13; i++) { test+=i; @@ -526,12 +549,15 @@ struct functor_vec_red_join { Scalar value = 1; Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13) - , [&] (int i, Scalar& val) { val *= i; } - , [&] (Scalar& val, const Scalar& src) {val*=src;} + , [&] (int i, Scalar& val) + { val *= i; } + , [&] (Scalar& val, const Scalar& src) + {val*=src;} , value ); - Kokkos::single(Kokkos::PerThread(team),[&] () { + 
Kokkos::single(Kokkos::PerThread(team),[&] () + { Scalar test = 1; for(int i = 0; i < 13; i++) { test*=i; @@ -554,7 +580,8 @@ struct functor_vec_scan { KOKKOS_INLINE_FUNCTION void operator() (typename policy_type::member_type team) const { - Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,13),[&] (int i, Scalar& val, bool final) { + Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,13),[&] (int i, Scalar& val, bool final) + { val += i; if(final) { Scalar test = 0; diff --git a/lib/kokkos/core/unit_test/TestThreads.cpp b/lib/kokkos/core/unit_test/TestThreads.cpp deleted file mode 100644 index 93049b95dd7c75bcd88b8d6408e8a0249f905855..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/unit_test/TestThreads.cpp +++ /dev/null @@ -1,614 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include <gtest/gtest.h> - -#include <Kokkos_Macros.hpp> - -#if defined( KOKKOS_HAVE_PTHREAD ) -#ifdef KOKKOS_LAMBDA -#undef KOKKOS_LAMBDA -#endif -#define KOKKOS_LAMBDA [=] - -#include <Kokkos_Core.hpp> - -#include <Threads/Kokkos_Threads_TaskPolicy.hpp> - -//---------------------------------------------------------------------------- - -#include <TestSharedAlloc.hpp> -#include <TestViewMapping.hpp> - -#include <TestViewImpl.hpp> - -#include <TestViewAPI.hpp> -#include <TestViewSubview.hpp> -#include <TestViewOfClass.hpp> -#include <TestAtomic.hpp> -#include <TestAtomicOperations.hpp> - -#include <TestReduce.hpp> -#include <TestScan.hpp> -#include <TestRange.hpp> -#include <TestTeam.hpp> -#include <TestAggregate.hpp> -#include <TestAggregateReduction.hpp> -#include <TestCompilerMacros.hpp> -#include <TestTaskPolicy.hpp> -#include <TestMemoryPool.hpp> - - -#include <TestCXX11.hpp> -#include <TestCXX11Deduction.hpp> -#include <TestTeamVector.hpp> -#include <TestMemorySpaceTracking.hpp> -#include <TestTemplateMetaFunctions.hpp> - - 
-#include <TestPolicyConstruction.hpp> - -#include <TestMDRange.hpp> - -namespace Test { - -class threads : public ::testing::Test { -protected: - static void SetUpTestCase() - { - // Finalize without initialize is a no-op: - Kokkos::Threads::finalize(); - - const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); - const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); - const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); - - unsigned threads_count = 0 ; - - // Initialize and finalize with no threads: - Kokkos::Threads::initialize( 1u ); - Kokkos::Threads::finalize(); - - threads_count = std::max( 1u , numa_count ) - * std::max( 2u , cores_per_numa * threads_per_core ); - - Kokkos::Threads::initialize( threads_count ); - Kokkos::Threads::finalize(); - - threads_count = std::max( 1u , numa_count * 2 ) - * std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 ); - - Kokkos::Threads::initialize( threads_count ); - Kokkos::Threads::finalize(); - - // Quick attempt to verify thread start/terminate don't have race condition: - threads_count = std::max( 1u , numa_count ) - * std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 ); - for ( unsigned i = 0 ; i < 10 ; ++i ) { - Kokkos::Threads::initialize( threads_count ); - Kokkos::Threads::sleep(); - Kokkos::Threads::wake(); - Kokkos::Threads::finalize(); - } - - Kokkos::Threads::initialize( threads_count ); - Kokkos::Threads::print_configuration( std::cout , true /* detailed */ ); - } - - static void TearDownTestCase() - { - Kokkos::Threads::finalize(); - } -}; - -TEST_F( threads , init ) { - ; -} - -TEST_F( threads , md_range ) { - TestMDRange_2D< Kokkos::Threads >::test_for2(100,100); - - TestMDRange_3D< Kokkos::Threads >::test_for3(100,100,100); -} - -TEST_F( threads , dispatch ) -{ - const int repeat = 100 ; - for ( int i = 0 ; i < repeat ; ++i ) { - for ( int j = 0 ; j < repeat ; ++j ) { - Kokkos::parallel_for( Kokkos::RangePolicy< 
Kokkos::Threads >(0,j) - , KOKKOS_LAMBDA( int ) {} ); - }} -} - -TEST_F( threads , impl_shared_alloc ) { - test_shared_alloc< Kokkos::HostSpace , Kokkos::Threads >(); -} - -TEST_F( threads, policy_construction) { - TestRangePolicyConstruction< Kokkos::Threads >(); - TestTeamPolicyConstruction< Kokkos::Threads >(); -} - -TEST_F( threads , impl_view_mapping ) { - test_view_mapping< Kokkos::Threads >(); - test_view_mapping_subview< Kokkos::Threads >(); - test_view_mapping_operator< Kokkos::Threads >(); - TestViewMappingAtomic< Kokkos::Threads >::run(); -} - - -TEST_F( threads, view_impl) { - test_view_impl< Kokkos::Threads >(); -} - -TEST_F( threads, view_api) { - TestViewAPI< double , Kokkos::Threads >(); -} - -TEST_F( threads , view_nested_view ) -{ - ::Test::view_nested_view< Kokkos::Threads >(); -} - -TEST_F( threads, view_subview_auto_1d_left ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Threads >(); -} - -TEST_F( threads, view_subview_auto_1d_right ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Threads >(); -} - -TEST_F( threads, view_subview_auto_1d_stride ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Threads >(); -} - -TEST_F( threads, view_subview_assign_strided ) { - TestViewSubview::test_1d_strided_assignment< Kokkos::Threads >(); -} - -TEST_F( threads, view_subview_left_0 ) { - TestViewSubview::test_left_0< Kokkos::Threads >(); -} - -TEST_F( threads, view_subview_left_1 ) { - TestViewSubview::test_left_1< Kokkos::Threads >(); -} - -TEST_F( threads, view_subview_left_2 ) { - TestViewSubview::test_left_2< Kokkos::Threads >(); -} - -TEST_F( threads, view_subview_left_3 ) { - TestViewSubview::test_left_3< Kokkos::Threads >(); -} - -TEST_F( threads, view_subview_right_0 ) { - TestViewSubview::test_right_0< Kokkos::Threads >(); -} - -TEST_F( threads, view_subview_right_1 ) { - TestViewSubview::test_right_1< Kokkos::Threads >(); -} - -TEST_F( threads, view_subview_right_3 ) { - TestViewSubview::test_right_3< 
Kokkos::Threads >(); -} - - -TEST_F( threads, view_aggregate ) { - TestViewAggregate< Kokkos::Threads >(); - TestViewAggregateReduction< Kokkos::Threads >(); -} - -TEST_F( threads , range_tag ) -{ - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(2); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_scan(2); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(3); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(3); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(3); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(2); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000); -} - -TEST_F( threads , team_tag ) -{ - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(2); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(2); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(2); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); - TestTeamPolicy< Kokkos::Threads , 
Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000); -} - -TEST_F( threads, long_reduce) { - TestReduce< long , Kokkos::Threads >( 1000000 ); -} - -TEST_F( threads, double_reduce) { - TestReduce< double , Kokkos::Threads >( 1000000 ); -} - -TEST_F( threads , reducers ) -{ - TestReducers<int, Kokkos::Threads>::execute_integer(); - TestReducers<size_t, Kokkos::Threads>::execute_integer(); - TestReducers<double, Kokkos::Threads>::execute_float(); - TestReducers<Kokkos::complex<double>, Kokkos::Threads>::execute_basic(); -} - -TEST_F( threads, team_long_reduce) { - TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); -} - -TEST_F( threads, team_double_reduce) { - TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); -} - -TEST_F( threads, long_reduce_dynamic ) { - TestReduceDynamic< long , Kokkos::Threads >( 1000000 ); -} - -TEST_F( threads, double_reduce_dynamic ) { - TestReduceDynamic< double , Kokkos::Threads >( 1000000 ); -} - -TEST_F( threads, long_reduce_dynamic_view ) { - TestReduceDynamicView< long , Kokkos::Threads >( 1000000 ); -} - -TEST_F( threads, team_shared_request) { - TestSharedTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >(); - TestSharedTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >(); -} - -#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) -TEST_F( 
threads, team_lambda_shared_request) { - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >(); - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >(); -} -#endif - -TEST_F( threads, shmem_size) { - TestShmemSize< Kokkos::Threads >(); -} - -TEST_F( threads , view_remap ) -{ - enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; - - typedef Kokkos::View< double*[N1][N2][N3] , - Kokkos::LayoutRight , - Kokkos::Threads > output_type ; - - typedef Kokkos::View< int**[N2][N3] , - Kokkos::LayoutLeft , - Kokkos::Threads > input_type ; - - typedef Kokkos::View< int*[N0][N2][N3] , - Kokkos::LayoutLeft , - Kokkos::Threads > diff_type ; - - output_type output( "output" , N0 ); - input_type input ( "input" , N0 , N1 ); - diff_type diff ( "diff" , N0 ); - - int value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - input(i0,i1,i2,i3) = ++value ; - }}}} - - // Kokkos::deep_copy( diff , input ); // throw with incompatible shape - Kokkos::deep_copy( output , input ); - - value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - ++value ; - ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); - }}}} -} - -//---------------------------------------------------------------------------- - -TEST_F( threads , atomics ) -{ - const int loop_count = 1e6 ; - - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,2) ) ); - ASSERT_TRUE( ( 
TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<3>, Kokkos::Threads>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<3>, Kokkos::Threads>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<3>, Kokkos::Threads>(loop_count,3) ) ); -} - -TEST_F( threads , atomic_operations ) -{ - const int start = 1; //Avoid zero for division - const int end = 11; - for (int i = start; i < end; ++i) - { - 
ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 9 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned 
int,Kokkos::Threads>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 9 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 9 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long 
int,Kokkos::Threads>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 9 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 9 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 4 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 4 ) ) ); - } - -} - -//---------------------------------------------------------------------------- - -#if 0 -TEST_F( threads , scan_small ) -{ - typedef TestScan< Kokkos::Threads , Kokkos::Impl::ThreadsExecUseScanSmall > TestScanFunctor ; - for ( int i = 0 ; i < 1000 ; ++i ) { - TestScanFunctor( 10 ); - TestScanFunctor( 10000 ); - } - TestScanFunctor( 1000000 ); - TestScanFunctor( 10000000 ); - - Kokkos::Threads::fence(); -} -#endif - -TEST_F( threads , scan ) -{ - TestScan< Kokkos::Threads >::test_range( 1 , 1000 ); - TestScan< Kokkos::Threads >( 1000000 ); - TestScan< Kokkos::Threads >( 10000000 ); - Kokkos::Threads::fence(); -} - -//---------------------------------------------------------------------------- - -TEST_F( threads , team_scan ) -{ - TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 10 ); - TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); - TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 10000 ); - TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); -} - -//---------------------------------------------------------------------------- - -TEST_F( threads , compiler_macros ) -{ - ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Threads >() ) ); -} - -TEST_F( threads , memory_space ) -{ - TestMemorySpace< Kokkos::Threads >(); -} - -TEST_F( threads , memory_pool ) -{ - bool val = 
TestMemoryPool::test_mempool< Kokkos::Threads >( 128, 128000000 ); - ASSERT_TRUE( val ); - - TestMemoryPool::test_mempool2< Kokkos::Threads >( 64, 4, 1000000, 2000000 ); - - TestMemoryPool::test_memory_exhaustion< Kokkos::Threads >(); -} - -//---------------------------------------------------------------------------- - -TEST_F( threads , template_meta_functions ) -{ - TestTemplateMetaFunctions<int, Kokkos::Threads >(); -} - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) -TEST_F( threads , cxx11 ) -{ - if ( Kokkos::Impl::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Threads >::value ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(1) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(2) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(3) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(4) ) ); - } -} - -TEST_F( threads , reduction_deduction ) -{ - TestCXX11::test_reduction_deduction< Kokkos::Threads >(); -} -#endif /* #if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) */ - -TEST_F( threads , team_vector ) -{ - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(0) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(1) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(2) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(3) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(4) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(5) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(6) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(7) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(8) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(9) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(10) ) ); -} - -#if defined( KOKKOS_ENABLE_TASKPOLICY ) - -TEST_F( threads , task_policy ) -{ - TestTaskPolicy::test_task_dep< Kokkos::Threads 
>( 10 ); - - for ( long i = 0 ; i < 25 ; ++i ) { -// printf( "test_fib(): %2ld\n", i ); - TestTaskPolicy::test_fib< Kokkos::Threads >(i); - } - for ( long i = 0 ; i < 35 ; ++i ) { -// printf( "test_fib2(): %2ld\n", i ); - TestTaskPolicy::test_fib2< Kokkos::Threads >(i); - } -} - -TEST_F( threads , task_team ) -{ - TestTaskPolicy::test_task_team< Kokkos::Threads >(1000); -} - -TEST_F( threads , task_latch ) -{ - TestTaskPolicy::test_latch< Kokkos::Threads >(10); - TestTaskPolicy::test_latch< Kokkos::Threads >(1000); -} - -#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ - -} // namespace Test - -#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */ diff --git a/lib/kokkos/core/unit_test/TestTile.hpp b/lib/kokkos/core/unit_test/TestTile.hpp index dfb2bd81b3dec3485688f9827d3f1f7ad24ddb9d..842131debb69b54ad08fd0eb90836510be50d7ca 100644 --- a/lib/kokkos/core/unit_test/TestTile.hpp +++ b/lib/kokkos/core/unit_test/TestTile.hpp @@ -43,6 +43,7 @@ #define TEST_TILE_HPP #include <Kokkos_Core.hpp> +#include <impl/Kokkos_ViewTile.hpp> namespace TestTile { @@ -102,7 +103,7 @@ struct ReduceTileErrors if ( jtile < tile_dim1 ) { - tile_type tile = Kokkos::tile_subview( m_array , itile , jtile ); + tile_type tile = Kokkos::Experimental::tile_subview( m_array , itile , jtile ); if ( tile(0,0) != ptrdiff_t(( itile + jtile * tile_dim0 ) * TileLayout::N0 * TileLayout::N1 ) ) { ++errors ; diff --git a/lib/kokkos/core/unit_test/TestUtilities.hpp b/lib/kokkos/core/unit_test/TestUtilities.hpp new file mode 100644 index 0000000000000000000000000000000000000000..947be03e399bee3c23f4c4f333c34c0e6a9d4d08 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestUtilities.hpp @@ -0,0 +1,306 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <stdexcept> +#include <sstream> +#include <iostream> + +#include <Kokkos_Core.hpp> + +/*--------------------------------------------------------------------------*/ + +namespace Test { + +inline +void test_utilities() +{ + using namespace Kokkos::Impl; + { + using i = integer_sequence<int>; + using j = make_integer_sequence<int,0>; + + static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( i::size() == 0u, "Error: integer_sequence.size()" ); + } + + + { + using i = integer_sequence<int,0>; + using j = make_integer_sequence<int,1>; + + static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( i::size() == 1u, "Error: integer_sequence.size()" ); + + static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); + + static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); + } + + + { + using i = integer_sequence<int,0,1>; + using j = make_integer_sequence<int,2>; + + static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( i::size() == 2u, "Error: integer_sequence.size()" ); + + static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); + + static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); + } + + { + using i = integer_sequence<int,0,1,2>; + using j = make_integer_sequence<int,3>; + + static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( i::size() == 3u, "Error: integer_sequence.size()" ); + + static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); + static_assert( 
integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); + + static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); + } + + { + using i = integer_sequence<int,0,1,2,3>; + using j = make_integer_sequence<int,4>; + + static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( i::size() == 4u, "Error: integer_sequence.size()" ); + + static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); + + static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); + } + + { + using i = integer_sequence<int,0,1,2,3,4>; + using j = make_integer_sequence<int,5>; + + static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( i::size() == 5u, "Error: integer_sequence.size()" ); + + static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); + + 
static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); + } + + { + using i = integer_sequence<int,0,1,2,3,4,5>; + using j = make_integer_sequence<int,6>; + + static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( i::size() == 6u, "Error: integer_sequence.size()" ); + + static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" ); + + static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" ); + } + + { + using i = integer_sequence<int,0,1,2,3,4,5,6>; + using j = make_integer_sequence<int,7>; + + static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( i::size() == 7u, "Error: integer_sequence.size()" ); + + static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); + static_assert( 
integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<6, i>::value == 6, "Error: integer_sequence_at" ); + + static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)" ); + } + + { + using i = integer_sequence<int,0,1,2,3,4,5,6,7>; + using j = make_integer_sequence<int,8>; + + static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( i::size() == 8u, "Error: integer_sequence.size()" ); + + static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<6, i>::value == 6, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<7, i>::value == 7, "Error: 
integer_sequence_at" ); + + static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(7, i{}) == 7, "Error: at(unsigned, integer_sequence)" ); + } + + { + using i = integer_sequence<int,0,1,2,3,4,5,6,7,8>; + using j = make_integer_sequence<int,9>; + + static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( i::size() == 9u, "Error: integer_sequence.size()" ); + + static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<6, i>::value == 6, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<7, i>::value == 7, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<8, i>::value == 8, "Error: integer_sequence_at" ); + + static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); 
+ static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(7, i{}) == 7, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(8, i{}) == 8, "Error: at(unsigned, integer_sequence)" ); + } + + { + using i = integer_sequence<int,0,1,2,3,4,5,6,7,8,9>; + using j = make_integer_sequence<int,10>; + + static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( i::size() == 10u, "Error: integer_sequence.size()" ); + + static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<6, i>::value == 6, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<7, i>::value == 7, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<8, i>::value == 8, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at<9, i>::value == 9, "Error: integer_sequence_at" ); + + static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" ); + 
static_assert( at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(7, i{}) == 7, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(8, i{}) == 8, "Error: at(unsigned, integer_sequence)" ); + static_assert( at(9, i{}) == 9, "Error: at(unsigned, integer_sequence)" ); + } + + { + using i = make_integer_sequence<int, 5>; + using r = reverse_integer_sequence<i>; + using gr = integer_sequence<int, 4, 3, 2, 1, 0>; + + static_assert( std::is_same<r,gr>::value, "Error: reverse_integer_sequence" ); + } + + { + using s = make_integer_sequence<int,10>; + using e = exclusive_scan_integer_sequence<s>; + using i = inclusive_scan_integer_sequence<s>; + + using ge = integer_sequence<int, 0, 0, 1, 3, 6, 10, 15, 21, 28, 36>; + using gi = integer_sequence<int, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45>; + + static_assert( e::value == 45, "Error: scan value"); + static_assert( i::value == 45, "Error: scan value"); + + static_assert( std::is_same< e::type, ge >::value, "Error: exclusive_scan"); + static_assert( std::is_same< i::type, gi >::value, "Error: inclusive_scan"); + } + + +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestViewAPI.hpp b/lib/kokkos/core/unit_test/TestViewAPI.hpp index ae4c6d2185d12bdf1f61ab66c73244e6b38bb50b..88b474db1de466d1cedfb19633b73eef28e0b943 100644 --- a/lib/kokkos/core/unit_test/TestViewAPI.hpp +++ b/lib/kokkos/core/unit_test/TestViewAPI.hpp @@ -55,8 +55,6 @@ namespace Test { -#if KOKKOS_USING_EXP_VIEW - template< class T , class ... P > size_t allocation_count( const Kokkos::View<T,P...> & view ) { @@ -68,19 +66,6 @@ size_t allocation_count( const Kokkos::View<T,P...> & view ) return (card <= alloc && memory_span == 400) ? 
alloc : 0 ; } -#else - -template< class T , class L , class D , class M , class S > -size_t allocation_count( const Kokkos::View<T,L,D,M,S> & view ) -{ - const size_t card = Kokkos::Impl::cardinality_count( view.shape() ); - const size_t alloc = view.capacity(); - - return card <= alloc ? alloc : 0 ; -} - -#endif - /*--------------------------------------------------------------------------*/ template< typename T, class DeviceType> @@ -657,7 +642,6 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 3 > if ( & right(i0,i1,i2) != & right_stride(i0,i1,i2) ) { update |= 8 ; } } -#if KOKKOS_USING_EXP_VIEW for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) @@ -665,7 +649,6 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 3 > if ( & left(i0,i1,i2) != & left(i0,i1,i2,0,0,0,0,0) ) { update |= 3 ; } if ( & right(i0,i1,i2) != & right(i0,i1,i2,0,0,0,0,0) ) { update |= 3 ; } } -#endif } }; @@ -742,14 +725,12 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 2 > offset = j ; } -#if KOKKOS_USING_EXP_VIEW for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) { if ( & left(i0,i1) != & left(i0,i1,0,0,0,0,0,0) ) { update |= 3 ; } if ( & right(i0,i1) != & right(i0,i1,0,0,0,0,0,0) ) { update |= 3 ; } } -#endif } }; @@ -813,10 +794,8 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 1 > { for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) { -#if KOKKOS_USING_EXP_VIEW if ( & left(i0) != & left(i0,0,0,0,0,0,0,0) ) { update |= 3 ; } if ( & right(i0) != & right(i0,0,0,0,0,0,0,0) ) { update |= 3 ; } -#endif if ( & left(i0) != & left_stride(i0) ) { update |= 4 ; } if ( & right(i0) != & right_stride(i0) ) { update |= 8 ; } } @@ -1021,18 +1000,10 @@ public: dx = dView4( "dx" , N0 ); dy = 
dView4( "dy" , N0 ); - #if KOKKOS_USING_EXP_VIEW ASSERT_EQ( dx.use_count() , size_t(1) ); - #else - ASSERT_EQ( dx.tracker().ref_count() , size_t(1) ); - #endif dView4_unmanaged unmanaged_dx = dx; - #if KOKKOS_USING_EXP_VIEW ASSERT_EQ( dx.use_count() , size_t(1) ); - #else - ASSERT_EQ( dx.tracker().ref_count() , size_t(1) ); - #endif dView4_unmanaged unmanaged_from_ptr_dx = dView4_unmanaged(dx.ptr_on_device(), dx.dimension_0(), @@ -1050,48 +1021,24 @@ public: } const_dView4 const_dx = dx ; - #if KOKKOS_USING_EXP_VIEW ASSERT_EQ( dx.use_count() , size_t(2) ); - #else - ASSERT_EQ( dx.tracker().ref_count() , size_t(2) ); - #endif { const_dView4 const_dx2; const_dx2 = const_dx; - #if KOKKOS_USING_EXP_VIEW ASSERT_EQ( dx.use_count() , size_t(3) ); - #else - ASSERT_EQ( dx.tracker().ref_count() , size_t(3) ); - #endif const_dx2 = dy; - #if KOKKOS_USING_EXP_VIEW ASSERT_EQ( dx.use_count() , size_t(2) ); - #else - ASSERT_EQ( dx.tracker().ref_count() , size_t(2) ); - #endif const_dView4 const_dx3(dx); - #if KOKKOS_USING_EXP_VIEW ASSERT_EQ( dx.use_count() , size_t(3) ); - #else - ASSERT_EQ( dx.tracker().ref_count() , size_t(3) ); - #endif dView4_unmanaged dx4_unmanaged(dx); - #if KOKKOS_USING_EXP_VIEW ASSERT_EQ( dx.use_count() , size_t(3) ); - #else - ASSERT_EQ( dx.tracker().ref_count() , size_t(3) ); - #endif } - #if KOKKOS_USING_EXP_VIEW ASSERT_EQ( dx.use_count() , size_t(2) ); - #else - ASSERT_EQ( dx.tracker().ref_count() , size_t(2) ); - #endif ASSERT_FALSE( dx.ptr_on_device() == 0 ); @@ -1120,7 +1067,6 @@ public: // T v2 = hx(0,0) ; // Generates compile error as intended // hx(0,0) = v2 ; // Generates compile error as intended -#if ! KOKKOS_USING_EXP_VIEW // Testing with asynchronous deep copy with respect to device { size_t count = 0 ; @@ -1185,7 +1131,6 @@ public: { ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); } }}}} } -#endif /* #if ! 
KOKKOS_USING_EXP_VIEW */ // Testing with synchronous deep copy { diff --git a/lib/kokkos/core/unit_test/TestViewImpl.hpp b/lib/kokkos/core/unit_test/TestViewImpl.hpp deleted file mode 100644 index c34ef759d1dd41bbb9238ccdb37f2aa28955af6d..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/unit_test/TestViewImpl.hpp +++ /dev/null @@ -1,289 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include <gtest/gtest.h> - -#include <stdexcept> -#include <sstream> -#include <iostream> - -#include <Kokkos_Core.hpp> - -/*--------------------------------------------------------------------------*/ - -#if KOKKOS_USING_EXP_VIEW - -namespace Test { - -template < class Device > -void test_view_impl() {} - -} - -#else - -/*--------------------------------------------------------------------------*/ - -namespace Test { - -struct DummyMemorySpace -{ - typedef DummyMemorySpace memory_space ; - typedef unsigned size_type ; -}; - -/*--------------------------------------------------------------------------*/ - -template< class Type > -struct DefineShape { - typedef typename Kokkos::Impl::AnalyzeShape<Type>::shape type ; -}; - -template< class Type > -struct ExtractValueType { - typedef typename Kokkos::Impl::AnalyzeShape<Type>::value_type type ; -}; - -template< class Type > -struct ArrayType { typedef Type type ; }; - -template < class Device > -void test_view_impl() -{ - //typedef typename Device::memory_space memory_space ; // unused - - typedef ArrayType< int[100] >::type type_01 ; - typedef ArrayType< int* >::type type_11 ; - typedef ArrayType< int[5][6][700] >::type type_03 ; - typedef ArrayType< double*[8][9][900] >::type type_14 ; - typedef ArrayType< long** >::type 
type_22 ; - typedef ArrayType< short **[5][6][7] >::type type_25 ; - typedef ArrayType< const short **[5][6][7] >::type const_type_25 ; - typedef ArrayType< short***[5][6][7] >::type type_36 ; - typedef ArrayType< const short***[5][6][7] >::type const_type_36 ; - - // mfh 14 Feb 2014: With gcc 4.8.2 -Wall, this emits a warning: - // - // typedef ‘ok_const_25’ locally defined but not used [-Wunused-local-typedefs] - // - // It's unfortunate that this is the case, because the typedef is - // being used for a compile-time check! We deal with this by - // declaring an instance of ok_const_25, and marking it with - // "(void)" so that instance doesn't emit an "unused variable" - // warning. - // - // typedef typename Kokkos::Impl::StaticAssertSame< - // typename Kokkos::Impl::AnalyzeShape<type_25>::const_type , - // typename Kokkos::Impl::AnalyzeShape<const_type_25>::type - // > ok_const_25 ; - - typedef typename Kokkos::Impl::StaticAssertSame< - typename Kokkos::Impl::AnalyzeShape<type_25>::const_type, - typename Kokkos::Impl::AnalyzeShape<const_type_25>::type - > ok_const_25 ; - - typedef typename Kokkos::Impl::StaticAssertSame< - typename Kokkos::Impl::AnalyzeShape<type_36>::const_type, - typename Kokkos::Impl::AnalyzeShape<const_type_36>::type - > ok_const_36 ; - { - ok_const_25 thing_25 ; - ok_const_36 thing_36 ; - (void) thing_25 ; // silence warning for unused variable - (void) thing_36 ; // silence warning for unused variable - } - - ASSERT_TRUE( ( Kokkos::Impl::is_same< ExtractValueType<type_03>::type , int >::value ) ); - ASSERT_TRUE( ( Kokkos::Impl::is_same< ExtractValueType<type_14>::type , double >::value ) ); - ASSERT_TRUE( ( Kokkos::Impl::is_same< ExtractValueType<type_22>::type , long >::value ) ); - ASSERT_TRUE( ( Kokkos::Impl::is_same< ExtractValueType<type_36>::type , short >::value ) ); - - ASSERT_FALSE( ( Kokkos::Impl::is_same< ExtractValueType<type_36>::type , int >::value ) ); - - typedef typename DefineShape< type_01 >::type shape_01_type ; - 
typedef typename DefineShape< type_11 >::type shape_11_type ; - typedef typename DefineShape< type_03 >::type shape_03_type ; - typedef typename DefineShape< type_14 >::type shape_14_type ; - typedef typename DefineShape< type_22 >::type shape_22_type ; - typedef typename DefineShape< type_36 >::type shape_36_type ; - - ASSERT_TRUE( ( Kokkos::Impl::StaticAssert< shape_36_type::rank == 6 >::value ) ); - ASSERT_TRUE( ( Kokkos::Impl::StaticAssert< shape_03_type::rank == 3 >::value ) ); - - shape_01_type shape_01 ; shape_01_type::assign( shape_01 ); - shape_11_type shape_11 ; shape_11_type::assign( shape_11, 1000 ); - shape_03_type shape_03 ; shape_03_type::assign( shape_03 ); - shape_14_type shape_14 ; shape_14_type::assign( shape_14 , 0 ); - shape_22_type shape_22 ; shape_22_type::assign( shape_22 , 0 , 0 ); - shape_36_type shape_36 ; shape_36_type::assign( shape_36 , 10 , 20 , 30 ); - - ASSERT_TRUE( shape_01.rank_dynamic == 0u ); - ASSERT_TRUE( shape_01.rank == 1u ); - ASSERT_TRUE( shape_01.N0 == 100u ); - - ASSERT_TRUE( shape_11.rank_dynamic == 1u ); - ASSERT_TRUE( shape_11.rank == 1u ); - ASSERT_TRUE( shape_11.N0 == 1000u ); - - ASSERT_TRUE( shape_03.rank_dynamic == 0u ); - ASSERT_TRUE( shape_03.rank == 3u ); - ASSERT_TRUE( shape_03.N0 == 5u ); - ASSERT_TRUE( shape_03.N1 == 6u ); - ASSERT_TRUE( shape_03.N2 == 700u ); - - ASSERT_TRUE( shape_14.rank_dynamic == 1u ); - ASSERT_TRUE( shape_14.rank == 4u ); - ASSERT_TRUE( shape_14.N0 == 0u ); - ASSERT_TRUE( shape_14.N1 == 8u ); - ASSERT_TRUE( shape_14.N2 == 9u ); - ASSERT_TRUE( shape_14.N3 == 900u ); - - ASSERT_TRUE( shape_22.rank_dynamic == 2u ); - ASSERT_TRUE( shape_22.rank == 2u ); - ASSERT_TRUE( shape_22.N0 == 0u ); - ASSERT_TRUE( shape_22.N1 == 0u ); - - ASSERT_TRUE( shape_36.rank_dynamic == 3u ); - ASSERT_TRUE( shape_36.rank == 6u ); - ASSERT_TRUE( shape_36.N0 == 10u ); - ASSERT_TRUE( shape_36.N1 == 20u ); - ASSERT_TRUE( shape_36.N2 == 30u ); - ASSERT_TRUE( shape_36.N3 == 5u ); - ASSERT_TRUE( shape_36.N4 == 6u ); 
- ASSERT_TRUE( shape_36.N5 == 7u ); - - - ASSERT_TRUE( shape_01 == shape_01 ); - ASSERT_TRUE( shape_11 == shape_11 ); - ASSERT_TRUE( shape_36 == shape_36 ); - ASSERT_TRUE( shape_01 != shape_36 ); - ASSERT_TRUE( shape_22 != shape_36 ); - - //------------------------------------------------------------------------ - - typedef Kokkos::Impl::ViewOffset< shape_01_type , Kokkos::LayoutLeft > shape_01_left_offset ; - typedef Kokkos::Impl::ViewOffset< shape_11_type , Kokkos::LayoutLeft > shape_11_left_offset ; - typedef Kokkos::Impl::ViewOffset< shape_03_type , Kokkos::LayoutLeft > shape_03_left_offset ; - typedef Kokkos::Impl::ViewOffset< shape_14_type , Kokkos::LayoutLeft > shape_14_left_offset ; - typedef Kokkos::Impl::ViewOffset< shape_22_type , Kokkos::LayoutLeft > shape_22_left_offset ; - typedef Kokkos::Impl::ViewOffset< shape_36_type , Kokkos::LayoutLeft > shape_36_left_offset ; - - typedef Kokkos::Impl::ViewOffset< shape_01_type , Kokkos::LayoutRight > shape_01_right_offset ; - typedef Kokkos::Impl::ViewOffset< shape_11_type , Kokkos::LayoutRight > shape_11_right_offset ; - typedef Kokkos::Impl::ViewOffset< shape_03_type , Kokkos::LayoutRight > shape_03_right_offset ; - typedef Kokkos::Impl::ViewOffset< shape_14_type , Kokkos::LayoutRight > shape_14_right_offset ; - typedef Kokkos::Impl::ViewOffset< shape_22_type , Kokkos::LayoutRight > shape_22_right_offset ; - typedef Kokkos::Impl::ViewOffset< shape_36_type , Kokkos::LayoutRight > shape_36_right_offset ; - - ASSERT_TRUE( ! shape_01_left_offset::has_padding ); - ASSERT_TRUE( ! shape_11_left_offset::has_padding ); - ASSERT_TRUE( ! shape_03_left_offset::has_padding ); - ASSERT_TRUE( shape_14_left_offset::has_padding ); - ASSERT_TRUE( shape_22_left_offset::has_padding ); - ASSERT_TRUE( shape_36_left_offset::has_padding ); - - ASSERT_TRUE( ! shape_01_right_offset::has_padding ); - ASSERT_TRUE( ! shape_11_right_offset::has_padding ); - ASSERT_TRUE( ! shape_03_right_offset::has_padding ); - ASSERT_TRUE( ! 
shape_14_right_offset::has_padding ); - ASSERT_TRUE( shape_22_right_offset::has_padding ); - ASSERT_TRUE( shape_36_right_offset::has_padding ); - - //------------------------------------------------------------------------ - - typedef Kokkos::Impl::ViewOffset< shape_01_type , Kokkos::LayoutStride > shape_01_stride_offset ; - typedef Kokkos::Impl::ViewOffset< shape_36_type , Kokkos::LayoutStride > shape_36_stride_offset ; - - { - shape_01_stride_offset stride_offset_01 ; - - stride_offset_01.assign( 1, stride_offset_01.N0, 0,0,0,0,0,0,0 ); - - ASSERT_EQ( int(stride_offset_01.S[0]) , int(1) ); - ASSERT_EQ( int(stride_offset_01.S[1]) , int(stride_offset_01.N0) ); - } - - { - shape_36_stride_offset stride_offset_36 ; - - size_t str[7] ; - str[5] = 1 ; - str[4] = str[5] * stride_offset_36.N5 ; - str[3] = str[4] * stride_offset_36.N4 ; - str[2] = str[3] * stride_offset_36.N3 ; - str[1] = str[2] * 100 ; - str[0] = str[1] * 200 ; - str[6] = str[0] * 300 ; - - stride_offset_36.assign( str[0] , str[1] , str[2] , str[3] , str[4] , str[5] , str[6] , 0 , 0 ); - - ASSERT_EQ( size_t(stride_offset_36.S[6]) , size_t(str[6]) ); - ASSERT_EQ( size_t(stride_offset_36.N2) , size_t(100) ); - ASSERT_EQ( size_t(stride_offset_36.N1) , size_t(200) ); - ASSERT_EQ( size_t(stride_offset_36.N0) , size_t(300) ); - } - - //------------------------------------------------------------------------ - - { - const int rank = 6 ; - const int order[] = { 5 , 3 , 1 , 0 , 2 , 4 }; - const unsigned dim[] = { 2 , 3 , 5 , 7 , 11 , 13 }; - Kokkos::LayoutStride stride_6 = Kokkos::LayoutStride::order_dimensions( rank , order , dim ); - size_t n = 1 ; - for ( int i = 0 ; i < rank ; ++i ) { - ASSERT_EQ( size_t(dim[i]) , size_t( stride_6.dimension[i] ) ); - ASSERT_EQ( size_t(n) , size_t( stride_6.stride[ order[i] ] ) ); - n *= dim[order[i]] ; - } - } - - //------------------------------------------------------------------------ -} - -} /* namespace Test */ - -#endif - 
-/*--------------------------------------------------------------------------*/ - diff --git a/lib/kokkos/core/unit_test/TestViewMapping.hpp b/lib/kokkos/core/unit_test/TestViewMapping.hpp index eddb81bed5cfaa855dc51a43d4a560bc69030543..8989ee74c8b45f2375567ab6f22abe9a943ec79c 100644 --- a/lib/kokkos/core/unit_test/TestViewMapping.hpp +++ b/lib/kokkos/core/unit_test/TestViewMapping.hpp @@ -99,47 +99,67 @@ void test_view_mapping() ASSERT_LE( sizeof(dim_s0_s0_s0_s0_s0_s0_s0) , 8 * sizeof(unsigned) ); ASSERT_EQ( sizeof(dim_s0_s0_s0_s0_s0_s0_s0_s0) , 8 * sizeof(unsigned) ); - ASSERT_EQ( int(dim_0::rank) , int(0) ); - ASSERT_EQ( int(dim_0::rank_dynamic) , int(0) ); - - ASSERT_EQ( int(dim_s2::rank) , int(1) ); - ASSERT_EQ( int(dim_s2::rank_dynamic) , int(0) ); - - ASSERT_EQ( int(dim_s2_s3::rank) , int(2) ); - ASSERT_EQ( int(dim_s2_s3::rank_dynamic) , int(0) ); - - ASSERT_EQ( int(dim_s2_s3_s4::rank) , int(3) ); - ASSERT_EQ( int(dim_s2_s3_s4::rank_dynamic) , int(0) ); - - ASSERT_EQ( int(dim_s0::rank) , int(1) ); - ASSERT_EQ( int(dim_s0::rank_dynamic) , int(1) ); - - ASSERT_EQ( int(dim_s0_s3::rank) , int(2) ); - ASSERT_EQ( int(dim_s0_s3::rank_dynamic) , int(1) ); - - ASSERT_EQ( int(dim_s0_s3_s4::rank) , int(3) ); - ASSERT_EQ( int(dim_s0_s3_s4::rank_dynamic) , int(1) ); - - ASSERT_EQ( int(dim_s0_s0_s4::rank) , int(3) ); - ASSERT_EQ( int(dim_s0_s0_s4::rank_dynamic) , int(2) ); - - ASSERT_EQ( int(dim_s0_s0_s0::rank) , int(3) ); - ASSERT_EQ( int(dim_s0_s0_s0::rank_dynamic) , int(3) ); - - ASSERT_EQ( int(dim_s0_s0_s0_s0::rank) , int(4) ); - ASSERT_EQ( int(dim_s0_s0_s0_s0::rank_dynamic) , int(4) ); - - ASSERT_EQ( int(dim_s0_s0_s0_s0_s0::rank) , int(5) ); - ASSERT_EQ( int(dim_s0_s0_s0_s0_s0::rank_dynamic) , int(5) ); - - ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0::rank) , int(6) ); - ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0::rank_dynamic) , int(6) ); - - ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0_s0::rank) , int(7) ); - ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0_s0::rank_dynamic) , int(7) ); - - 
ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0_s0_s0::rank) , int(8) ); - ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0_s0_s0::rank_dynamic) , int(8) ); + static_assert( int(dim_0::rank) == int(0) , "" ); + static_assert( int(dim_0::rank_dynamic) == int(0) , "" ); + static_assert( int(dim_0::ArgN0) == 1 , "" ); + static_assert( int(dim_0::ArgN1) == 1 , "" ); + static_assert( int(dim_0::ArgN2) == 1 , "" ); + + static_assert( int(dim_s2::rank) == int(1) , "" ); + static_assert( int(dim_s2::rank_dynamic) == int(0) , "" ); + static_assert( int(dim_s2::ArgN0) == 2 , "" ); + static_assert( int(dim_s2::ArgN1) == 1 , "" ); + + static_assert( int(dim_s2_s3::rank) == int(2) , "" ); + static_assert( int(dim_s2_s3::rank_dynamic) == int(0) , "" ); + static_assert( int(dim_s2_s3::ArgN0) == 2 , "" ); + static_assert( int(dim_s2_s3::ArgN1) == 3 , "" ); + static_assert( int(dim_s2_s3::ArgN2) == 1 , "" ); + + static_assert( int(dim_s2_s3_s4::rank) == int(3) , "" ); + static_assert( int(dim_s2_s3_s4::rank_dynamic) == int(0) , "" ); + static_assert( int(dim_s2_s3_s4::ArgN0) == 2 , "" ); + static_assert( int(dim_s2_s3_s4::ArgN1) == 3 , "" ); + static_assert( int(dim_s2_s3_s4::ArgN2) == 4 , "" ); + static_assert( int(dim_s2_s3_s4::ArgN3) == 1 , "" ); + + static_assert( int(dim_s0::rank) == int(1) , "" ); + static_assert( int(dim_s0::rank_dynamic) == int(1) , "" ); + + static_assert( int(dim_s0_s3::rank) == int(2) , "" ); + static_assert( int(dim_s0_s3::rank_dynamic) == int(1) , "" ); + static_assert( int(dim_s0_s3::ArgN0) == 0 , "" ); + static_assert( int(dim_s0_s3::ArgN1) == 3 , "" ); + + static_assert( int(dim_s0_s3_s4::rank) == int(3) , "" ); + static_assert( int(dim_s0_s3_s4::rank_dynamic) == int(1) , "" ); + static_assert( int(dim_s0_s3_s4::ArgN0) == 0 , "" ); + static_assert( int(dim_s0_s3_s4::ArgN1) == 3 , "" ); + static_assert( int(dim_s0_s3_s4::ArgN2) == 4 , "" ); + + static_assert( int(dim_s0_s0_s4::rank) == int(3) , "" ); + static_assert( int(dim_s0_s0_s4::rank_dynamic) == int(2) , "" ); + 
static_assert( int(dim_s0_s0_s4::ArgN0) == 0 , "" ); + static_assert( int(dim_s0_s0_s4::ArgN1) == 0 , "" ); + static_assert( int(dim_s0_s0_s4::ArgN2) == 4 , "" ); + + static_assert( int(dim_s0_s0_s0::rank) == int(3) , "" ); + static_assert( int(dim_s0_s0_s0::rank_dynamic) == int(3) , "" ); + + static_assert( int(dim_s0_s0_s0_s0::rank) == int(4) , "" ); + static_assert( int(dim_s0_s0_s0_s0::rank_dynamic) == int(4) , "" ); + + static_assert( int(dim_s0_s0_s0_s0_s0::rank) == int(5) , "" ); + static_assert( int(dim_s0_s0_s0_s0_s0::rank_dynamic) == int(5) , "" ); + + static_assert( int(dim_s0_s0_s0_s0_s0_s0::rank) == int(6) , "" ); + static_assert( int(dim_s0_s0_s0_s0_s0_s0::rank_dynamic) == int(6) , "" ); + + static_assert( int(dim_s0_s0_s0_s0_s0_s0_s0::rank) == int(7) , "" ); + static_assert( int(dim_s0_s0_s0_s0_s0_s0_s0::rank_dynamic) == int(7) , "" ); + + static_assert( int(dim_s0_s0_s0_s0_s0_s0_s0_s0::rank) == int(8) , "" ); + static_assert( int(dim_s0_s0_s0_s0_s0_s0_s0_s0::rank_dynamic) == int(8) , "" ); dim_s0 d1( 2, 3, 4, 5, 6, 7, 8, 9 ); dim_s0_s0 d2( 2, 3, 4, 5, 6, 7, 8, 9 ); @@ -534,6 +554,13 @@ void test_view_mapping() static_assert( a_int_r1::dimension::rank == 1 , "" ); static_assert( a_int_r1::dimension::rank_dynamic == 1 , "" ); + static_assert( a_int_r5::dimension::ArgN0 == 0 , "" ); + static_assert( a_int_r5::dimension::ArgN1 == 0 , "" ); + static_assert( a_int_r5::dimension::ArgN2 == 4 , "" ); + static_assert( a_int_r5::dimension::ArgN3 == 5 , "" ); + static_assert( a_int_r5::dimension::ArgN4 == 6 , "" ); + static_assert( a_int_r5::dimension::ArgN5 == 1 , "" ); + static_assert( std::is_same< typename a_int_r1::dimension , ViewDimension<0> >::value , "" ); static_assert( std::is_same< typename a_int_r1::non_const_value_type , int >::value , "" ); @@ -545,8 +572,14 @@ void test_view_mapping() static_assert( a_const_int_r5::dimension::rank == 5 , "" ); static_assert( a_const_int_r5::dimension::rank_dynamic == 2 , "" ); - static_assert( std::is_same< 
typename a_const_int_r5::dimension , ViewDimension<0,0,4,5,6> >::value , "" ); + static_assert( a_const_int_r5::dimension::ArgN0 == 0 , "" ); + static_assert( a_const_int_r5::dimension::ArgN1 == 0 , "" ); + static_assert( a_const_int_r5::dimension::ArgN2 == 4 , "" ); + static_assert( a_const_int_r5::dimension::ArgN3 == 5 , "" ); + static_assert( a_const_int_r5::dimension::ArgN4 == 6 , "" ); + static_assert( a_const_int_r5::dimension::ArgN5 == 1 , "" ); + static_assert( std::is_same< typename a_const_int_r5::dimension , ViewDimension<0,0,4,5,6> >::value , "" ); static_assert( std::is_same< typename a_const_int_r5::non_const_value_type , int >::value , "" ); static_assert( a_int_r5::dimension::rank == 5 , "" ); @@ -616,8 +649,8 @@ void test_view_mapping() { constexpr int N = 10 ; - typedef Kokkos::Experimental::View<int*,Space> T ; - typedef Kokkos::Experimental::View<const int*,Space> C ; + typedef Kokkos::View<int*,Space> T ; + typedef Kokkos::View<const int*,Space> C ; int data[N] ; @@ -669,7 +702,7 @@ void test_view_mapping() ASSERT_EQ( vr1.dimension_0() , N ); - if ( Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename Space::memory_space , Kokkos::HostSpace >::value ) { + if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , typename Space::memory_space >::accessible ) { for ( int i = 0 ; i < N ; ++i ) data[i] = i + 1 ; for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 1 ); for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( cr1[i] , i + 1 ); @@ -688,8 +721,8 @@ void test_view_mapping() { constexpr int N = 10 ; - typedef Kokkos::Experimental::View<int*,Space> T ; - typedef Kokkos::Experimental::View<const int*,Space> C ; + typedef Kokkos::View<int*,Space> T ; + typedef Kokkos::View<const int*,Space> C ; T vr1("vr1",N); C cr1(vr1); @@ -712,7 +745,7 @@ void test_view_mapping() ASSERT_EQ( vr1.dimension_0() , N ); - if ( Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename Space::memory_space , Kokkos::HostSpace >::value ) { + if ( 
Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , typename Space::memory_space >::accessible ) { for ( int i = 0 ; i < N ; ++i ) vr1(i) = i + 1 ; for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 1 ); for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( cr1[i] , i + 1 ); @@ -731,8 +764,8 @@ void test_view_mapping() // Testing proper handling of zero-length allocations { constexpr int N = 0 ; - typedef Kokkos::Experimental::View<int*,Space> T ; - typedef Kokkos::Experimental::View<const int*,Space> C ; + typedef Kokkos::View<int*,Space> T ; + typedef Kokkos::View<const int*,Space> C ; T vr1("vr1",N); C cr1(vr1); @@ -771,7 +804,7 @@ void test_view_mapping() } { - typedef Kokkos::Experimental::ViewTraits<int***,Kokkos::LayoutStride,ExecSpace> traits_t ; + typedef Kokkos::ViewTraits<int***,Kokkos::LayoutStride,ExecSpace> traits_t ; typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0> dims_t ; typedef Kokkos::Experimental::Impl::ViewOffset< dims_t , Kokkos::LayoutStride > offset_t ; @@ -802,8 +835,9 @@ void test_view_mapping() } { - typedef Kokkos::Experimental::View<int**,Space> V ; + typedef Kokkos::View<int**,Space> V ; typedef typename V::HostMirror M ; + typedef typename Kokkos::View<int**,Space>::array_layout layout_type; constexpr int N0 = 10 ; constexpr int N1 = 11 ; @@ -825,6 +859,14 @@ void test_view_mapping() ASSERT_EQ( b(i0,i1) , c(i0,i1) ); Kokkos::Experimental::resize( b , 5 , 6 ); + + for ( int i0 = 0 ; i0 < 5 ; ++i0 ) + for ( int i1 = 0 ; i1 < 6 ; ++i1 ) { + int val = 1 + i0 + i1 * N0; + ASSERT_EQ( b(i0,i1) , c(i0,i1) ); + ASSERT_EQ( b(i0,i1) , val ); + } + Kokkos::Experimental::realloc( c , 5 , 6 ); Kokkos::Experimental::realloc( d , 5 , 6 ); @@ -834,11 +876,89 @@ void test_view_mapping() ASSERT_EQ( c.dimension_1() , 6 ); ASSERT_EQ( d.dimension_0() , 5 ); ASSERT_EQ( d.dimension_1() , 6 ); + + layout_type layout(7,8); + Kokkos::Experimental::resize( b , layout ); + for ( int i0 = 0 ; i0 < 7 ; ++i0 ) + for ( int i1 = 6 ; i1 < 8 ; ++i1 ) + b(i0,i1) = 1 
+ i0 + i1 * N0 ; + + for ( int i0 = 5 ; i0 < 7 ; ++i0 ) + for ( int i1 = 0 ; i1 < 8 ; ++i1 ) + b(i0,i1) = 1 + i0 + i1 * N0 ; + + for ( int i0 = 0 ; i0 < 7 ; ++i0 ) + for ( int i1 = 0 ; i1 < 8 ; ++i1 ) { + int val = 1 + i0 + i1 * N0; + ASSERT_EQ( b(i0,i1) , val ); + } + + Kokkos::Experimental::realloc( c , layout ); + Kokkos::Experimental::realloc( d , layout ); + + ASSERT_EQ( b.dimension_0() , 7 ); + ASSERT_EQ( b.dimension_1() , 8 ); + ASSERT_EQ( c.dimension_0() , 7 ); + ASSERT_EQ( c.dimension_1() , 8 ); + ASSERT_EQ( d.dimension_0() , 7 ); + ASSERT_EQ( d.dimension_1() , 8 ); + + } + + { + typedef Kokkos::View<int**,Kokkos::LayoutStride,Space> V ; + typedef typename V::HostMirror M ; + typedef typename Kokkos::View<int**,Kokkos::LayoutStride,Space>::array_layout layout_type; + + constexpr int N0 = 10 ; + constexpr int N1 = 11 ; + + const int dimensions[] = {N0,N1}; + const int order[] = {1,0}; + + V a("a",Kokkos::LayoutStride::order_dimensions(2,order,dimensions)); + M b = Kokkos::Experimental::create_mirror(a); + M c = Kokkos::Experimental::create_mirror_view(a); + M d ; + + for ( int i0 = 0 ; i0 < N0 ; ++i0 ) + for ( int i1 = 0 ; i1 < N1 ; ++i1 ) + b(i0,i1) = 1 + i0 + i1 * N0 ; + + Kokkos::Experimental::deep_copy( a , b ); + Kokkos::Experimental::deep_copy( c , a ); + + for ( int i0 = 0 ; i0 < N0 ; ++i0 ) + for ( int i1 = 0 ; i1 < N1 ; ++i1 ) + ASSERT_EQ( b(i0,i1) , c(i0,i1) ); + + const int dimensions2[] = {7,8}; + const int order2[] = {1,0}; + layout_type layout = layout_type::order_dimensions(2,order2,dimensions2); + Kokkos::Experimental::resize( b , layout ); + + for ( int i0 = 0 ; i0 < 7 ; ++i0 ) + for ( int i1 = 0 ; i1 < 8 ; ++i1 ) { + int val = 1 + i0 + i1 * N0; + ASSERT_EQ( b(i0,i1) , c(i0,i1) ); + ASSERT_EQ( b(i0,i1) , val ); + } + + Kokkos::Experimental::realloc( c , layout ); + Kokkos::Experimental::realloc( d , layout ); + + ASSERT_EQ( b.dimension_0() , 7 ); + ASSERT_EQ( b.dimension_1() , 8 ); + ASSERT_EQ( c.dimension_0() , 7 ); + ASSERT_EQ( 
c.dimension_1() , 8 ); + ASSERT_EQ( d.dimension_0() , 7 ); + ASSERT_EQ( d.dimension_1() , 8 ); + } { - typedef Kokkos::Experimental::View<int*,Space> V ; - typedef Kokkos::Experimental::View<int*,Space,Kokkos::MemoryUnmanaged> U ; + typedef Kokkos::View<int*,Space> V ; + typedef Kokkos::View<int*,Space,Kokkos::MemoryUnmanaged> U ; V a("a",10); @@ -873,10 +993,10 @@ void test_view_mapping() ASSERT_EQ( a.use_count() , 1 ); ASSERT_EQ( b.use_count() , 0 ); -#if KOKKOS_USING_EXP_VIEW && ! defined ( KOKKOS_CUDA_USE_LAMBDA ) +#if ! defined ( KOKKOS_CUDA_USE_LAMBDA ) /* Cannot launch host lambda when CUDA lambda is enabled */ - typedef typename Kokkos::Impl::is_space< Space >::host_execution_space + typedef typename Kokkos::Impl::HostMirror< Space >::Space::execution_space host_exec_space ; Kokkos::parallel_for( @@ -904,31 +1024,31 @@ struct TestViewMappingSubview typedef Kokkos::pair<int,int> range ; enum { AN = 10 }; - typedef Kokkos::Experimental::View<int*,ExecSpace> AT ; - typedef Kokkos::Experimental::View<const int*,ExecSpace> ACT ; - typedef Kokkos::Experimental::Subview< AT , range > AS ; + typedef Kokkos::View<int*,ExecSpace> AT ; + typedef Kokkos::View<const int*,ExecSpace> ACT ; + typedef Kokkos::Subview< AT , range > AS ; enum { BN0 = 10 , BN1 = 11 , BN2 = 12 }; - typedef Kokkos::Experimental::View<int***,ExecSpace> BT ; - typedef Kokkos::Experimental::Subview< BT , range , range , range > BS ; + typedef Kokkos::View<int***,ExecSpace> BT ; + typedef Kokkos::Subview< BT , range , range , range > BS ; enum { CN0 = 10 , CN1 = 11 , CN2 = 12 }; - typedef Kokkos::Experimental::View<int***[13][14],ExecSpace> CT ; - typedef Kokkos::Experimental::Subview< CT , range , range , range , int , int > CS ; + typedef Kokkos::View<int***[13][14],ExecSpace> CT ; + typedef Kokkos::Subview< CT , range , range , range , int , int > CS ; enum { DN0 = 10 , DN1 = 11 , DN2 = 12 , DN3 = 13 , DN4 = 14 }; - typedef Kokkos::Experimental::View<int***[DN3][DN4],ExecSpace> DT ; - typedef 
Kokkos::Experimental::Subview< DT , int , range , range , range , int > DS ; + typedef Kokkos::View<int***[DN3][DN4],ExecSpace> DT ; + typedef Kokkos::Subview< DT , int , range , range , range , int > DS ; - typedef Kokkos::Experimental::View<int***[13][14],Kokkos::LayoutLeft,ExecSpace> DLT ; - typedef Kokkos::Experimental::Subview< DLT , range , int , int , int , int > DLS1 ; + typedef Kokkos::View<int***[13][14],Kokkos::LayoutLeft,ExecSpace> DLT ; + typedef Kokkos::Subview< DLT , range , int , int , int , int > DLS1 ; static_assert( DLS1::rank == 1 && std::is_same< typename DLS1::array_layout , Kokkos::LayoutLeft >::value , "Subview layout error for rank 1 subview of left-most range of LayoutLeft" ); - typedef Kokkos::Experimental::View<int***[13][14],Kokkos::LayoutRight,ExecSpace> DRT ; - typedef Kokkos::Experimental::Subview< DRT , int , int , int , int , range > DRS1 ; + typedef Kokkos::View<int***[13][14],Kokkos::LayoutRight,ExecSpace> DRT ; + typedef Kokkos::Subview< DRT , int , int , int , int , range > DRS1 ; static_assert( DRS1::rank == 1 && std::is_same< typename DRS1::array_layout , Kokkos::LayoutRight >::value , "Subview layout error for rank 1 subview of right-most range of LayoutRight" ); @@ -1179,23 +1299,23 @@ void test_view_mapping_operator() { typedef typename Space::execution_space ExecSpace ; - TestViewMapOperator< Kokkos::Experimental::View<int,Kokkos::LayoutLeft,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::Experimental::View<int*,Kokkos::LayoutLeft,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::Experimental::View<int**,Kokkos::LayoutLeft,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::Experimental::View<int***,Kokkos::LayoutLeft,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::Experimental::View<int****,Kokkos::LayoutLeft,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::Experimental::View<int*****,Kokkos::LayoutLeft,ExecSpace> >::run(); - TestViewMapOperator< 
Kokkos::Experimental::View<int******,Kokkos::LayoutLeft,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::Experimental::View<int*******,Kokkos::LayoutLeft,ExecSpace> >::run(); - - TestViewMapOperator< Kokkos::Experimental::View<int,Kokkos::LayoutRight,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::Experimental::View<int*,Kokkos::LayoutRight,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::Experimental::View<int**,Kokkos::LayoutRight,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::Experimental::View<int***,Kokkos::LayoutRight,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::Experimental::View<int****,Kokkos::LayoutRight,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::Experimental::View<int*****,Kokkos::LayoutRight,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::Experimental::View<int******,Kokkos::LayoutRight,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::Experimental::View<int*******,Kokkos::LayoutRight,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int,Kokkos::LayoutLeft,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int*,Kokkos::LayoutLeft,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int**,Kokkos::LayoutLeft,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int***,Kokkos::LayoutLeft,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int****,Kokkos::LayoutLeft,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int*****,Kokkos::LayoutLeft,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int******,Kokkos::LayoutLeft,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int*******,Kokkos::LayoutLeft,ExecSpace> >::run(); + + TestViewMapOperator< Kokkos::View<int,Kokkos::LayoutRight,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int*,Kokkos::LayoutRight,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int**,Kokkos::LayoutRight,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int***,Kokkos::LayoutRight,ExecSpace> >::run(); + TestViewMapOperator< 
Kokkos::View<int****,Kokkos::LayoutRight,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int*****,Kokkos::LayoutRight,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int******,Kokkos::LayoutRight,ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int*******,Kokkos::LayoutRight,ExecSpace> >::run(); } /*--------------------------------------------------------------------------*/ @@ -1207,8 +1327,8 @@ struct TestViewMappingAtomic { typedef Kokkos::MemoryTraits< Kokkos::Atomic > mem_trait ; - typedef Kokkos::Experimental::View< int * , ExecSpace > T ; - typedef Kokkos::Experimental::View< int * , ExecSpace , mem_trait > T_atom ; + typedef Kokkos::View< int * , ExecSpace > T ; + typedef Kokkos::View< int * , ExecSpace , mem_trait > T_atom ; T x ; T_atom x_atom ; diff --git a/lib/kokkos/core/unit_test/TestViewOfClass.hpp b/lib/kokkos/core/unit_test/TestViewOfClass.hpp index 9b23a5d5597e2260e1a73b9f9b5b6b50a911567e..381b8786bc740dfcfb922eb6ddf5443ffa7136cd 100644 --- a/lib/kokkos/core/unit_test/TestViewOfClass.hpp +++ b/lib/kokkos/core/unit_test/TestViewOfClass.hpp @@ -122,42 +122,10 @@ void view_nested_view() } Kokkos::deep_copy( host_tracking , tracking ); -#if KOKKOS_USING_EXP_VIEW ASSERT_EQ( 0 , host_tracking(0) ); -#endif - } } -#if ! 
KOKKOS_USING_EXP_VIEW - -namespace Kokkos { -namespace Impl { - -template< class ExecSpace , class S > -struct ViewDefaultConstruct< ExecSpace , Test::NestedView<S> , true > -{ - typedef Test::NestedView<S> type ; - type * const m_ptr ; - - KOKKOS_FORCEINLINE_FUNCTION - void operator()( const typename ExecSpace::size_type& i ) const - { new(m_ptr+i) type(); } - - ViewDefaultConstruct( type * pointer , size_t capacity ) - : m_ptr( pointer ) - { - Kokkos::RangePolicy< ExecSpace > range( 0 , capacity ); - parallel_for( range , *this ); - ExecSpace::fence(); - } -}; - -} // namespace Impl -} // namespace Kokkos - -#endif - /*--------------------------------------------------------------------------*/ diff --git a/lib/kokkos/core/unit_test/TestViewSubview.hpp b/lib/kokkos/core/unit_test/TestViewSubview.hpp index 3846354b8c368f5c8505d84b4931a9105a6a14aa..1c2575b6f61c9fa11b28963852085960ecc420aa 100644 --- a/lib/kokkos/core/unit_test/TestViewSubview.hpp +++ b/lib/kokkos/core/unit_test/TestViewSubview.hpp @@ -144,7 +144,7 @@ void test_auto_1d () } for (size_type j = 0; j < numCols; ++j) { - auto X_j = Kokkos::subview (X, Kokkos::ALL(), j); + auto X_j = Kokkos::subview (X, Kokkos::ALL, j); fill_1D<decltype(X_j),Space> f4(X_j, ZERO); Kokkos::parallel_for(X_j.dimension_0(),f4); @@ -154,7 +154,7 @@ void test_auto_1d () } for (size_type jj = 0; jj < numCols; ++jj) { - auto X_jj = Kokkos::subview (X, Kokkos::ALL(), jj); + auto X_jj = Kokkos::subview (X, Kokkos::ALL, jj); fill_1D<decltype(X_jj),Space> f5(X_jj, ONE); Kokkos::parallel_for(X_jj.dimension_0(),f5); Kokkos::deep_copy (X_h, X); @@ -172,9 +172,9 @@ void test_1d_strided_assignment_impl(bool a, bool b, bool c, bool d, int n, int int col = n>2?2:0; int row = m>2?2:0; - if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) { + if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) { if(a) { - Kokkos::View<double*,LD,Space> l1da = 
Kokkos::subview(l2d,Kokkos::ALL(),row); + Kokkos::View<double*,LD,Space> l1da = Kokkos::subview(l2d,Kokkos::ALL,row); ASSERT_TRUE( & l1da(0) == & l2d(0,row) ); if(n>1) ASSERT_TRUE( & l1da(1) == & l2d(1,row) ); @@ -185,7 +185,7 @@ void test_1d_strided_assignment_impl(bool a, bool b, bool c, bool d, int n, int ASSERT_TRUE( & l1db(1) == & l2d(3,row) ); } if(c) { - Kokkos::View<double*,LD,Space> l1dc = Kokkos::subview(l2d,col,Kokkos::ALL()); + Kokkos::View<double*,LD,Space> l1dc = Kokkos::subview(l2d,col,Kokkos::ALL); ASSERT_TRUE( & l1dc(0) == & l2d(col,0) ); if(m>1) ASSERT_TRUE( & l1dc(1) == & l2d(col,1) ); @@ -226,7 +226,7 @@ void test_left_0() typedef Kokkos::View< int [2][3][4][5][2][3][4][5] , Kokkos::LayoutLeft , Space > view_static_8_type ; - if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) { + if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) { view_static_8_type x_static_8("x_static_left_8"); @@ -290,7 +290,7 @@ void test_left_1() typedef Kokkos::View< int ****[2][3][4][5] , Kokkos::LayoutLeft , Space > view_type ; - if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) { + if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) { view_type x8("x_left_8",2,3,4,5); @@ -353,7 +353,7 @@ void test_left_2() { typedef Kokkos::View< int **** , Kokkos::LayoutLeft , Space > view_type ; - if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) { + if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) { view_type x4("x4",2,3,4,5); @@ -417,7 +417,7 @@ void test_left_3() { typedef Kokkos::View< int ** , Kokkos::LayoutLeft , Space > view_type ; - if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) { + if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) { view_type 
xm("x4",10,5); @@ -429,7 +429,7 @@ void test_left_3() ASSERT_TRUE( & x0() == & xm(5,3) ); Kokkos::View<int*,Kokkos::LayoutLeft,Space> x1 = - Kokkos::subview( xm, Kokkos::ALL(), 3 ); + Kokkos::subview( xm, Kokkos::ALL, 3 ); ASSERT_TRUE( x1.is_contiguous() ); for ( int i = 0 ; i < int(xm.dimension_0()) ; ++i ) { @@ -437,7 +437,7 @@ void test_left_3() } Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2 = - Kokkos::subview( xm, Kokkos::pair<int,int>(1,9), Kokkos::ALL() ); + Kokkos::subview( xm, Kokkos::pair<int,int>(1,9), Kokkos::ALL ); ASSERT_TRUE( ! x2.is_contiguous() ); for ( int j = 0 ; j < int(x2.dimension_1()) ; ++j ) @@ -446,7 +446,7 @@ void test_left_3() } Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2c = - Kokkos::subview( xm, Kokkos::ALL(), std::pair<int,int>(2,4) ); + Kokkos::subview( xm, Kokkos::ALL, std::pair<int,int>(2,4) ); ASSERT_TRUE( x2c.is_contiguous() ); for ( int j = 0 ; j < int(x2c.dimension_1()) ; ++j ) @@ -455,13 +455,13 @@ void test_left_3() } Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2_n1 = - Kokkos::subview( xm , std::pair<int,int>(1,1) , Kokkos::ALL() ); + Kokkos::subview( xm , std::pair<int,int>(1,1) , Kokkos::ALL ); ASSERT_TRUE( x2_n1.dimension_0() == 0 ); ASSERT_TRUE( x2_n1.dimension_1() == xm.dimension_1() ); Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2_n2 = - Kokkos::subview( xm , Kokkos::ALL() , std::pair<int,int>(1,1) ); + Kokkos::subview( xm , Kokkos::ALL , std::pair<int,int>(1,1) ); ASSERT_TRUE( x2_n2.dimension_0() == xm.dimension_0() ); ASSERT_TRUE( x2_n2.dimension_1() == 0 ); @@ -477,7 +477,7 @@ void test_right_0() typedef Kokkos::View< int [2][3][4][5][2][3][4][5] , Kokkos::LayoutRight , Space > view_static_8_type ; - if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) { + if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) { view_static_8_type x_static_8("x_static_right_8"); @@ -542,7 +542,7 @@ void test_right_1() typedef Kokkos::View< int 
****[2][3][4][5] , Kokkos::LayoutRight , Space > view_type ; - if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) { + if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) { view_type x8("x_right_8",2,3,4,5); @@ -597,7 +597,7 @@ void test_right_3() { typedef Kokkos::View< int ** , Kokkos::LayoutRight , Space > view_type ; - if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) { + if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) { view_type xm("x4",10,5); @@ -609,7 +609,7 @@ void test_right_3() ASSERT_TRUE( & x0() == & xm(5,3) ); Kokkos::View<int*,Kokkos::LayoutRight,Space> x1 = - Kokkos::subview( xm, 3, Kokkos::ALL() ); + Kokkos::subview( xm, 3, Kokkos::ALL ); ASSERT_TRUE( x1.is_contiguous() ); for ( int i = 0 ; i < int(xm.dimension_1()) ; ++i ) { @@ -617,7 +617,7 @@ void test_right_3() } Kokkos::View<int**,Kokkos::LayoutRight,Space> x2c = - Kokkos::subview( xm, Kokkos::pair<int,int>(1,9), Kokkos::ALL() ); + Kokkos::subview( xm, Kokkos::pair<int,int>(1,9), Kokkos::ALL ); ASSERT_TRUE( x2c.is_contiguous() ); for ( int j = 0 ; j < int(x2c.dimension_1()) ; ++j ) @@ -626,7 +626,7 @@ void test_right_3() } Kokkos::View<int**,Kokkos::LayoutRight,Space> x2 = - Kokkos::subview( xm, Kokkos::ALL(), std::pair<int,int>(2,4) ); + Kokkos::subview( xm, Kokkos::ALL, std::pair<int,int>(2,4) ); ASSERT_TRUE( ! 
x2.is_contiguous() ); for ( int j = 0 ; j < int(x2.dimension_1()) ; ++j ) @@ -635,13 +635,13 @@ void test_right_3() } Kokkos::View<int**,Kokkos::LayoutRight,Space> x2_n1 = - Kokkos::subview( xm , std::pair<int,int>(1,1) , Kokkos::ALL() ); + Kokkos::subview( xm , std::pair<int,int>(1,1) , Kokkos::ALL ); ASSERT_TRUE( x2_n1.dimension_0() == 0 ); ASSERT_TRUE( x2_n1.dimension_1() == xm.dimension_1() ); Kokkos::View<int**,Kokkos::LayoutRight,Space> x2_n2 = - Kokkos::subview( xm , Kokkos::ALL() , std::pair<int,int>(1,1) ); + Kokkos::subview( xm , Kokkos::ALL , std::pair<int,int>(1,1) ); ASSERT_TRUE( x2_n2.dimension_0() == xm.dimension_0() ); ASSERT_TRUE( x2_n2.dimension_1() == 0 ); @@ -711,20 +711,21 @@ void test_Check3D5D(SubView a, View b, int i0, int i1, std::pair<int,int> range2 ASSERT_TRUE( errors == 0 ); } -template<class Space, class LayoutSub, class Layout, class LayoutOrg> +template<class Space, class LayoutSub, class Layout, class LayoutOrg, class MemTraits> void test_1d_assign_impl() { { //Breaks - Kokkos::View<int*,LayoutOrg,Space> a("A",N0); + Kokkos::View<int*,LayoutOrg,Space> a_org("A",N0); + Kokkos::View<int*,LayoutOrg,Space,MemTraits> a(a_org); Kokkos::fence(); for(int i=0; i<N0; i++) - a(i) = i; + a_org(i) = i; - Kokkos::View<int[N0],Layout,Space> a1(a); + Kokkos::View<int[N0],Layout,Space,MemTraits> a1(a); Kokkos::fence(); test_Check1D(a1,a,std::pair<int,int>(0,N0)); - Kokkos::View<int[N0],LayoutSub,Space> a2(a1); + Kokkos::View<int[N0],LayoutSub,Space,MemTraits> a2(a1); Kokkos::fence(); test_Check1D(a2,a,std::pair<int,int>(0,N0)); a1 = a; @@ -738,8 +739,8 @@ void test_1d_assign_impl() { } { // Works - Kokkos::View<int[N0],LayoutOrg,Space> a("A"); - Kokkos::View<int*,Layout,Space> a1(a); + Kokkos::View<int[N0],LayoutOrg,Space,MemTraits> a("A"); + Kokkos::View<int*,Layout,Space,MemTraits> a1(a); Kokkos::fence(); test_Check1D(a1,a,std::pair<int,int>(0,N0)); a1 = a; @@ -748,127 +749,491 @@ void test_1d_assign_impl() { } } -template<class Space, class Type, 
class TypeSub,class LayoutSub, class Layout, class LayoutOrg> +template<class Space, class Type, class TypeSub,class LayoutSub, class Layout, class LayoutOrg,class MemTraits> void test_2d_subview_3d_impl_type() { Kokkos::View<int***,LayoutOrg,Space> a_org("A",N0,N1,N2); - Kokkos::View<Type,Layout,Space> a(a_org); + Kokkos::View<Type,Layout,Space,MemTraits> a(a_org); for(int i0=0; i0<N0; i0++) for(int i1=0; i1<N1; i1++) for(int i2=0; i2<N2; i2++) - a(i0,i1,i2) = i0*1000000+i1*1000+i2; - Kokkos::View<TypeSub,LayoutSub,Space> a1; - a1 = Kokkos::subview(a,3,Kokkos::ALL(),Kokkos::ALL()); + a_org(i0,i1,i2) = i0*1000000+i1*1000+i2; + Kokkos::View<TypeSub,LayoutSub,Space,MemTraits> a1; + a1 = Kokkos::subview(a,3,Kokkos::ALL,Kokkos::ALL); Kokkos::fence(); test_Check2D3D(a1,a,3,std::pair<int,int>(0,N1),std::pair<int,int>(0,N2)); - Kokkos::View<TypeSub,LayoutSub,Space> a2(a,3,Kokkos::ALL(),Kokkos::ALL()); + Kokkos::View<TypeSub,LayoutSub,Space,MemTraits> a2(a,3,Kokkos::ALL,Kokkos::ALL); Kokkos::fence(); test_Check2D3D(a2,a,3,std::pair<int,int>(0,N1),std::pair<int,int>(0,N2)); } -template<class Space, class LayoutSub, class Layout, class LayoutOrg> +template<class Space, class LayoutSub, class Layout, class LayoutOrg, class MemTraits> void test_2d_subview_3d_impl_layout() { - test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int[N1][N2],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int* [N2],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int** ,LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int* [N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,int* [N1][N2],int[N1][N2],LayoutSub, Layout, LayoutOrg>(); - 
test_2d_subview_3d_impl_type<Space,int* [N1][N2],int* [N2],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_3d_impl_type<Space,int* [N1][N2],int** ,LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_3d_impl_type<Space,int* [N1][N2],int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type<Space,int* [N1][N2],int* [N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type<Space,int* [N1][N2],int** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,int** [N2],int[N1][N2],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_3d_impl_type<Space,int** [N2],int* [N2],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_3d_impl_type<Space,int** [N2],int** ,LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_3d_impl_type<Space,int** [N2],int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type<Space,int** [N2],int* [N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type<Space,int** [N2],int** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,int*** ,int[N1][N2],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_3d_impl_type<Space,int*** ,int* [N2],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_3d_impl_type<Space,int*** ,int** ,LayoutSub, Layout, LayoutOrg>(); + test_2d_subview_3d_impl_type<Space,int*** ,int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type<Space,int*** ,int* [N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type<Space,int*** ,int** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + + test_2d_subview_3d_impl_type<Space,const int[N0][N1][N2],const int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type<Space,const int[N0][N1][N2],const int* [N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type<Space,const int[N0][N1][N2],const int** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + + 
test_2d_subview_3d_impl_type<Space,const int* [N1][N2],const int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type<Space,const int* [N1][N2],const int* [N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type<Space,const int* [N1][N2],const int** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + + test_2d_subview_3d_impl_type<Space,const int** [N2],const int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type<Space,const int** [N2],const int* [N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type<Space,const int** [N2],const int** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + + test_2d_subview_3d_impl_type<Space,const int*** ,const int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type<Space,const int*** ,const int* [N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type<Space,const int*** ,const int** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); } -template<class Space, class Type, class TypeSub,class LayoutSub, class Layout, class LayoutOrg> -void test_2d_subview_5d_impl_type() { +template<class Space, class Type, class TypeSub,class LayoutSub, class Layout, class LayoutOrg, class MemTraits> +void test_3d_subview_5d_impl_type() { Kokkos::View<int*****,LayoutOrg,Space> a_org("A",N0,N1,N2,N3,N4); - Kokkos::View<Type,Layout,Space> a(a_org); + Kokkos::View<Type,Layout,Space,MemTraits> a(a_org); for(int i0=0; i0<N0; i0++) for(int i1=0; i1<N1; i1++) for(int i2=0; i2<N2; i2++) for(int i3=0; i3<N3; i3++) for(int i4=0; i4<N4; i4++) - a(i0,i1,i2,i3,i4) = i0*1000000+i1*10000+i2*100+i3*10+i4; - Kokkos::View<TypeSub,LayoutSub,Space> a1; - a1 = Kokkos::subview(a,3,5,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()); + a_org(i0,i1,i2,i3,i4) = i0*1000000+i1*10000+i2*100+i3*10+i4; + Kokkos::View<TypeSub,LayoutSub,Space,MemTraits> a1; + a1 = Kokkos::subview(a,3,5,Kokkos::ALL,Kokkos::ALL,Kokkos::ALL); Kokkos::fence(); 
test_Check3D5D(a1,a,3,5,std::pair<int,int>(0,N2),std::pair<int,int>(0,N3),std::pair<int,int>(0,N4)); - Kokkos::View<TypeSub,LayoutSub,Space> a2(a,3,5,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()); + Kokkos::View<TypeSub,LayoutSub,Space,MemTraits> a2(a,3,5,Kokkos::ALL,Kokkos::ALL,Kokkos::ALL); Kokkos::fence(); test_Check3D5D(a2,a,3,5,std::pair<int,int>(0,N2),std::pair<int,int>(0,N3),std::pair<int,int>(0,N4)); } -template<class Space, class LayoutSub, class Layout, class LayoutOrg> -void test_2d_subview_5d_impl_layout() { - test_2d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int** [N4],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int*** ,LayoutSub, Layout, LayoutOrg>(); - - test_2d_subview_5d_impl_type<Space, int* [N1][N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_5d_impl_type<Space, int* [N1][N2][N3][N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_5d_impl_type<Space, int* [N1][N2][N3][N4],int** [N4],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_5d_impl_type<Space, int* [N1][N2][N3][N4],int*** ,LayoutSub, Layout, LayoutOrg>(); - - test_2d_subview_5d_impl_type<Space, int** [N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_5d_impl_type<Space, int** [N2][N3][N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_5d_impl_type<Space, int** [N2][N3][N4],int** [N4],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_5d_impl_type<Space, int** [N2][N3][N4],int*** ,LayoutSub, Layout, LayoutOrg>(); - - test_2d_subview_5d_impl_type<Space, int*** [N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_5d_impl_type<Space, int*** [N3][N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg>(); - 
test_2d_subview_5d_impl_type<Space, int*** [N3][N4],int** [N4],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_5d_impl_type<Space, int*** [N3][N4],int*** ,LayoutSub, Layout, LayoutOrg>(); - - test_2d_subview_5d_impl_type<Space, int**** [N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_5d_impl_type<Space, int**** [N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_5d_impl_type<Space, int**** [N4],int** [N4],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_5d_impl_type<Space, int**** [N4],int*** ,LayoutSub, Layout, LayoutOrg>(); - - test_2d_subview_5d_impl_type<Space, int***** ,int[N2][N3][N4],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_5d_impl_type<Space, int***** ,int* [N3][N4],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_5d_impl_type<Space, int***** ,int** [N4],LayoutSub, Layout, LayoutOrg>(); - test_2d_subview_5d_impl_type<Space, int***** ,int*** ,LayoutSub, Layout, LayoutOrg>(); +template<class Space, class LayoutSub, class Layout, class LayoutOrg, class MemTraits> +void test_3d_subview_5d_impl_layout() { + test_3d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + + test_3d_subview_5d_impl_type<Space, int* [N1][N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int* [N1][N2][N3][N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int* [N1][N2][N3][N4],int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int* [N1][N2][N3][N4],int*** ,LayoutSub, Layout, 
LayoutOrg, MemTraits>(); + + test_3d_subview_5d_impl_type<Space, int** [N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int** [N2][N3][N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int** [N2][N3][N4],int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int** [N2][N3][N4],int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + + test_3d_subview_5d_impl_type<Space, int*** [N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int*** [N3][N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int*** [N3][N4],int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int*** [N3][N4],int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + + test_3d_subview_5d_impl_type<Space, int**** [N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int**** [N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int**** [N4],int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int**** [N4],int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + + test_3d_subview_5d_impl_type<Space, int***** ,int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int***** ,int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int***** ,int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, int***** ,int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + + test_3d_subview_5d_impl_type<Space, const int[N0][N1][N2][N3][N4],const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int[N0][N1][N2][N3][N4],const int* 
[N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int[N0][N1][N2][N3][N4],const int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int[N0][N1][N2][N3][N4],const int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + + test_3d_subview_5d_impl_type<Space, const int* [N1][N2][N3][N4],const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int* [N1][N2][N3][N4],const int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int* [N1][N2][N3][N4],const int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int* [N1][N2][N3][N4],const int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + + test_3d_subview_5d_impl_type<Space, const int** [N2][N3][N4],const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int** [N2][N3][N4],const int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int** [N2][N3][N4],const int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int** [N2][N3][N4],const int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + + test_3d_subview_5d_impl_type<Space, const int*** [N3][N4],const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int*** [N3][N4],const int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int*** [N3][N4],const int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int*** [N3][N4],const int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + + test_3d_subview_5d_impl_type<Space, const int**** [N4],const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int**** 
[N4],const int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int**** [N4],const int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int**** [N4],const int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + + test_3d_subview_5d_impl_type<Space, const int***** ,const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int***** ,const int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int***** ,const int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type<Space, const int***** ,const int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); } + +inline +void test_subview_legal_args_right() { + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,int>::value)); + 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,int>::value)); + + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value)); + 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); + + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,int>::value)); + + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); + 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value)); + + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>>::value)); + 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t>::value)); + + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value)); + ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value)); + 
ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value)); + + ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value)); + ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value)); + 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value)); } -template< class Space > +inline +void test_subview_legal_args_left() { + ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,int>::value)); + ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,int>::value)); + ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,int>::value)); + ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,int>::value)); + + 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); + + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,int>::value)); + 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,int>::value)); + + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value)); + 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value)); + + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t>::value)); + 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t>::value)); + + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value)); + ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value)); + + 
ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value));
+  ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value));
+}
+
+}
+
+template< class Space, class MemTraits = void>
 void test_1d_assign() {
-  Impl::test_1d_assign_impl<Space,Kokkos::LayoutLeft ,Kokkos::LayoutLeft ,Kokkos::LayoutLeft >();
+  Impl::test_1d_assign_impl<Space,Kokkos::LayoutLeft ,Kokkos::LayoutLeft ,Kokkos::LayoutLeft, MemTraits>();
   //Impl::test_1d_assign_impl<Space,Kokkos::LayoutRight ,Kokkos::LayoutLeft ,Kokkos::LayoutLeft >();
-  Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft ,Kokkos::LayoutLeft >();
+  Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft ,Kokkos::LayoutLeft, MemTraits>();
   //Impl::test_1d_assign_impl<Space,Kokkos::LayoutLeft ,Kokkos::LayoutRight ,Kokkos::LayoutLeft >();
-  Impl::test_1d_assign_impl<Space,Kokkos::LayoutRight ,Kokkos::LayoutRight ,Kokkos::LayoutRight >();
-  Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutRight ,Kokkos::LayoutRight >();
+  Impl::test_1d_assign_impl<Space,Kokkos::LayoutRight ,Kokkos::LayoutRight ,Kokkos::LayoutRight, MemTraits>();
+  Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutRight ,Kokkos::LayoutRight, MemTraits>();
   //Impl::test_1d_assign_impl<Space,Kokkos::LayoutLeft ,Kokkos::LayoutStride,Kokkos::LayoutLeft >();
   //Impl::test_1d_assign_impl<Space,Kokkos::LayoutRight ,Kokkos::LayoutStride,Kokkos::LayoutLeft >();
-  Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft >();
+  Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft, MemTraits>();
 }

-template<class Space >
+template<class Space, class MemTraits = void>
 void test_2d_subview_3d() {
-  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutRight ,Kokkos::LayoutRight, Kokkos::LayoutRight>();
-  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutRight, Kokkos::LayoutRight>();
-  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutRight>();
-  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft, Kokkos::LayoutLeft>();
-  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft>();
+  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutRight ,Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits>();
+  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits>();
+  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutRight, MemTraits>();
+  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft, Kokkos::LayoutLeft, MemTraits>();
+  Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft, MemTraits>();
 }

-template<class Space >
-void test_2d_subview_5d() {
-  Impl::test_2d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutRight, Kokkos::LayoutRight>();
-  Impl::test_2d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutRight>();
-  Impl::test_2d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft, Kokkos::LayoutLeft>();
-  Impl::test_2d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft>();
+template<class Space, class MemTraits = void>
+void test_3d_subview_5d_right() {
+  Impl::test_3d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits>();
+  Impl::test_3d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutRight, MemTraits>();
+}
+
+template<class Space, class MemTraits = void>
+void test_3d_subview_5d_left() {
+  Impl::test_3d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft, Kokkos::LayoutLeft, MemTraits>();
+  Impl::test_3d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft, MemTraits>();
 }
+
+
+namespace Impl {
+
+  template<class Layout, class Space>
+  struct FillView_3D { // Functor: decodes flat index ii into (i,j,k) and writes a value encoding those indices into a(i,j,k)
+    Kokkos::View<int***,Layout,Space> a;
+
+    KOKKOS_INLINE_FUNCTION
+    void operator() (const int& ii) const { // ii is expected to range over [0, dim0*dim1*dim2)
+      const int i = std::is_same<Layout,Kokkos::LayoutLeft>::value ? // LayoutLeft: index 0 varies fastest with ii
+        ii % a.dimension_0(): ii / (a.dimension_1()*a.dimension_2());
+      const int j = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
+        (ii / a.dimension_0()) % a.dimension_1() : (ii / a.dimension_2()) % a.dimension_1();
+      const int k = std::is_same<Layout,Kokkos::LayoutLeft>::value ? // fixed: tested LayoutRight, so k used the other layout's decode and ii -> (i,j,k) was not one-to-one
+        ii / (a.dimension_0() * a.dimension_1()) : ii % a.dimension_2();
+      a(i,j,k) = 1000000 * i + 1000 * j + k; // distinct per (i,j,k) while j,k < 1000
+    }
+  };
+
+  template<class Layout, class Space>
+  struct FillView_4D { // Functor: 4D counterpart of FillView_3D; decodes ii into (i,j,k,l) and tags a(i,j,k,l)
+    Kokkos::View<int****,Layout,Space> a;
+
+    KOKKOS_INLINE_FUNCTION
+    void operator() (const int& ii) const { // ii is expected to range over [0, dim0*dim1*dim2*dim3)
+      const int i = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
+        ii % a.dimension_0(): ii / (a.dimension_1()*a.dimension_2()*a.dimension_3());
+      const int j = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
+        (ii / a.dimension_0()) % a.dimension_1() : (ii / (a.dimension_2()*a.dimension_3()) % a.dimension_1());
+      const int k = std::is_same<Layout,Kokkos::LayoutLeft>::value ? // fixed: tested LayoutRight (inverted decode)
+        (ii / (a.dimension_0() * a.dimension_1())) % a.dimension_2() : (ii / a.dimension_3()) % a.dimension_2();
+      const int l = std::is_same<Layout,Kokkos::LayoutLeft>::value ? // fixed: tested LayoutRight (inverted decode)
+        ii / (a.dimension_0() * a.dimension_1() * a.dimension_2()) : ii % a.dimension_3();
+      a(i,j,k,l) = 1000000 * i + 10000 * j + 100 * k + l; // distinct per (i,j,k,l) while j,k,l < 100
+    }
+  };
+
+  template<class Layout, class Space, class MemTraits>
+  struct CheckSubviewCorrectness_3D_3D { // Functor: aborts unless b(i,j,k) == a(i+offset_0, j, k+offset_2) for the decoded element of b
+    Kokkos::View<const int***,Layout,Space,MemTraits> a;
+    Kokkos::View<const int***,Layout,Space,MemTraits> b;
+    int offset_0,offset_2;
+
+    KOKKOS_INLINE_FUNCTION
+    void operator() (const int& ii) const { // ii is expected to range over the elements of b
+      const int i = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
+        ii % b.dimension_0(): ii / (b.dimension_1()*b.dimension_2());
+      const int j = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
+        (ii / b.dimension_0()) % b.dimension_1() : (ii / b.dimension_2()) % b.dimension_1();
+      const int k = std::is_same<Layout,Kokkos::LayoutLeft>::value ? // fixed: tested LayoutRight, which left part of b unchecked
+        ii / (b.dimension_0() * b.dimension_1()) : ii % b.dimension_2();
+      if( a(i+offset_0,j,k+offset_2) != b(i,j,k))
+        Kokkos::abort("Error: check_subview_correctness 3D-3D (LayoutLeft -> LayoutLeft or LayoutRight -> LayoutRight)");
+    }
+  };
+
+  template<class Layout, class Space, class MemTraits>
+  struct CheckSubviewCorrectness_3D_4D { // Functor: aborts unless rank-3 b matches the slice of rank-4 a selected by index and the two offsets
+    Kokkos::View<const int****,Layout,Space,MemTraits> a;
+    Kokkos::View<const int***,Layout,Space,MemTraits> b;
+    int offset_0,offset_2,index;
+
+    KOKKOS_INLINE_FUNCTION
+    void operator() (const int& ii) const { // ii is expected to range over the elements of b
+      const int i = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
+        ii % b.dimension_0(): ii / (b.dimension_1()*b.dimension_2());
+      const int j = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
+        (ii / b.dimension_0()) % b.dimension_1() : (ii / b.dimension_2()) % b.dimension_1();
+      const int k = std::is_same<Layout,Kokkos::LayoutLeft>::value ? // fixed: tested LayoutRight, which left part of b unchecked
+        ii / (b.dimension_0() * b.dimension_1()) : ii % b.dimension_2();
+
+      int i0,i1,i2,i3;
+      if(std::is_same<Layout,Kokkos::LayoutLeft>::value) { // LayoutLeft: the fixed index is the last dimension of a
+        i0 = i + offset_0;
+        i1 = j;
+        i2 = k + offset_2;
+        i3 = index;
+      } else { // otherwise: the fixed index is the first dimension of a
+        i0 = index;
+        i1 = i + offset_0;
+        i2 = j;
+        i3 = k + offset_2;
+      }
+      if( a(i0,i1,i2,i3) != b(i,j,k))
+        Kokkos::abort("Error: check_subview_correctness 3D-4D (LayoutLeft -> LayoutLeft or LayoutRight -> LayoutRight)");
+    }
+  };
+}
+
+template<class Space, class MemTraits = void>
+void test_layoutleft_to_layoutleft() { // Fills LayoutLeft views, builds LayoutLeft subviews, and runs the check functors over every subview element
+  Impl::test_subview_legal_args_left();
+
+  {
+    Kokkos::View<int***,Kokkos::LayoutLeft,Space> a("A",100,4,3);
+    Kokkos::View<int***,Kokkos::LayoutLeft,Space> b(a,Kokkos::pair<int,int>(16,32),Kokkos::ALL,Kokkos::ALL);
+
+    Impl::FillView_3D<Kokkos::LayoutLeft,Space> fill;
+    fill.a = a;
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,a.extent(0)*a.extent(1)*a.extent(2)), fill);
+
+    Impl::CheckSubviewCorrectness_3D_3D<Kokkos::LayoutLeft,Space,MemTraits> check;
+    check.a = a;
+    check.b = b;
+    check.offset_0 = 16; // lower bound of the range taken in dimension 0
+    check.offset_2 = 0;
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,b.extent(0)*b.extent(1)*b.extent(2)), check);
+  }
+  {
+    Kokkos::View<int***,Kokkos::LayoutLeft,Space> a("A",100,4,5);
+    Kokkos::View<int***,Kokkos::LayoutLeft,Space> b(a,Kokkos::pair<int,int>(16,32),Kokkos::ALL,Kokkos::pair<int,int>(1,3));
+
+    Impl::FillView_3D<Kokkos::LayoutLeft,Space> fill;
+    fill.a = a;
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,a.extent(0)*a.extent(1)*a.extent(2)), fill);
+
+    Impl::CheckSubviewCorrectness_3D_3D<Kokkos::LayoutLeft,Space,MemTraits> check;
+    check.a = a;
+    check.b = b;
+    check.offset_0 = 16;
+    check.offset_2 = 1; // lower bound of the range taken in dimension 2
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,b.extent(0)*b.extent(1)*b.extent(2)), check);
+  }
+  {
+    Kokkos::View<int****,Kokkos::LayoutLeft,Space> a("A",100,4,5,3);
+    Kokkos::View<int***,Kokkos::LayoutLeft,Space> b(a,Kokkos::pair<int,int>(16,32),Kokkos::ALL,Kokkos::pair<int,int>(1,3),1);
+
+    Impl::FillView_4D<Kokkos::LayoutLeft,Space> fill;
+    fill.a = a;
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,a.extent(0)*a.extent(1)*a.extent(2)*a.extent(3)), fill);
+
+    Impl::CheckSubviewCorrectness_3D_4D<Kokkos::LayoutLeft,Space,MemTraits> check;
+    check.a = a;
+    check.b = b;
+    check.offset_0 = 16;
+    check.offset_2 = 1;
+    check.index = 1; // the fixed last-dimension index used to build b
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,b.extent(0)*b.extent(1)*b.extent(2)), check);
+  }
+}
+
+template<class Space, class MemTraits = void>
+void test_layoutright_to_layoutright() { // LayoutRight counterpart: fills LayoutRight views, builds LayoutRight subviews, and checks every subview element
+  Impl::test_subview_legal_args_right();
+
+  {
+    Kokkos::View<int***,Kokkos::LayoutRight,Space> a("A",100,4,3);
+    Kokkos::View<int***,Kokkos::LayoutRight,Space> b(a,Kokkos::pair<int,int>(16,32),Kokkos::ALL,Kokkos::ALL);
+
+    Impl::FillView_3D<Kokkos::LayoutRight,Space> fill;
+    fill.a = a;
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,a.extent(0)*a.extent(1)*a.extent(2)), fill);
+
+    Impl::CheckSubviewCorrectness_3D_3D<Kokkos::LayoutRight,Space,MemTraits> check;
+    check.a = a;
+    check.b = b;
+    check.offset_0 = 16;
+    check.offset_2 = 0;
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,b.extent(0)*b.extent(1)*b.extent(2)), check);
+  }
+  {
+    Kokkos::View<int****,Kokkos::LayoutRight,Space> a("A",3,4,5,100);
+    Kokkos::View<int***,Kokkos::LayoutRight,Space> b(a,1,Kokkos::pair<int,int>(1,3),Kokkos::ALL,Kokkos::ALL);
+
+
+    Impl::FillView_4D<Kokkos::LayoutRight,Space> fill;
+    fill.a = a;
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,a.extent(0)*a.extent(1)*a.extent(2)*a.extent(3)), fill);
+
+    Impl::CheckSubviewCorrectness_3D_4D<Kokkos::LayoutRight,Space,MemTraits> check;
+    check.a = a;
+    check.b = b;
+    check.offset_0 = 1; // lower bound of the range taken in dimension 1 of a (dimension 0 of b)
+    check.offset_2 = 0;
+    check.index = 1; // the fixed first-dimension index used to build b
+    Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,b.extent(0)*b.extent(1)*b.extent(2)), check);
+  }
+}
+
+
 }
 //----------------------------------------------------------------------------
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda.hpp b/lib/kokkos/core/unit_test/cuda/TestCuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a49d9ef41ed81a9b1c8b49cfe0e338bcd75d2d3e
--- /dev/null
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda.hpp
@@ -0,0 +1,107 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2.
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef KOKKOS_TEST_CUDAHPP +#define KOKKOS_TEST_CUDAHPP +#include <gtest/gtest.h> + +#include <Kokkos_Macros.hpp> + +#include <Kokkos_Core.hpp> + +#include <TestTile.hpp> + +//---------------------------------------------------------------------------- + +#include <TestSharedAlloc.hpp> +#include <TestViewMapping.hpp> + + +#include <TestViewAPI.hpp> +#include <TestViewOfClass.hpp> +#include <TestViewSubview.hpp> +#include <TestAtomic.hpp> +#include <TestAtomicOperations.hpp> +#include <TestRange.hpp> +#include <TestTeam.hpp> +#include <TestReduce.hpp> +#include <TestScan.hpp> +#include <TestAggregate.hpp> +#include <TestCompilerMacros.hpp> +#include <TestTaskScheduler.hpp> +#include <TestMemoryPool.hpp> + + +#include <TestCXX11.hpp> +#include <TestCXX11Deduction.hpp> +#include <TestTeamVector.hpp> +#include <TestTemplateMetaFunctions.hpp> + +#include <TestPolicyConstruction.hpp> + +#include <TestMDRange.hpp> + +namespace Test { + +// For Some Reason I can only have the definition of SetUp and TearDown in one cpp file ... 
+class cuda : public ::testing::Test { // gtest fixture used by the TEST_F( cuda , ... ) cases
+protected:
+  static void SetUpTestCase(); // declared here; defined below only under TEST_CUDA_INSTANTIATE_SETUP_TEARDOWN
+  static void TearDownTestCase();
+};
+
+#ifdef TEST_CUDA_INSTANTIATE_SETUP_TEARDOWN // these definitions are non-inline, so only one translation unit may define the macro before including this header
+void cuda::SetUpTestCase()
+  {
+    Kokkos::Cuda::print_configuration( std::cout ); // NOTE(review): std::cout needs <iostream>; presumably it arrives via another include - confirm
+    Kokkos::HostSpace::execution_space::initialize(); // host execution space is initialized before Cuda
+    Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) ); // selects device 0
+  }
+
+void cuda::TearDownTestCase()
+  {
+    Kokkos::Cuda::finalize(); // finalized in the reverse order of SetUpTestCase
+    Kokkos::HostSpace::execution_space::finalize();
+  }
+#endif
+}
+#endif
diff --git a/lib/kokkos/core/unit_test/TestCuda_c.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp
similarity index 63%
rename from lib/kokkos/core/unit_test/TestCuda_c.cpp
rename to lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp
index 70584cead1b5efb7b6b0b372aed95dd522c25169..113b72c70f4fb9032577a8d38a3e129fe48d86c1 100644
--- a/lib/kokkos/core/unit_test/TestCuda_c.cpp
+++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp
@@ -40,61 +40,11 @@
 // ************************************************************************
 //@HEADER
 */
-
-#include <gtest/gtest.h>
-
-#include <iostream>
-
-#include <Kokkos_Core.hpp>
-
-//----------------------------------------------------------------------------
-
-#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp>
-#include <impl/Kokkos_ViewTileLeft.hpp>
-#include <TestTile.hpp>
-
-//----------------------------------------------------------------------------
-
-#include <TestSharedAlloc.hpp>
-#include <TestViewMapping.hpp>
-
-#include <TestViewImpl.hpp>
-#include <TestAtomic.hpp>
-#include <TestAtomicOperations.hpp>
-
-#include <TestViewAPI.hpp>
-#include <TestViewSubview.hpp>
-#include <TestViewOfClass.hpp>
-
-#include <TestReduce.hpp>
-#include <TestScan.hpp>
-#include <TestRange.hpp>
-#include <TestTeam.hpp>
-#include <TestAggregate.hpp>
-#include <TestAggregateReduction.hpp>
-#include <TestCompilerMacros.hpp>
-#include <TestMemorySpaceTracking.hpp>
-#include <TestMemoryPool.hpp>
-#include <TestTeamVector.hpp>
-#include
<TestTemplateMetaFunctions.hpp> -#include <TestCXX11Deduction.hpp> - -#include <TestTaskPolicy.hpp> -#include <TestPolicyConstruction.hpp> - -//---------------------------------------------------------------------------- - -class cuda : public ::testing::Test { -protected: - static void SetUpTestCase(); - static void TearDownTestCase(); -}; - -//---------------------------------------------------------------------------- +#include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, atomic ) +TEST_F( cuda , atomics ) { const int loop_count = 1e3 ; @@ -133,7 +83,6 @@ TEST_F( cuda, atomic ) ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Cuda>(100,1) ) ); ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Cuda>(100,2) ) ); ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Cuda>(100,3) ) ); - } TEST_F( cuda , atomic_operations ) @@ -151,6 +100,8 @@ TEST_F( cuda , atomic_operations ) ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 7 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 8 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 12 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 1 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 2 ) ) ); @@ -161,6 +112,8 @@ TEST_F( cuda , atomic_operations ) ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 7 ) ) ); ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 8 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 12 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 1 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 2 ) ) ); @@ -171,6 +124,8 @@ TEST_F( cuda , atomic_operations ) ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 7 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 8 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 12 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 1 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 2 ) ) ); @@ -181,6 +136,8 @@ TEST_F( cuda , atomic_operations ) ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 7 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 8 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long 
int,Kokkos::Cuda>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 12 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 1 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 2 ) ) ); @@ -191,6 +148,8 @@ TEST_F( cuda , atomic_operations ) ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 7 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 8 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 12 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Cuda>(start, end-i, 1 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Cuda>(start, end-i, 2 ) ) ); @@ -205,171 +164,5 @@ TEST_F( cuda , atomic_operations ) } -//---------------------------------------------------------------------------- - -TEST_F( cuda, tile_layout) -{ - TestTile::test< Kokkos::Cuda , 1 , 1 >( 1 , 1 ); - TestTile::test< Kokkos::Cuda , 1 , 1 >( 2 , 3 ); - TestTile::test< Kokkos::Cuda , 1 , 1 >( 9 , 10 ); - - TestTile::test< Kokkos::Cuda , 2 , 2 >( 1 , 1 ); - TestTile::test< Kokkos::Cuda , 2 , 2 >( 2 , 3 ); - TestTile::test< Kokkos::Cuda , 2 , 2 >( 4 , 4 ); - TestTile::test< Kokkos::Cuda , 2 , 2 >( 9 , 9 ); - - TestTile::test< 
Kokkos::Cuda , 2 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 9 ); - - TestTile::test< Kokkos::Cuda , 4 , 4 >( 1 , 1 ); - TestTile::test< Kokkos::Cuda , 4 , 4 >( 4 , 4 ); - TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 11 ); - - TestTile::test< Kokkos::Cuda , 8 , 8 >( 1 , 1 ); - TestTile::test< Kokkos::Cuda , 8 , 8 >( 4 , 4 ); - TestTile::test< Kokkos::Cuda , 8 , 8 >( 9 , 9 ); - TestTile::test< Kokkos::Cuda , 8 , 8 >( 9 , 11 ); -} - -TEST_F( cuda , view_aggregate ) -{ - TestViewAggregate< Kokkos::Cuda >(); - TestViewAggregateReduction< Kokkos::Cuda >(); -} - -TEST_F( cuda , scan ) -{ - TestScan< Kokkos::Cuda >::test_range( 1 , 1000 ); - TestScan< Kokkos::Cuda >( 1000000 ); - TestScan< Kokkos::Cuda >( 10000000 ); - - TestScan< Kokkos::Cuda >( 0 ); - TestScan< Kokkos::Cuda >( 0 , 0 ); - - Kokkos::Cuda::fence(); -} - -TEST_F( cuda , team_scan ) -{ - TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 10 ); - TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); - TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 10000 ); - TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); -} - -TEST_F( cuda , memory_pool ) -{ -// typedef Kokkos::CudaUVMSpace device_type; - typedef Kokkos::Cuda device_type; - - bool val = TestMemoryPool::test_mempool< device_type >( 128, 128000000 ); - ASSERT_TRUE( val ); - - Kokkos::Cuda::fence(); - - TestMemoryPool::test_mempool2< device_type >( 64, 4, 100000, 200000 ); - - Kokkos::Cuda::fence(); - - TestMemoryPool::test_memory_exhaustion< Kokkos::Cuda >(); - - Kokkos::Cuda::fence(); -} - -} - -//---------------------------------------------------------------------------- - -TEST_F( cuda , template_meta_functions ) -{ - TestTemplateMetaFunctions<int, Kokkos::Cuda >(); -} - -//---------------------------------------------------------------------------- - -namespace Test { - -TEST_F( cuda , reduction_deduction ) -{ - 
TestCXX11::test_reduction_deduction< Kokkos::Cuda >(); -} - -TEST_F( cuda , team_vector ) -{ - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(0) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(1) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(2) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(3) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(4) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(5) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(6) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(7) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(8) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(9) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(10) ) ); -} - -TEST_F( cuda, triple_nested_parallelism ) -{ - TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 32 , 32 ); - TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 32 , 16 ); - TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 16 , 16 ); -} - -} - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_ENABLE_TASKPOLICY ) - -TEST_F( cuda , task_fib ) -{ - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskPolicy::TestFib< Kokkos::Cuda >::run(i, (i+1)*1000000 ); - } -} - -TEST_F( cuda , task_depend ) -{ - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskPolicy::TestTaskDependence< Kokkos::Cuda >::run(i); - } -} - -TEST_F( cuda , task_team ) -{ - //TestTaskPolicy::TestTaskTeam< Kokkos::Cuda >::run(1000); - TestTaskPolicy::TestTaskTeam< Kokkos::Cuda >::run(104); - TestTaskPolicy::TestTaskTeamValue< Kokkos::Cuda >::run(1000); -} - -//---------------------------------------------------------------------------- - -TEST_F( cuda , old_task_policy ) -{ - TestTaskPolicy::test_task_dep< Kokkos::Cuda >( 10 ); - - for ( long i = 0 ; i < 15 ; ++i ) { - // printf("TestTaskPolicy::test_fib< Kokkos::Cuda >(%d);\n",i); - TestTaskPolicy::test_fib< 
Kokkos::Cuda >(i,4096); - } - for ( long i = 0 ; i < 35 ; ++i ) { - // printf("TestTaskPolicy::test_fib2< Kokkos::Cuda >(%d);\n",i); - TestTaskPolicy::test_fib2< Kokkos::Cuda >(i,4096); - } -} - -TEST_F( cuda , old_task_team ) -{ - TestTaskPolicy::test_task_team< Kokkos::Cuda >(1000); -} - -TEST_F( cuda , old_task_latch ) -{ - TestTaskPolicy::test_latch< Kokkos::Cuda >(10); - TestTaskPolicy::test_latch< Kokkos::Cuda >(1000); -} - -#endif // #if defined( KOKKOS_ENABLE_TASKPOLICY ) +} // namespace test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp new file mode 100644 index 0000000000000000000000000000000000000000..80de6618e62f0f439fbfba06c08578b208389997 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp @@ -0,0 +1,189 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#define TEST_CUDA_INSTANTIATE_SETUP_TEARDOWN
+#include <cuda/TestCuda.hpp>
+
+namespace Test {
+
+TEST_F( cuda , init ) { // empty body: only the fixture's SetUpTestCase/TearDownTestCase run
+  ;
+}
+
+TEST_F( cuda , md_range ) { // runs the 2D and 3D TestMDRange for-tests on Kokkos::Cuda
+  TestMDRange_2D< Kokkos::Cuda >::test_for2(100,100);
+
+  TestMDRange_3D< Kokkos::Cuda >::test_for3(100,100,100);
+}
+
+TEST_F( cuda, policy_construction) { // instantiates the range- and team-policy construction tests
+  TestRangePolicyConstruction< Kokkos::Cuda >();
+  TestTeamPolicyConstruction< Kokkos::Cuda >();
+}
+
+TEST_F( cuda , range_tag ) // for/reduce/scan with Static and Dynamic schedules at arguments 0, 2, 3, 1000 and 1001
+{
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(0);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_scan(0);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(0);
+
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(2);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_scan(2);
+
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(3);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(3);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(3);
+
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(1000);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000);
+
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001);
+  TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001);
+}
+
+
+//----------------------------------------------------------------------------
+
+TEST_F( cuda , compiler_macros )
+{
+  ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Cuda >() ) );
+}
+
+//----------------------------------------------------------------------------
+
+TEST_F( cuda , memory_pool ) // only the first helper's boolean result is asserted here
+{
+  bool val = TestMemoryPool::test_mempool< Kokkos::Cuda >( 128, 128000000 );
+  ASSERT_TRUE( val );
+
+  TestMemoryPool::test_mempool2< Kokkos::Cuda >( 64, 4, 1000000, 2000000 );
+
+  TestMemoryPool::test_memory_exhaustion< Kokkos::Cuda >();
+}
+
+//----------------------------------------------------------------------------
+
+#if defined( KOKKOS_ENABLE_TASKDAG )
+
+TEST_F( cuda , task_fib )
+{
+  for ( int i = 0 ; i < 25 ; ++i ) {
+    TestTaskScheduler::TestFib< Kokkos::Cuda >::run(i, (i+1)*(i+1)*10000 ); // second argument grows quadratically with i
+  }
+}
+
+TEST_F( cuda , task_depend )
+{
+  for ( int i = 0 ; i < 25 ; ++i ) {
+    TestTaskScheduler::TestTaskDependence< Kokkos::Cuda >::run(i);
+  }
+}
+
+TEST_F( cuda , task_team )
+{
+  TestTaskScheduler::TestTaskTeam< Kokkos::Cuda >::run(1000);
+  //TestTaskScheduler::TestTaskTeamValue< Kokkos::Cuda >::run(1000); //put back after testing
+}
+
+#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
+
+//----------------------------------------------------------------------------
+
+#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
+TEST_F( cuda , cxx11 )
+{
+  if ( std::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Cuda >::value ) { // run only when Cuda is the default execution space
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >(1) ) );
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >(2) ) );
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >(3) ) );
+    ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >(4) ) );
+  }
+}
+#endif
+
+TEST_F( cuda, tile_layout ) // TestTile::test over several compile-time and runtime argument pairs
+{
+  TestTile::test< Kokkos::Cuda , 1 , 1 >( 1 , 1 );
+  TestTile::test< Kokkos::Cuda , 1 , 1 >( 2 , 3 );
+  TestTile::test< Kokkos::Cuda , 1 , 1 >( 9 , 10 );
+
+  TestTile::test< Kokkos::Cuda , 2 , 2 >( 1 , 1 );
+  TestTile::test< Kokkos::Cuda , 2 , 2 >( 2 , 3 );
+  TestTile::test< Kokkos::Cuda , 2 , 2 >( 4 , 4 );
+  TestTile::test< Kokkos::Cuda , 2 , 2 >( 9 , 9 );
+
+  TestTile::test< Kokkos::Cuda , 2 , 4 >( 9 , 9 );
+  TestTile::test< Kokkos::Cuda , 4 , 2 >( 9 , 9 );
+
+  TestTile::test< Kokkos::Cuda , 4 , 4 >( 1 , 1 );
+  TestTile::test< Kokkos::Cuda , 4 , 4 >( 4 , 4 );
+  TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 9 );
+  TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 11 );
+
+  TestTile::test< Kokkos::Cuda , 8 , 8 >( 1 , 1 );
+  TestTile::test< Kokkos::Cuda , 8 , 8 >( 4 , 4 );
+  TestTile::test< Kokkos::Cuda , 8 , 8 >( 9 , 9 );
+  TestTile::test< Kokkos::Cuda , 8 , 8 >( 9 , 11 );
+}
+
+#if defined (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
+#if defined (KOKKOS_COMPILER_CLANG)
+TEST_F( cuda , dispatch ) // launches an empty lambda kernel repeat*repeat times over ranges [0,j)
+{
+  const int repeat = 100 ;
+  for ( int i = 0 ; i < repeat ; ++i ) {
+  for ( int j = 0 ; j < repeat ; ++j ) {
+    Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda >(0,j)
+                        , KOKKOS_LAMBDA( int ) {} );
+  }}
+}
+#endif
+#endif
+
+} // namespace Test
+
diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_a.cpp
new file mode
100644 index 0000000000000000000000000000000000000000..b9ab9fe72d494a672cefe07f770ea38663e2ffec --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_a.cpp @@ -0,0 +1,56 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { +/* Runs TestReducers on Kokkos::Cuda: execute_integer for int and size_t, execute_float for double, execute_basic for Kokkos::complex<double>. */ +TEST_F( cuda , reducers ) +{ + TestReducers<int, Kokkos::Cuda>::execute_integer(); + TestReducers<size_t, Kokkos::Cuda>::execute_integer(); + TestReducers<double, Kokkos::Cuda>::execute_float(); + TestReducers<Kokkos::complex<double>, Kokkos::Cuda>::execute_basic(); +} + +} // namespace Test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c588d752dd21ef2135d1e4fa52c37f5dba0c37a9 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp @@ -0,0 +1,130 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { +/* The five reduce tests that follow (long_reduce through long_reduce_dynamic_view) each run their helper on Kokkos::Cuda with the arguments 0 and 1000000. */ +TEST_F( cuda, long_reduce) { + TestReduce< long , Kokkos::Cuda >( 0 ); + TestReduce< long , Kokkos::Cuda >( 1000000 ); +} + +TEST_F( cuda, double_reduce) { + TestReduce< double , Kokkos::Cuda >( 0 ); + TestReduce< double , Kokkos::Cuda >( 1000000 ); +} + +TEST_F( cuda, long_reduce_dynamic ) { + TestReduceDynamic< long , Kokkos::Cuda >( 0 ); + TestReduceDynamic< long , Kokkos::Cuda >( 1000000 ); +} + +TEST_F( cuda, double_reduce_dynamic ) { + TestReduceDynamic< double , Kokkos::Cuda >( 0 ); + TestReduceDynamic< double , Kokkos::Cuda >( 1000000 ); +} + +TEST_F( cuda, long_reduce_dynamic_view ) { + TestReduceDynamicView< long , Kokkos::Cuda >( 0 ); + TestReduceDynamicView< long , Kokkos::Cuda >( 1000000 ); +} + +TEST_F( cuda , scan ) +{ + TestScan< Kokkos::Cuda >::test_range( 1 , 1000 ); + TestScan< Kokkos::Cuda >( 0 ); + TestScan< Kokkos::Cuda >( 100000 ); + TestScan< Kokkos::Cuda >( 10000000 ); + Kokkos::Cuda::fence(); +} +/* scan_small is compiled out by the #if 0 guard that follows. */ +#if 0 +TEST_F( cuda , scan_small ) +{ + typedef TestScan< 
Kokkos::Cuda , Kokkos::Impl::CudaExecUseScanSmall > TestScanFunctor ; + for ( int i = 0 ; i < 1000 ; ++i ) { + TestScanFunctor( 10 ); + TestScanFunctor( 10000 ); + } + TestScanFunctor( 1000000 ); + TestScanFunctor( 10000000 ); + + Kokkos::Cuda::fence(); +} +#endif +/* Runs TestScanTeam under Static and Dynamic schedules with the arguments 0, 10 and 10000. */ +TEST_F( cuda , team_scan ) +{ + TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 10 ); + TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); + TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 10000 ); + TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); +} +/* Both team reduce tests (long, then double) run TestReduceTeam under Static and Dynamic schedules with the arguments 0, 3 and 100000. */ +TEST_F( cuda , team_long_reduce) { + TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +TEST_F( cuda , team_double_reduce) { + TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +TEST_F( cuda , reduction_deduction ) +{ + TestCXX11::test_reduction_deduction< Kokkos::Cuda >(); +} + +} // namespace Test + diff --git 
a/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f3cbc3b8897625f07f7c4fc810662b68cfe907e9 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp @@ -0,0 +1,399 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { +/* Kernel whose only statement is Kokkos::abort("test_abort"); no test in this file launches it. */ +__global__ +void test_abort() +{ + Kokkos::abort("test_abort"); +} +/* Kernel: if *ptr holds 42, overwrite it with 2 * 42; otherwise leave it untouched. */ +__global__ +void test_cuda_spaces_int_value( int * ptr ) +{ + if ( *ptr == 42 ) { *ptr = 2 * 42 ; } +} +/* Compile-time only: static_asserts on the assignable / accessible flags of MemorySpaceAccess and SpaceAccessibility, and on HostMirror<...>::Space, for HostSpace, CudaSpace, CudaUVMSpace and CudaHostPinnedSpace. */ +TEST_F( cuda , space_access ) +{ + //-------------------------------------- + + static_assert( + Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::HostSpace >::assignable , "" ); + + static_assert( + Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" ); + + static_assert( + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaSpace >::assignable , "" ); + + static_assert( + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaSpace >::accessible , "" ); + + static_assert( + ! 
Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaUVMSpace >::assignable , "" ); + + static_assert( + Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaUVMSpace >::accessible , "" ); + + //-------------------------------------- + + static_assert( + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaSpace >::assignable , "" ); + + static_assert( + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaUVMSpace >::assignable , "" ); + + static_assert( + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" ); + + static_assert( + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >::accessible , "" ); + + static_assert( + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::HostSpace >::assignable , "" ); + + static_assert( + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::HostSpace >::accessible , "" ); + + //-------------------------------------- + + static_assert( + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaUVMSpace >::assignable , "" ); + + static_assert( + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaSpace >::assignable , "" ); + + static_assert( + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaSpace >::accessible , "" ); + + static_assert( + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::HostSpace >::assignable , "" ); + + static_assert( + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::HostSpace >::accessible , "" ); + + static_assert( + ! 
Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" ); + + static_assert( + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaHostPinnedSpace >::accessible , "" ); + + //-------------------------------------- + + static_assert( + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" ); + + static_assert( + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace >::assignable , "" ); + + static_assert( + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace >::accessible , "" ); + + static_assert( + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaSpace >::assignable , "" ); + + static_assert( + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaSpace >::accessible , "" ); + + static_assert( + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaUVMSpace >::assignable , "" ); + + static_assert( + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaUVMSpace >::accessible , "" ); + + //-------------------------------------- + + static_assert( + ! Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda , Kokkos::HostSpace >::accessible , "" ); + + static_assert( + Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda , Kokkos::CudaSpace >::accessible , "" ); + + static_assert( + Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda , Kokkos::CudaUVMSpace >::accessible , "" ); + + static_assert( + Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda , Kokkos::CudaHostPinnedSpace >::accessible , "" ); + + static_assert( + ! 
Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , Kokkos::CudaSpace >::accessible , "" ); + + static_assert( + Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , Kokkos::CudaUVMSpace >::accessible , "" ); + + static_assert( + Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >::accessible , "" ); + + + static_assert( + std::is_same< Kokkos::Impl::HostMirror< Kokkos::CudaSpace >::Space + , Kokkos::HostSpace >::value , "" ); + + static_assert( + std::is_same< Kokkos::Impl::HostMirror< Kokkos::CudaUVMSpace >::Space + , Kokkos::Device< Kokkos::HostSpace::execution_space + , Kokkos::CudaUVMSpace > >::value , "" ); + + static_assert( + std::is_same< Kokkos::Impl::HostMirror< Kokkos::CudaHostPinnedSpace >::Space + , Kokkos::CudaHostPinnedSpace >::value , "" ); + + static_assert( + std::is_same< Kokkos::Device< Kokkos::HostSpace::execution_space + , Kokkos::CudaUVMSpace > + , Kokkos::Device< Kokkos::HostSpace::execution_space + , Kokkos::CudaUVMSpace > >::value , "" ); + + static_assert( + Kokkos::Impl::SpaceAccessibility + < Kokkos::Impl::HostMirror< Kokkos::Cuda >::Space + , Kokkos::HostSpace + >::accessible , "" ); + + static_assert( + Kokkos::Impl::SpaceAccessibility + < Kokkos::Impl::HostMirror< Kokkos::CudaSpace >::Space + , Kokkos::HostSpace + >::accessible , "" ); + + static_assert( + Kokkos::Impl::SpaceAccessibility + < Kokkos::Impl::HostMirror< Kokkos::CudaUVMSpace >::Space + , Kokkos::HostSpace + >::accessible , "" ); + + static_assert( + Kokkos::Impl::SpaceAccessibility + < Kokkos::Impl::HostMirror< Kokkos::CudaHostPinnedSpace >::Space + , Kokkos::HostSpace + >::accessible , "" ); +} +/* When CudaUVMSpace is available: the test body stores 42 through a UVM pointer, a <<<1,1>>> kernel launch doubles it, and the value read back afterwards is expected to be 84. */ +TEST_F( cuda, uvm ) +{ + if ( Kokkos::CudaUVMSpace::available() ) { + + int * uvm_ptr = (int*) Kokkos::kokkos_malloc< Kokkos::CudaUVMSpace >("uvm_ptr",sizeof(int)); + + *uvm_ptr = 42 ; + + Kokkos::Cuda::fence(); + test_cuda_spaces_int_value<<<1,1>>>(uvm_ptr); + Kokkos::Cuda::fence(); + + EXPECT_EQ( *uvm_ptr, int(2*42) ); + + 
Kokkos::kokkos_free< Kokkos::CudaUVMSpace >(uvm_ptr ); + + } +} +/* Fills a view of MAX_NUM_ALLOCS inner UVM views one allocation at a time and expects the allocation at i == MAX_NUM_ALLOCS - 1 to throw. */ +TEST_F( cuda, uvm_num_allocs ) +{ + // The max number of uvm allocations allowed is 65536 + #define MAX_NUM_ALLOCS 65536 + + if ( Kokkos::CudaUVMSpace::available() ) { + + struct TestMaxUVMAllocs { + + using view_type = Kokkos::View< double* , Kokkos::CudaUVMSpace >; + using view_of_view_type = Kokkos::View< view_type[ MAX_NUM_ALLOCS ] + , Kokkos::CudaUVMSpace >; + + TestMaxUVMAllocs() + : view_allocs_test("view_allocs_test") + { + + for ( auto i = 0; i < MAX_NUM_ALLOCS ; ++i ) { + + // Kokkos will throw a runtime exception if an attempt is made to + // allocate more than the maximum number of uvm allocations + + // In this test, the max num of allocs occurs when i = MAX_NUM_ALLOCS - 1 + // since the 'outer' view counts as one UVM allocation, leaving + // 65535 possible UVM allocations, that is 'i in [0 , 65535)' + + // The test will catch the exception thrown in this case and continue + + if ( i == ( MAX_NUM_ALLOCS - 1) ) { + EXPECT_ANY_THROW( { view_allocs_test(i) = view_type("inner_view",1); } ) ; + } + else { + if(i<MAX_NUM_ALLOCS - 1000) { + EXPECT_NO_THROW( { view_allocs_test(i) = view_type("inner_view",1); } ) ; + } else { // This might or might not throw depending on compilation options. + try { + view_allocs_test(i) = view_type("inner_view",1); + } + catch (...) 
{} + } + } + + } //end allocation for loop + + for ( auto i = 0; i < MAX_NUM_ALLOCS -1; ++i ) { + + view_allocs_test(i) = view_type(); + + } //end deallocation for loop + + view_allocs_test = view_of_view_type(); // deallocate the view of views + } + + // Member + view_of_view_type view_allocs_test ; + } ; + + // trigger the test via the TestMaxUVMAllocs constructor + TestMaxUVMAllocs() ; + + } + #undef MAX_NUM_ALLOCS +} +/* Writes i + 1 into element i of a MemSpace view from MemSpace::execution_space, fences, then counts mismatching elements with a reduction on ExecSpace and expects the count to be 0. */ +template< class MemSpace , class ExecSpace > +struct TestViewCudaAccessible { + + enum { N = 1000 }; + + using V = Kokkos::View<double*,MemSpace> ; + + V m_base ; + + struct TagInit {}; + struct TagTest {}; + + KOKKOS_INLINE_FUNCTION + void operator()( const TagInit & , const int i ) const { m_base[i] = i + 1 ; } + + KOKKOS_INLINE_FUNCTION + void operator()( const TagTest & , const int i , long & error_count ) const + { if ( m_base[i] != i + 1 ) ++error_count ; } + + TestViewCudaAccessible() + : m_base("base",N) + {} + + static void run() + { + TestViewCudaAccessible self ; + Kokkos::parallel_for( Kokkos::RangePolicy< typename MemSpace::execution_space , TagInit >(0,N) , self ); + MemSpace::execution_space::fence(); + // Next access is a different execution space, must complete prior kernel. 
+ long error_count = -1 ; + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , TagTest >(0,N) , self , error_count ); + EXPECT_EQ( error_count , 0 ); + } +}; + +TEST_F( cuda , impl_view_accessible ) +{ + TestViewCudaAccessible< Kokkos::CudaSpace , Kokkos::Cuda >::run(); + + TestViewCudaAccessible< Kokkos::CudaUVMSpace , Kokkos::Cuda >::run(); + TestViewCudaAccessible< Kokkos::CudaUVMSpace , Kokkos::HostSpace::execution_space >::run(); + + TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace , Kokkos::Cuda >::run(); + TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace::execution_space >::run(); +} +/* Initializes m_base on Kokkos::Cuda, then reads the same data through m_tex (a const double view with MemoryRandomAccess constructed from m_base) and expects zero mismatches; also checks that m_tex's reference_type is const double rather than an lvalue reference. */ +template< class MemSpace > +struct TestViewCudaTexture { + + enum { N = 1000 }; + + using V = Kokkos::View<double*,MemSpace> ; + using T = Kokkos::View<const double*, MemSpace, Kokkos::MemoryRandomAccess > ; + + V m_base ; + T m_tex ; + + struct TagInit {}; + struct TagTest {}; + + KOKKOS_INLINE_FUNCTION + void operator()( const TagInit & , const int i ) const { m_base[i] = i + 1 ; } + + KOKKOS_INLINE_FUNCTION + void operator()( const TagTest & , const int i , long & error_count ) const + { if ( m_tex[i] != i + 1 ) ++error_count ; } + + TestViewCudaTexture() + : m_base("base",N) + , m_tex( m_base ) + {} + + static void run() + { + EXPECT_TRUE( ( std::is_same< typename V::reference_type + , double & + >::value ) ); + + EXPECT_TRUE( ( std::is_same< typename T::reference_type + , const double + >::value ) ); + + EXPECT_TRUE( V::reference_type_is_lvalue_reference ); // An ordinary view + EXPECT_FALSE( T::reference_type_is_lvalue_reference ); // Texture fetch returns by value + + TestViewCudaTexture self ; + Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda , TagInit >(0,N) , self ); + long error_count = -1 ; + Kokkos::parallel_reduce( Kokkos::RangePolicy< Kokkos::Cuda , TagTest >(0,N) , self , error_count ); + EXPECT_EQ( error_count , 0 ); + } +}; + + +TEST_F( cuda , impl_view_texture ) +{ + TestViewCudaTexture< Kokkos::CudaSpace >::run(); + 
TestViewCudaTexture< Kokkos::CudaUVMSpace >::run(); +} + +} // namespace Test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fd8a647ef3f03b9d1109a464a51cd06e90de703d --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp @@ -0,0 +1,92 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { +/* Subview tests: the auto_1d and strided-assignment cases are instantiated with Kokkos::Cuda, the left_N and right_N cases with Kokkos::CudaUVMSpace. NOTE(review): this file has left_0..left_3 but only right_0, right_1 and right_3; confirm whether omitting right_2 is intentional. */ +TEST_F( cuda, view_subview_auto_1d_left ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Cuda >(); +} + +TEST_F( cuda, view_subview_auto_1d_right ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Cuda >(); +} + +TEST_F( cuda, view_subview_auto_1d_stride ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Cuda >(); +} + +TEST_F( cuda, view_subview_assign_strided ) { + TestViewSubview::test_1d_strided_assignment< Kokkos::Cuda >(); +} + +TEST_F( cuda, view_subview_left_0 ) { + TestViewSubview::test_left_0< Kokkos::CudaUVMSpace >(); +} + +TEST_F( cuda, view_subview_left_1 ) { + TestViewSubview::test_left_1< Kokkos::CudaUVMSpace >(); +} + +TEST_F( cuda, view_subview_left_2 ) { + TestViewSubview::test_left_2< Kokkos::CudaUVMSpace >(); +} + +TEST_F( cuda, view_subview_left_3 ) { + TestViewSubview::test_left_3< Kokkos::CudaUVMSpace >(); +} + +TEST_F( cuda, view_subview_right_0 ) { + TestViewSubview::test_right_0< Kokkos::CudaUVMSpace >(); +} + +TEST_F( cuda, view_subview_right_1 ) { + TestViewSubview::test_right_1< Kokkos::CudaUVMSpace >(); +} + +TEST_F( cuda, view_subview_right_3 ) { + TestViewSubview::test_right_3< Kokkos::CudaUVMSpace >(); +} + +} // namespace Test + diff --git 
a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..053fcfc2095c26540ff75e545bb4f920e0a96912 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp @@ -0,0 +1,60 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { +/* Each test below calls its helper three times on Kokkos::Cuda: with no memory-traits argument, with MemoryTraits<Atomic>, and with MemoryTraits<RandomAccess>. */ +TEST_F( cuda, view_subview_layoutleft_to_layoutleft) { + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda , Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +TEST_F( cuda, view_subview_layoutright_to_layoutright) { + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda , Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace Test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4c5f2ef72fdd45b2b9033d54c3c83e70c3c089c1 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { +/* Runs TestViewSubview::test_1d_assign for Kokkos::CudaUVMSpace with no memory-traits argument. */ +TEST_F( cuda, view_subview_1d_assign ) { + TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace >(); +} + +} // namespace Test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp new file mode 100644 index 0000000000000000000000000000000000000000..aee6f1730d6fb33e15877a043fe0ef8beaed11d9 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda, view_subview_1d_assign_atomic ) { + TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2ef48c686e1d3a202aaf5f017d9ac88cc486085d --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda, view_subview_1d_assign_randomaccess ) { + TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/src/impl/Kokkos_Singleton.hpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp similarity index 89% rename from lib/kokkos/core/src/impl/Kokkos_Singleton.hpp rename to lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp index 86bc94ab0be9e8cfd00ea5a95cebc906bd3aa312..aec123ac235ef631172b3dc7c26151d2da7e38da 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Singleton.hpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,20 +36,17 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ +#include <cuda/TestCuda.hpp> -#ifndef KOKKOS_SINGLETON_HPP -#define KOKKOS_SINGLETON_HPP - -#include <Kokkos_Macros.hpp> -#include <cstddef> - -namespace Kokkos { namespace Impl { +namespace Test { +TEST_F( cuda, view_subview_2d_from_3d ) { + TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace >(); +} -}} // namespace Kokkos::Impl +} // namespace test -#endif // KOKKOS_SINGLETON_HPP diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e8ad2319963b2750e01d518309e84c7423a387d6 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda, view_subview_2d_from_3d_atomic ) { + TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e86b4513fd8b8fdeb85c7bce130b3ae274d5e214 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda, view_subview_2d_from_3d_randomaccess ) { + TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ad9dcc0fd1faccf2c8f8ff5e254b82a33f9d998b --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda, view_subview_3d_from_5d_left ) { + TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f97d97e59c205fda791ac1d231b1429e1f8d4ec2 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda, view_subview_3d_from_5d_left_atomic ) { + TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2a07f28f830a125d865eb89a4a456cb5d0aa2b62 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda, view_subview_3d_from_5d_left_randomaccess ) { + TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3c51d9420184c91d8ddc1b15e9fb50659c1651d6 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda, view_subview_3d_from_5d_right ) { + TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp new file mode 100644 index 0000000000000000000000000000000000000000..835caa7b879891ed4cd0d24bac61bdaf6a686efb --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda, view_subview_3d_from_5d_right_atomic ) { + TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp new file mode 100644 index 0000000000000000000000000000000000000000..53bd5eee20205d56ca4356df4f2bb1118e0ff93d --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda, view_subview_3d_from_5d_right_randomaccess ) { + TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c_all.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c_all.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e4348319f695da2819e24143754777746bdc35d6 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c_all.cpp @@ -0,0 +1,12 @@ +#include<cuda/TestCuda_SubView_c01.cpp> +#include<cuda/TestCuda_SubView_c02.cpp> +#include<cuda/TestCuda_SubView_c03.cpp> +#include<cuda/TestCuda_SubView_c04.cpp> +#include<cuda/TestCuda_SubView_c05.cpp> +#include<cuda/TestCuda_SubView_c06.cpp> +#include<cuda/TestCuda_SubView_c07.cpp> +#include<cuda/TestCuda_SubView_c08.cpp> +#include<cuda/TestCuda_SubView_c09.cpp> +#include<cuda/TestCuda_SubView_c10.cpp> +#include<cuda/TestCuda_SubView_c11.cpp> +#include<cuda/TestCuda_SubView_c12.cpp> diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp new file mode 100644 index 0000000000000000000000000000000000000000..800a458af918c9a1bca1f4c3d6816c7a3c9b4403 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp @@ -0,0 +1,120 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda , team_tag ) +{ + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(0); + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0); + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0); + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0); + + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(2); + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2); + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(2); + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(2); + + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000); + TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000); +} + +TEST_F( cuda , team_shared_request) { + TestSharedTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >(); + TestSharedTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +// This test requests too much L0 scratch +//TEST_F( cuda, team_scratch_request) { +// TestScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >(); +// TestScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >(); +//} + +#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) +TEST_F( cuda , team_lambda_shared_request) { + TestLambdaSharedTeam< Kokkos::CudaSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::CudaUVMSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam<
Kokkos::CudaHostPinnedSpace, Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::CudaSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >(); + TestLambdaSharedTeam< Kokkos::CudaUVMSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >(); + TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >(); +} +#endif + +TEST_F( cuda, shmem_size) { + TestShmemSize< Kokkos::Cuda >(); +} + +TEST_F( cuda, multi_level_scratch) { + TestMultiLevelScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >(); + TestMultiLevelScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +TEST_F( cuda , team_vector ) +{ + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(0) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(1) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(2) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(3) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(4) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(5) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(6) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(7) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(8) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(9) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(10) ) ); +} + +TEST_F( cuda, triple_nested_parallelism ) +{ + TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 32 , 32 ); + TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 32 , 16 ); + TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 16 , 16 ); +} + + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c01ca1c1463c6573c8d9e51c0ca31ed43c19941e --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_a.cpp 
@@ -0,0 +1,59 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda , impl_view_mapping_a ) { + test_view_mapping< Kokkos::CudaSpace >(); + test_view_mapping_operator< Kokkos::CudaSpace >(); +} + +TEST_F( cuda , view_of_class ) +{ + TestViewMappingClassValue< Kokkos::CudaSpace >::run(); + TestViewMappingClassValue< Kokkos::CudaUVMSpace >::run(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8e821ada000678c762b22db574dd1e0d816bbd54 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp @@ -0,0 +1,53 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda , impl_view_mapping_d ) { + test_view_mapping< Kokkos::CudaHostPinnedSpace >(); + test_view_mapping_operator< Kokkos::CudaHostPinnedSpace >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_c.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_c.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cf29a68e96586dc5d194bd0b28338259784dceb0 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_c.cpp @@ -0,0 +1,53 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda , impl_view_mapping_c ) { + test_view_mapping< Kokkos::CudaUVMSpace >(); + test_view_mapping_operator< Kokkos::CudaUVMSpace >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_d.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_d.cpp new file mode 100644 index 0000000000000000000000000000000000000000..db14b5158f6efa01a6397df98041827a830158d4 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_d.cpp @@ -0,0 +1,112 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda , view_nested_view ) +{ + ::Test::view_nested_view< Kokkos::Cuda >(); +} + + + +TEST_F( cuda , view_remap ) +{ + enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; + + typedef Kokkos::View< double*[N1][N2][N3] , + Kokkos::LayoutRight , + Kokkos::CudaUVMSpace > output_type ; + + typedef Kokkos::View< int**[N2][N3] , + Kokkos::LayoutLeft , + Kokkos::CudaUVMSpace > input_type ; + + typedef Kokkos::View< int*[N0][N2][N3] , + Kokkos::LayoutLeft , + Kokkos::CudaUVMSpace > diff_type ; + + output_type output( "output" , N0 ); + input_type input ( "input" , N0 , N1 ); + diff_type diff ( "diff" , N0 ); + + Kokkos::fence(); + int value = 0 ; + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { + input(i0,i1,i2,i3) = ++value ; + }}}} + Kokkos::fence(); + + // Kokkos::deep_copy( diff , input ); // throw with incompatible shape + Kokkos::deep_copy( output , input ); + + Kokkos::fence(); + value = 0 ; + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { + ++value ; + ASSERT_EQ( value , ((int) 
output(i0,i1,i2,i3) ) ); + }}}} + Kokkos::fence(); +} + +//---------------------------------------------------------------------------- + +TEST_F( cuda , view_aggregate ) +{ + TestViewAggregate< Kokkos::Cuda >(); +} + +TEST_F( cuda , template_meta_functions ) +{ + TestTemplateMetaFunctions<int, Kokkos::Cuda >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp new file mode 100644 index 0000000000000000000000000000000000000000..07d425647330228815a7103e6f7596a8a2f2a460 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp @@ -0,0 +1,63 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda , impl_shared_alloc ) { + test_shared_alloc< Kokkos::CudaSpace , Kokkos::HostSpace::execution_space >(); + test_shared_alloc< Kokkos::CudaUVMSpace , Kokkos::HostSpace::execution_space >(); + test_shared_alloc< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace::execution_space >(); +} + +TEST_F( cuda , impl_view_mapping_b ) { + test_view_mapping_subview< Kokkos::CudaSpace >(); + test_view_mapping_subview< Kokkos::CudaUVMSpace >(); + test_view_mapping_subview< Kokkos::CudaHostPinnedSpace >(); + TestViewMappingAtomic< Kokkos::CudaSpace >::run(); + TestViewMappingAtomic< Kokkos::CudaUVMSpace >::run(); + TestViewMappingAtomic< Kokkos::CudaHostPinnedSpace >::run(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_f.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_f.cpp new file mode 100644 index 0000000000000000000000000000000000000000..34721f02dc73f418ba7c348fe65c3a59d534dc7c --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_f.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda, view_api_a) { + typedef Kokkos::View< const int * , Kokkos::Cuda , Kokkos::MemoryTraits< Kokkos::RandomAccess > > view_texture_managed ; + typedef Kokkos::View< const int * , Kokkos::Cuda , Kokkos::MemoryTraits< Kokkos::RandomAccess | Kokkos::Unmanaged > > view_texture_unmanaged ; + + TestViewAPI< double , Kokkos::Cuda >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_g.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_g.cpp new file mode 100644 index 0000000000000000000000000000000000000000..abbcf3bf8bfa6d89ff5c5a5891d8cd16018becf0 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_g.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda, view_api_b) { + TestViewAPI< double , Kokkos::CudaUVMSpace >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_h.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_h.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9899642035ada183fe7b7b5c4a60610e3c271739 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_h.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <cuda/TestCuda.hpp> + +namespace Test { + +TEST_F( cuda, view_api_c) { + TestViewAPI< double , Kokkos::CudaHostPinnedSpace >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp new file mode 100644 index 0000000000000000000000000000000000000000..01324a1eeb82f21802a1055a0c42609e0b1e5c44 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp @@ -0,0 +1,116 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef KOKKOS_TEST_OPENMPHPP +#define KOKKOS_TEST_OPENMPHPP +#include <gtest/gtest.h> + +#include <Kokkos_Macros.hpp> +#ifdef KOKKOS_LAMBDA +#undef KOKKOS_LAMBDA +#endif +#define KOKKOS_LAMBDA [=] + +#include <Kokkos_Core.hpp> + +#include <TestTile.hpp> + +//---------------------------------------------------------------------------- + +#include <TestSharedAlloc.hpp> +#include <TestViewMapping.hpp> + + +#include <TestViewAPI.hpp> +#include <TestViewOfClass.hpp> +#include <TestViewSubview.hpp> +#include <TestAtomic.hpp> +#include <TestAtomicOperations.hpp> +#include <TestRange.hpp> +#include <TestTeam.hpp> +#include <TestReduce.hpp> +#include <TestScan.hpp> +#include <TestAggregate.hpp> +#include <TestCompilerMacros.hpp> +#include <TestTaskScheduler.hpp> +#include <TestMemoryPool.hpp> + + +#include <TestCXX11.hpp> +#include <TestCXX11Deduction.hpp> +#include <TestTeamVector.hpp> +#include <TestTemplateMetaFunctions.hpp> + +#include <TestPolicyConstruction.hpp> + +#include <TestMDRange.hpp> + +namespace Test { + +class openmp : public ::testing::Test { +protected: + static void SetUpTestCase() + { + const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); + const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); + const unsigned 
threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); + + const unsigned threads_count = std::max( 1u , numa_count ) * + std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 ); + + Kokkos::OpenMP::initialize( threads_count ); + Kokkos::OpenMP::print_configuration( std::cout , true ); + srand(10231); + } + + static void TearDownTestCase() + { + Kokkos::OpenMP::finalize(); + + omp_set_num_threads(1); + + ASSERT_EQ( 1 , omp_get_max_threads() ); + } +}; + +} +#endif diff --git a/lib/kokkos/core/unit_test/TestOpenMP.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp similarity index 80% rename from lib/kokkos/core/unit_test/TestOpenMP.cpp rename to lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp index 6e8fc4517917bfcaaeecba6fbc2ac59f6090350d..91722c8490be3ad33a635359c449eaa3df993369 100644 --- a/lib/kokkos/core/unit_test/TestOpenMP.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,118 +36,14 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ - -#include <gtest/gtest.h> - -#include <Kokkos_Macros.hpp> -#ifdef KOKKOS_LAMBDA -#undef KOKKOS_LAMBDA -#endif -#define KOKKOS_LAMBDA [=] - -#include <Kokkos_Core.hpp> - -//---------------------------------------------------------------------------- - -#include <TestViewImpl.hpp> -#include <TestAtomic.hpp> -#include <TestAtomicOperations.hpp> - -#include <TestViewAPI.hpp> -#include <TestViewSubview.hpp> -#include <TestViewOfClass.hpp> - -#include <TestSharedAlloc.hpp> -#include <TestViewMapping.hpp> - -#include <TestRange.hpp> -#include <TestTeam.hpp> -#include <TestReduce.hpp> -#include <TestScan.hpp> -#include <TestAggregate.hpp> -#include <TestAggregateReduction.hpp> -#include <TestCompilerMacros.hpp> -#include <TestMemoryPool.hpp> - - -#include <TestCXX11.hpp> -#include <TestCXX11Deduction.hpp> -#include <TestTeamVector.hpp> -#include <TestMemorySpaceTracking.hpp> -#include <TestTemplateMetaFunctions.hpp> - -#include <TestPolicyConstruction.hpp> - -#include <TestMDRange.hpp> +#include <openmp/TestOpenMP.hpp> namespace Test { -class openmp : public ::testing::Test { -protected: - static void SetUpTestCase() - { - const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); - const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); - const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); - - const unsigned threads_count = std::max( 1u , numa_count ) * - std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 ); - - Kokkos::OpenMP::initialize( threads_count ); - Kokkos::OpenMP::print_configuration( std::cout , true ); - srand(10231); - } - - static void TearDownTestCase() - { - Kokkos::OpenMP::finalize(); - - omp_set_num_threads(1); - - ASSERT_EQ( 1 , omp_get_max_threads() ); - } -}; - - -TEST_F( openmp , md_range ) { - TestMDRange_2D< Kokkos::OpenMP >::test_for2(100,100); 
- - TestMDRange_3D< Kokkos::OpenMP >::test_for3(100,100,100); -} - -TEST_F( openmp , impl_shared_alloc ) { - test_shared_alloc< Kokkos::HostSpace , Kokkos::OpenMP >(); -} - -TEST_F( openmp, policy_construction) { - TestRangePolicyConstruction< Kokkos::OpenMP >(); - TestTeamPolicyConstruction< Kokkos::OpenMP >(); -} - -TEST_F( openmp , impl_view_mapping ) { - test_view_mapping< Kokkos::OpenMP >(); - test_view_mapping_subview< Kokkos::OpenMP >(); - test_view_mapping_operator< Kokkos::OpenMP >(); - TestViewMappingAtomic< Kokkos::OpenMP >::run(); -} - -TEST_F( openmp, view_impl) { - test_view_impl< Kokkos::OpenMP >(); -} - -TEST_F( openmp, view_api) { - TestViewAPI< double , Kokkos::OpenMP >(); -} - -TEST_F( openmp , view_nested_view ) -{ - ::Test::view_nested_view< Kokkos::OpenMP >(); -} - TEST_F( openmp , atomics ) { const int loop_count = 1e4 ; @@ -204,6 +100,8 @@ TEST_F( openmp , atomic_operations ) ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 7 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 8 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 12 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 1 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 2 ) ) ); @@ -214,6 +112,8 @@ TEST_F( openmp , atomic_operations ) ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 7 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned 
int,Kokkos::OpenMP>(start, end-i, 8 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 12 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 1 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 2 ) ) ); @@ -224,6 +124,8 @@ TEST_F( openmp , atomic_operations ) ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 7 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 8 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 12 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 1 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 2 ) ) ); @@ -234,6 +136,8 @@ TEST_F( openmp , atomic_operations ) ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 7 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 8 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 9 ) ) ); + 
ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 12 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 1 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 2 ) ) ); @@ -244,6 +148,8 @@ TEST_F( openmp , atomic_operations ) ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 7 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 8 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 12 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::OpenMP>(start, end-i, 1 ) ) ); ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::OpenMP>(start, end-i, 2 ) ) ); diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c6910363515a0c2d0ec0531ba1f643f8afaf1983 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp @@ -0,0 +1,189 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <openmp/TestOpenMP.hpp> + +namespace Test { + +TEST_F( openmp , init ) { + ; +} + +TEST_F( openmp , md_range ) { + TestMDRange_2D< Kokkos::OpenMP >::test_for2(100,100); + + TestMDRange_3D< Kokkos::OpenMP >::test_for3(100,100,100); +} + +TEST_F( openmp, policy_construction) { + TestRangePolicyConstruction< Kokkos::OpenMP >(); + TestTeamPolicyConstruction< Kokkos::OpenMP >(); +} + +TEST_F( openmp , range_tag ) +{ + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(0); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_scan(0); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(0); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(0); + + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(2); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_scan(2); + + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(3); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(3); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(3); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(3); + + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000); + + TestRange< Kokkos::OpenMP , 
Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001); + TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000); +} + + +//---------------------------------------------------------------------------- + +TEST_F( openmp , compiler_macros ) +{ + ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::OpenMP >() ) ); +} + +//---------------------------------------------------------------------------- + +TEST_F( openmp , memory_pool ) +{ + bool val = TestMemoryPool::test_mempool< Kokkos::OpenMP >( 128, 128000000 ); + ASSERT_TRUE( val ); + + TestMemoryPool::test_mempool2< Kokkos::OpenMP >( 64, 4, 1000000, 2000000 ); + + TestMemoryPool::test_memory_exhaustion< Kokkos::OpenMP >(); +} + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_ENABLE_TASKDAG ) + +TEST_F( openmp , task_fib ) +{ + for ( int i = 0 ; i < 25 ; ++i ) { + TestTaskScheduler::TestFib< Kokkos::OpenMP >::run(i, (i+1)*(i+1)*10000 ); + } +} + +TEST_F( openmp , task_depend ) +{ + for ( int i = 0 ; i < 25 ; ++i ) { + TestTaskScheduler::TestTaskDependence< Kokkos::OpenMP >::run(i); + } +} + +TEST_F( openmp , task_team ) +{ + TestTaskScheduler::TestTaskTeam< Kokkos::OpenMP >::run(1000); + //TestTaskScheduler::TestTaskTeamValue< Kokkos::OpenMP >::run(1000); //put back after testing +} + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) +TEST_F( openmp , cxx11 ) +{ + if ( std::is_same< Kokkos::DefaultExecutionSpace , Kokkos::OpenMP >::value ) { + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(1) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(2) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(3) ) ); + ASSERT_TRUE( ( 
TestCXX11::Test< Kokkos::OpenMP >(4) ) ); + } +} +#endif + +TEST_F( openmp, tile_layout ) +{ + TestTile::test< Kokkos::OpenMP , 1 , 1 >( 1 , 1 ); + TestTile::test< Kokkos::OpenMP , 1 , 1 >( 2 , 3 ); + TestTile::test< Kokkos::OpenMP , 1 , 1 >( 9 , 10 ); + + TestTile::test< Kokkos::OpenMP , 2 , 2 >( 1 , 1 ); + TestTile::test< Kokkos::OpenMP , 2 , 2 >( 2 , 3 ); + TestTile::test< Kokkos::OpenMP , 2 , 2 >( 4 , 4 ); + TestTile::test< Kokkos::OpenMP , 2 , 2 >( 9 , 9 ); + + TestTile::test< Kokkos::OpenMP , 2 , 4 >( 9 , 9 ); + TestTile::test< Kokkos::OpenMP , 4 , 2 >( 9 , 9 ); + + TestTile::test< Kokkos::OpenMP , 4 , 4 >( 1 , 1 ); + TestTile::test< Kokkos::OpenMP , 4 , 4 >( 4 , 4 ); + TestTile::test< Kokkos::OpenMP , 4 , 4 >( 9 , 9 ); + TestTile::test< Kokkos::OpenMP , 4 , 4 >( 9 , 11 ); + + TestTile::test< Kokkos::OpenMP , 8 , 8 >( 1 , 1 ); + TestTile::test< Kokkos::OpenMP , 8 , 8 >( 4 , 4 ); + TestTile::test< Kokkos::OpenMP , 8 , 8 >( 9 , 9 ); + TestTile::test< Kokkos::OpenMP , 8 , 8 >( 9 , 11 ); +} + + +TEST_F( openmp , dispatch ) +{ + const int repeat = 100 ; + for ( int i = 0 ; i < repeat ; ++i ) { + for ( int j = 0 ; j < repeat ; ++j ) { + Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::OpenMP >(0,j) + , KOKKOS_LAMBDA( int ) {} ); + }} +} + + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d41e1493eea6306d68087d1a8562ab963e1ec039 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp @@ -0,0 +1,138 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <openmp/TestOpenMP.hpp> + +namespace Test { + +TEST_F( openmp, long_reduce) { + TestReduce< long , Kokkos::OpenMP >( 0 ); + TestReduce< long , Kokkos::OpenMP >( 1000000 ); +} + +TEST_F( openmp, double_reduce) { + TestReduce< double , Kokkos::OpenMP >( 0 ); + TestReduce< double , Kokkos::OpenMP >( 1000000 ); +} + +TEST_F( openmp , reducers ) +{ + TestReducers<int, Kokkos::OpenMP>::execute_integer(); + TestReducers<size_t, Kokkos::OpenMP>::execute_integer(); + TestReducers<double, Kokkos::OpenMP>::execute_float(); + TestReducers<Kokkos::complex<double>, Kokkos::OpenMP>::execute_basic(); +} + +TEST_F( openmp, long_reduce_dynamic ) { + TestReduceDynamic< long , Kokkos::OpenMP >( 0 ); + TestReduceDynamic< long , Kokkos::OpenMP >( 1000000 ); +} + +TEST_F( openmp, double_reduce_dynamic ) { + TestReduceDynamic< double , Kokkos::OpenMP >( 0 ); + TestReduceDynamic< double , Kokkos::OpenMP >( 1000000 ); +} + +TEST_F( openmp, long_reduce_dynamic_view ) { + TestReduceDynamicView< long , Kokkos::OpenMP >( 0 ); + TestReduceDynamicView< long , Kokkos::OpenMP >( 1000000 ); +} + +TEST_F( openmp , scan ) +{ + TestScan< Kokkos::OpenMP >::test_range( 1 , 1000 ); + TestScan< Kokkos::OpenMP >( 0 ); + TestScan< Kokkos::OpenMP >( 100000 ); + TestScan< Kokkos::OpenMP >( 10000000 ); + Kokkos::OpenMP::fence(); +} + +#if 0 +TEST_F( openmp , scan_small ) +{ + typedef TestScan< Kokkos::OpenMP , Kokkos::Impl::OpenMPExecUseScanSmall > TestScanFunctor ; + for ( int i = 0 ; i < 1000 ; ++i ) { + TestScanFunctor( 10 ); + TestScanFunctor( 10000 ); + } + TestScanFunctor( 1000000 ); + TestScanFunctor( 10000000 ); + + Kokkos::OpenMP::fence(); +} +#endif + +TEST_F( openmp , team_scan ) +{ + TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestScanTeam< Kokkos::OpenMP , 
Kokkos::Schedule<Kokkos::Static> >( 10 ); + TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); + TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 10000 ); + TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); +} + +TEST_F( openmp , team_long_reduce) { + TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +TEST_F( openmp , team_double_reduce) { + TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +TEST_F( openmp , reduction_deduction ) +{ + TestCXX11::test_reduction_deduction< Kokkos::OpenMP >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/TestOpenMP_a.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp similarity index 70% rename from lib/kokkos/core/unit_test/TestOpenMP_a.cpp rename to lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp index 64eac66804b1ef6a053930d6db47abb566ccda66..9854417e42da5a8bdd6986b85fbdd754bab3e57b 100644 --- a/lib/kokkos/core/unit_test/TestOpenMP_a.cpp +++ 
b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,60 +36,14 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ - -#include <gtest/gtest.h> - -#include <Kokkos_Macros.hpp> -#ifdef KOKKOS_LAMBDA -#undef KOKKOS_LAMBDA -#endif -#define KOKKOS_LAMBDA [=] - -#include <Kokkos_Core.hpp> - -//---------------------------------------------------------------------------- - -#include <TestViewImpl.hpp> -#include <TestAtomic.hpp> - -#include <TestViewAPI.hpp> -#include <TestViewSubview.hpp> -#include <TestViewOfClass.hpp> - -#include <TestSharedAlloc.hpp> -#include <TestViewMapping.hpp> - -#include <TestRange.hpp> -#include <TestTeam.hpp> -#include <TestReduce.hpp> -#include <TestScan.hpp> -#include <TestAggregate.hpp> -#include <TestAggregateReduction.hpp> -#include <TestCompilerMacros.hpp> -#include <TestMemoryPool.hpp> - - -#include <TestCXX11.hpp> -#include <TestCXX11Deduction.hpp> -#include <TestTeamVector.hpp> -#include <TestMemorySpaceTracking.hpp> -#include <TestTemplateMetaFunctions.hpp> - -#include <TestPolicyConstruction.hpp> - +#include <openmp/TestOpenMP.hpp> namespace Test { -class openmp : public ::testing::Test { -protected: - static void SetUpTestCase(); - static void TearDownTestCase(); -}; - TEST_F( openmp, view_subview_auto_1d_left ) { TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::OpenMP 
>(); } @@ -134,17 +88,5 @@ TEST_F( openmp, view_subview_right_3 ) { TestViewSubview::test_right_3< Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_1d_assign ) { - TestViewSubview::test_1d_assign< Kokkos::OpenMP >(); -} - -TEST_F( openmp, view_subview_2d_from_3d ) { - TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP >(); -} - -TEST_F( openmp, view_subview_2d_from_5d ) { - TestViewSubview::test_2d_subview_5d< Kokkos::OpenMP >(); -} - } // namespace test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2aa1fc5c633ffab0319c37c7a00a9abe48438597 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp @@ -0,0 +1,60 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <openmp/TestOpenMP.hpp> + +namespace Test { + +TEST_F( openmp, view_subview_layoutleft_to_layoutleft) { + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +TEST_F( openmp, view_subview_layoutright_to_layoutright) { + TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1a6871cfca8f3136b13011f66576cd7a9d891978 --- 
/dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <openmp/TestOpenMP.hpp> + +namespace Test { + +TEST_F( openmp, view_subview_1d_assign ) { + TestViewSubview::test_1d_assign< Kokkos::OpenMP >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b04edbb997d564a2e921bacf7b36959b17e8755f --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <openmp/TestOpenMP.hpp> + +namespace Test { + +TEST_F( openmp, view_subview_1d_assign_atomic ) { + TestViewSubview::test_1d_assign< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp new file mode 100644 index 0000000000000000000000000000000000000000..765e235830db2f7e48ad8fe9df271429fef2c2ab --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <openmp/TestOpenMP.hpp> + +namespace Test { + +TEST_F( openmp, view_subview_1d_assign_randomaccess ) { + TestViewSubview::test_1d_assign< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9d8b62708a3d4d898ddbc923b733c78c869c2826 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <openmp/TestOpenMP.hpp> + +namespace Test { + +TEST_F( openmp, view_subview_2d_from_3d ) { + TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9c19cf0e57dcf7058f4f0aeb4752465c470e9fa9 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <openmp/TestOpenMP.hpp> + +namespace Test { + +TEST_F( openmp, view_subview_2d_from_3d_atomic ) { + TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c1bdf72351b02958f5e1e857c41f7e5d999ade64 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <openmp/TestOpenMP.hpp> + +namespace Test { + +TEST_F( openmp, view_subview_2d_from_3d_randomaccess ) { + TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp new file mode 100644 index 0000000000000000000000000000000000000000..08a3b5a54a2c66599ebc61384357324a79815507 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <openmp/TestOpenMP.hpp> + +namespace Test { + +TEST_F( openmp, view_subview_3d_from_5d_left ) { + TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0864ebbdaa44b1bd00a154fe2f7fcf4b55ae48eb --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <openmp/TestOpenMP.hpp> + +namespace Test { + +TEST_F( openmp, view_subview_3d_from_5d_left_atomic ) { + TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e38dfecbf6e353bcab69f7341d2754ea6ef85cf9 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <openmp/TestOpenMP.hpp> + +namespace Test { + +TEST_F( openmp, view_subview_3d_from_5d_left_randomaccess ) { + TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp similarity index 89% rename from lib/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp rename to lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp index 61d2e35702f998a83e0796e7d291dff7e3466dd4..b7e4683d23d18bb838c97a1fa198b2d38874de77 100644 --- a/lib/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,21 +36,17 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ +#include <openmp/TestOpenMP.hpp> -#ifndef KOKKOS_VIEWTILELEFT_HPP -#define KOKKOS_VIEWTILELEFT_HPP - -#include <impl/KokkosExp_ViewTile.hpp> - -namespace Kokkos { - -using Kokkos::Experimental::tile_subview ; +namespace Test { +TEST_F( openmp, view_subview_3d_from_5d_right ) { + TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP >(); } -#endif /* #ifndef KOKKOS_VIEWTILELEFT_HPP */ +} // namespace test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fc3e66fd4853c6104503aaf461eda97183cb44e1 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <openmp/TestOpenMP.hpp> + +namespace Test { + +TEST_F( openmp, view_subview_3d_from_5d_right_atomic ) { + TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e21a13ee579e5052241252ffa6b99ba49f9c6b47 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <openmp/TestOpenMP.hpp> + +namespace Test { + +TEST_F( openmp, view_subview_3d_from_5d_right_randomaccess ) { + TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c_all.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c_all.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9da159ab5773a0a7b1a49605cf1a88294a29d09d --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c_all.cpp @@ -0,0 +1,12 @@ +#include<openmp/TestOpenMP_SubView_c01.cpp> +#include<openmp/TestOpenMP_SubView_c02.cpp> +#include<openmp/TestOpenMP_SubView_c03.cpp> +#include<openmp/TestOpenMP_SubView_c04.cpp> +#include<openmp/TestOpenMP_SubView_c05.cpp> +#include<openmp/TestOpenMP_SubView_c06.cpp> +#include<openmp/TestOpenMP_SubView_c07.cpp> +#include<openmp/TestOpenMP_SubView_c08.cpp> +#include<openmp/TestOpenMP_SubView_c09.cpp> +#include<openmp/TestOpenMP_SubView_c10.cpp> +#include<openmp/TestOpenMP_SubView_c11.cpp> +#include<openmp/TestOpenMP_SubView_c12.cpp> diff --git a/lib/kokkos/core/unit_test/TestOpenMP_b.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp similarity index 52% rename from lib/kokkos/core/unit_test/TestOpenMP_b.cpp rename to lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp index 6cc2476014c8d8e07ef6bc6a60b38c3660d3d7c4..1539e30e1936998c8ea389144c0617b468ab5181 100644 --- a/lib/kokkos/core/unit_test/TestOpenMP_b.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp @@ -40,122 +40,29 @@ // ************************************************************************ //@HEADER */ - -#include <gtest/gtest.h> - -#include <Kokkos_Macros.hpp> -#ifdef KOKKOS_LAMBDA -#undef KOKKOS_LAMBDA -#endif -#define KOKKOS_LAMBDA [=] - -#include 
<Kokkos_Core.hpp> - -//---------------------------------------------------------------------------- - -#include <TestViewImpl.hpp> -#include <TestAtomic.hpp> - -#include <TestViewAPI.hpp> -#include <TestViewSubview.hpp> -#include <TestViewOfClass.hpp> - -#include <TestSharedAlloc.hpp> -#include <TestViewMapping.hpp> - -#include <TestRange.hpp> -#include <TestTeam.hpp> -#include <TestReduce.hpp> -#include <TestScan.hpp> -#include <TestAggregate.hpp> -#include <TestAggregateReduction.hpp> -#include <TestCompilerMacros.hpp> -#include <TestMemoryPool.hpp> - - -#include <TestCXX11.hpp> -#include <TestCXX11Deduction.hpp> -#include <TestTeamVector.hpp> -#include <TestMemorySpaceTracking.hpp> -#include <TestTemplateMetaFunctions.hpp> - -#include <TestPolicyConstruction.hpp> - +#include <openmp/TestOpenMP.hpp> namespace Test { -class openmp : public ::testing::Test { -protected: - static void SetUpTestCase(); - static void TearDownTestCase(); -}; - -TEST_F( openmp , range_tag ) -{ - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000); -} - TEST_F( openmp , team_tag ) { + TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(0); + TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0); + TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0); + TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0); + TestTeamPolicy< Kokkos::OpenMP , 
Kokkos::Schedule<Kokkos::Static> >::test_for(2); TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2); TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(2); TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(2); + TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000); TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000); } -TEST_F( openmp, long_reduce) { - TestReduce< long , Kokkos::OpenMP >( 1000000 ); -} - -TEST_F( openmp, double_reduce) { - TestReduce< double , Kokkos::OpenMP >( 1000000 ); -} - -TEST_F( openmp, long_reduce_dynamic ) { - TestReduceDynamic< long , Kokkos::OpenMP >( 1000000 ); -} - -TEST_F( openmp, double_reduce_dynamic ) { - TestReduceDynamic< double , Kokkos::OpenMP >( 1000000 ); -} - -TEST_F( openmp, long_reduce_dynamic_view ) { - TestReduceDynamicView< long , Kokkos::OpenMP >( 1000000 ); -} - -TEST_F( openmp , reducers ) -{ - TestReducers<int, Kokkos::OpenMP>::execute_integer(); - TestReducers<size_t, Kokkos::OpenMP>::execute_integer(); - TestReducers<double, Kokkos::OpenMP>::execute_float(); - TestReducers<Kokkos::complex<double>, Kokkos::OpenMP>::execute_basic(); -} - -TEST_F( openmp, team_long_reduce) { - TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); -} - -TEST_F( openmp, team_double_reduce) { - TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< double , Kokkos::OpenMP , 
Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); -} - -TEST_F( openmp, team_shared_request) { +TEST_F( openmp , team_shared_request) { TestSharedTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >(); TestSharedTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >(); } @@ -166,7 +73,7 @@ TEST_F( openmp, team_scratch_request) { } #if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) -TEST_F( openmp, team_lambda_shared_request) { +TEST_F( openmp , team_lambda_shared_request) { TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >(); TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >(); } @@ -181,5 +88,35 @@ TEST_F( openmp, multi_level_scratch) { TestMultiLevelScratchTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >(); } +TEST_F( openmp , team_vector ) +{ + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(0) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(1) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(2) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(3) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(4) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(5) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(6) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(7) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(8) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(9) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(10) ) ); +} + +#ifdef KOKKOS_COMPILER_GNU +#if ( KOKKOS_COMPILER_GNU == 472 ) +#define SKIP_TEST +#endif +#endif + +#ifndef SKIP_TEST +TEST_F( openmp, triple_nested_parallelism ) +{ + TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048 , 32 , 32 ); + 
TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048 , 32 , 16 ); + TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048 , 16 , 16 ); +} +#endif + } // namespace test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..82cbf3ea18ecf7c3c424c73fe3e41ebf4a4e0c26 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_a.cpp @@ -0,0 +1,53 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <openmp/TestOpenMP.hpp> + +namespace Test { + +TEST_F( openmp , impl_view_mapping_a ) { + test_view_mapping< Kokkos::OpenMP >(); + test_view_mapping_operator< Kokkos::OpenMP >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b2d4f87fdd417ab2d1036884dcce4b0df5793396 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp @@ -0,0 +1,121 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <openmp/TestOpenMP.hpp> + +namespace Test { + +TEST_F( openmp , impl_shared_alloc ) { + test_shared_alloc< Kokkos::HostSpace , Kokkos::OpenMP >(); +} + +TEST_F( openmp , impl_view_mapping_b ) { + test_view_mapping_subview< Kokkos::OpenMP >(); + TestViewMappingAtomic< Kokkos::OpenMP >::run(); +} + +TEST_F( openmp, view_api) { + TestViewAPI< double , Kokkos::OpenMP >(); +} + +TEST_F( openmp , view_nested_view ) +{ + ::Test::view_nested_view< Kokkos::OpenMP >(); +} + + + +TEST_F( openmp , view_remap ) +{ + enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; + + typedef Kokkos::View< double*[N1][N2][N3] , + Kokkos::LayoutRight , + Kokkos::OpenMP > output_type ; + + typedef Kokkos::View< int**[N2][N3] , + Kokkos::LayoutLeft , + Kokkos::OpenMP > input_type ; + + typedef Kokkos::View< int*[N0][N2][N3] , + Kokkos::LayoutLeft , + Kokkos::OpenMP > diff_type ; + + output_type output( "output" , N0 ); + input_type input ( "input" , N0 , N1 ); + diff_type diff ( "diff" , N0 ); + + int value = 0 ; + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { + input(i0,i1,i2,i3) = ++value ; + }}}} + + // Kokkos::deep_copy( diff , input ); // throw with incompatible shape + Kokkos::deep_copy( output , input ); + + value = 0 ; + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { + ++value ; + ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); + }}}} +} + +//---------------------------------------------------------------------------- + +TEST_F( openmp , view_aggregate ) +{ + TestViewAggregate< Kokkos::OpenMP >(); +} + +TEST_F( openmp , template_meta_functions ) +{ + TestTemplateMetaFunctions<int, Kokkos::OpenMP >(); +} + +} 
// namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial.hpp b/lib/kokkos/core/unit_test/serial/TestSerial.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a966257fca62f727dd050ac8e9ba6f32cf6985ca --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial.hpp @@ -0,0 +1,102 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef KOKKOS_TEST_SERIALHPP +#define KOKKOS_TEST_SERIALHPP +#include <gtest/gtest.h> + +#include <Kokkos_Macros.hpp> +#ifdef KOKKOS_LAMBDA +#undef KOKKOS_LAMBDA +#endif +#define KOKKOS_LAMBDA [=] + +#include <Kokkos_Core.hpp> + +#include <TestTile.hpp> + +//---------------------------------------------------------------------------- + +#include <TestSharedAlloc.hpp> +#include <TestViewMapping.hpp> + + +#include <TestViewAPI.hpp> +#include <TestViewOfClass.hpp> +#include <TestViewSubview.hpp> +#include <TestAtomic.hpp> +#include <TestAtomicOperations.hpp> +#include <TestRange.hpp> +#include <TestTeam.hpp> +#include <TestReduce.hpp> +#include <TestScan.hpp> +#include <TestAggregate.hpp> +#include <TestCompilerMacros.hpp> +#include <TestTaskScheduler.hpp> +#include <TestMemoryPool.hpp> + + +#include <TestCXX11.hpp> +#include <TestCXX11Deduction.hpp> +#include <TestTeamVector.hpp> +#include <TestTemplateMetaFunctions.hpp> + +#include <TestPolicyConstruction.hpp> + +#include <TestMDRange.hpp> + +namespace Test { + +class serial : public ::testing::Test { +protected: + static void SetUpTestCase() + { + Kokkos::HostSpace::execution_space::initialize(); + } + static void TearDownTestCase() + { + Kokkos::HostSpace::execution_space::finalize(); + } +}; + +} +#endif diff --git 
a/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6eec0683aeecaeae28a46a743b01164d7db7eb9c --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp @@ -0,0 +1,168 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial , atomics ) +{ + const int loop_count = 1e6 ; + + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long 
long int,Kokkos::Serial>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,3) ) ); +} + +TEST_F( serial , atomic_operations ) +{ + const int start = 1; //Avoid zero for division + const int end = 11; + for (int i = start; i < end; ++i) + { + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 12) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 
12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 4 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 4 ) ) ); + } + +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b1c32cfaf5e1249e3de3e338bd2abf402525c95b --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp @@ -0,0 +1,165 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial , md_range ) { + TestMDRange_2D< Kokkos::Serial >::test_for2(100,100); + + TestMDRange_3D< Kokkos::Serial >::test_for3(100,100,100); +} + +TEST_F( serial, policy_construction) { + TestRangePolicyConstruction< Kokkos::Serial >(); + TestTeamPolicyConstruction< Kokkos::Serial >(); +} + +TEST_F( serial , range_tag ) +{ + TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(0); + TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0); + TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_scan(0); + TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0); + TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0); + TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(0); + + TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); + TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); + TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000); + TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001); + TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001); + TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001); + TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000); +} + + +//---------------------------------------------------------------------------- + +TEST_F( serial , compiler_macros ) +{ + ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Serial >() ) ); +} + +//---------------------------------------------------------------------------- + +TEST_F( serial , memory_pool ) +{ + bool val = TestMemoryPool::test_mempool< Kokkos::Serial >( 128, 128000000 
); + ASSERT_TRUE( val ); + + TestMemoryPool::test_mempool2< Kokkos::Serial >( 64, 4, 1000000, 2000000 ); + + TestMemoryPool::test_memory_exhaustion< Kokkos::Serial >(); +} + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_ENABLE_TASKDAG ) + +TEST_F( serial , task_fib ) +{ + for ( int i = 0 ; i < 25 ; ++i ) { + TestTaskScheduler::TestFib< Kokkos::Serial >::run(i); + } +} + +TEST_F( serial , task_depend ) +{ + for ( int i = 0 ; i < 25 ; ++i ) { + TestTaskScheduler::TestTaskDependence< Kokkos::Serial >::run(i); + } +} + +TEST_F( serial , task_team ) +{ + TestTaskScheduler::TestTaskTeam< Kokkos::Serial >::run(1000); + //TestTaskScheduler::TestTaskTeamValue< Kokkos::Serial >::run(1000); //put back after testing +} + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL ) +TEST_F( serial , cxx11 ) +{ + if ( std::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Serial >::value ) { + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(1) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(2) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(3) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(4) ) ); + } +} +#endif + +TEST_F( serial, tile_layout ) +{ + TestTile::test< Kokkos::Serial , 1 , 1 >( 1 , 1 ); + TestTile::test< Kokkos::Serial , 1 , 1 >( 2 , 3 ); + TestTile::test< Kokkos::Serial , 1 , 1 >( 9 , 10 ); + + TestTile::test< Kokkos::Serial , 2 , 2 >( 1 , 1 ); + TestTile::test< Kokkos::Serial , 2 , 2 >( 2 , 3 ); + TestTile::test< Kokkos::Serial , 2 , 2 >( 4 , 4 ); + TestTile::test< Kokkos::Serial , 2 , 2 >( 9 , 9 ); + + TestTile::test< Kokkos::Serial , 2 , 4 >( 9 , 9 ); + TestTile::test< Kokkos::Serial , 4 , 2 >( 9 , 9 ); + + TestTile::test< Kokkos::Serial , 4 , 4 >( 1 , 1 ); + TestTile::test< Kokkos::Serial , 4 , 4 >( 4 , 4 ); + TestTile::test< Kokkos::Serial , 4 , 4 >( 9 
, 9 ); + TestTile::test< Kokkos::Serial , 4 , 4 >( 9 , 11 ); + + TestTile::test< Kokkos::Serial , 8 , 8 >( 1 , 1 ); + TestTile::test< Kokkos::Serial , 8 , 8 >( 4 , 4 ); + TestTile::test< Kokkos::Serial , 8 , 8 >( 9 , 9 ); + TestTile::test< Kokkos::Serial , 8 , 8 >( 9 , 11 ); +} + + + + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp new file mode 100644 index 0000000000000000000000000000000000000000..25b5ac6d16a8d101dd1e7d940007a107d1c814fc --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp @@ -0,0 +1,122 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial, long_reduce) { + TestReduce< long , Kokkos::Serial >( 0 ); + TestReduce< long , Kokkos::Serial >( 1000000 ); +} + +TEST_F( serial, double_reduce) { + TestReduce< double , Kokkos::Serial >( 0 ); + TestReduce< double , Kokkos::Serial >( 1000000 ); +} + +TEST_F( serial , reducers ) +{ + TestReducers<int, Kokkos::Serial>::execute_integer(); + TestReducers<size_t, Kokkos::Serial>::execute_integer(); + TestReducers<double, Kokkos::Serial>::execute_float(); + TestReducers<Kokkos::complex<double>, Kokkos::Serial>::execute_basic(); +} + +TEST_F( serial, long_reduce_dynamic ) { + TestReduceDynamic< long , Kokkos::Serial >( 0 ); + TestReduceDynamic< long , Kokkos::Serial >( 1000000 ); +} + +TEST_F( serial, double_reduce_dynamic ) { + TestReduceDynamic< double , Kokkos::Serial >( 0 ); + TestReduceDynamic< double , Kokkos::Serial >( 1000000 ); +} + +TEST_F( serial, long_reduce_dynamic_view ) { + TestReduceDynamicView< long , Kokkos::Serial >( 0 ); + TestReduceDynamicView< long , Kokkos::Serial >( 1000000 ); +} + +TEST_F( serial , scan ) +{ + TestScan< Kokkos::Serial >::test_range( 1 , 1000 ); + TestScan< Kokkos::Serial >( 0 ); + TestScan< Kokkos::Serial >( 10 ); + TestScan< Kokkos::Serial >( 10000 ); +} + 
+TEST_F( serial , team_scan ) +{ + TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 10 ); + TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); + TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 10000 ); + TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); +} + +TEST_F( serial , team_long_reduce) { + TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +TEST_F( serial , team_double_reduce) { + TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +TEST_F( serial , reduction_deduction ) +{ + TestCXX11::test_reduction_deduction< Kokkos::Serial >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bc838ccde4b36cf964d0da97500fdbd921a85aa0 --- 
/dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp @@ -0,0 +1,92 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial, view_subview_auto_1d_left ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Serial >(); +} + +TEST_F( serial, view_subview_auto_1d_right ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Serial >(); +} + +TEST_F( serial, view_subview_auto_1d_stride ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Serial >(); +} + +TEST_F( serial, view_subview_assign_strided ) { + TestViewSubview::test_1d_strided_assignment< Kokkos::Serial >(); +} + +TEST_F( serial, view_subview_left_0 ) { + TestViewSubview::test_left_0< Kokkos::Serial >(); +} + +TEST_F( serial, view_subview_left_1 ) { + TestViewSubview::test_left_1< Kokkos::Serial >(); +} + +TEST_F( serial, view_subview_left_2 ) { + TestViewSubview::test_left_2< Kokkos::Serial >(); +} + +TEST_F( serial, view_subview_left_3 ) { + TestViewSubview::test_left_3< Kokkos::Serial >(); +} + +TEST_F( serial, view_subview_right_0 ) { + TestViewSubview::test_right_0< Kokkos::Serial >(); +} + +TEST_F( serial, view_subview_right_1 ) { + TestViewSubview::test_right_1< Kokkos::Serial >(); +} + +TEST_F( serial, view_subview_right_3 ) { + TestViewSubview::test_right_3< Kokkos::Serial >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e6a5b56d3ed48ac2301e56b944e4924dcb79451e --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp @@ -0,0 +1,60 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial, view_subview_layoutleft_to_layoutleft) { + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +TEST_F( serial, view_subview_layoutright_to_layoutright) { + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0b7a0d3bfa6fa514195a4fd6241fc262f0ad884d --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial, view_subview_1d_assign ) { + TestViewSubview::test_1d_assign< Kokkos::Serial >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8ca7285c1f8331cb6992411d6b35d7bc054945a3 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial, view_subview_1d_assign_atomic ) { + TestViewSubview::test_1d_assign< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1d156c741524315d2fb66fdc5e852329d846d3ae --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial, view_subview_1d_assign_randomaccess ) { + TestViewSubview::test_1d_assign< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ebf0e5c99155afe17dea3807981d712e1d67c601 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial, view_subview_2d_from_3d ) { + TestViewSubview::test_2d_subview_3d< Kokkos::Serial >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp new file mode 100644 index 0000000000000000000000000000000000000000..74acb92f1b9e632a980b7d0141a54200aebbfd15 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial, view_subview_2d_from_3d_atomic ) { + TestViewSubview::test_2d_subview_3d< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8075d46e0fe15c4c15a47e80f6172d4990fd6ce5 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial, view_subview_2d_from_3d_randomaccess ) { + TestViewSubview::test_2d_subview_3d< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9ce8222643a5d3a183fad578013945a67efd6847 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial, view_subview_3d_from_5d_left ) { + TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c8a5c8f33fdc70a2408aade42f21b3c451753b4c --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial, view_subview_3d_from_5d_left_atomic ) { + TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b66f15f17da1b7f0bcb24459678965dacee04f9b --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial, view_subview_3d_from_5d_left_randomaccess ) { + TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5e5e3cf3d1af0f0755ab8fa3f8be9f846ff554e9 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial, view_subview_3d_from_5d_right ) { + TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp new file mode 100644 index 0000000000000000000000000000000000000000..55a353bcafef5e852ec33c80d9084f7c2236efcc --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial, view_subview_3d_from_5d_right_atomic ) { + TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a168e1e232ff5f71cce593be776496cbd7dd6c25 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial, view_subview_3d_from_5d_right_randomaccess ) { + TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c_all.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c_all.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a489b0fcb585aa0e12310f09a0701188b8814045 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c_all.cpp @@ -0,0 +1,12 @@ +#include<serial/TestSerial_SubView_c01.cpp> +#include<serial/TestSerial_SubView_c02.cpp> +#include<serial/TestSerial_SubView_c03.cpp> +#include<serial/TestSerial_SubView_c04.cpp> +#include<serial/TestSerial_SubView_c05.cpp> +#include<serial/TestSerial_SubView_c06.cpp> +#include<serial/TestSerial_SubView_c07.cpp> +#include<serial/TestSerial_SubView_c08.cpp> +#include<serial/TestSerial_SubView_c09.cpp> +#include<serial/TestSerial_SubView_c10.cpp> +#include<serial/TestSerial_SubView_c11.cpp> +#include<serial/TestSerial_SubView_c12.cpp> diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3318e5f2457a9f4e79a0dbdd2a5f44571b895be1 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp @@ -0,0 +1,117 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial , team_tag ) +{ + TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(0); + TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0); + TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0); + TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0); + + TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); + TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); + TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000); + TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000); +} + +TEST_F( serial , team_shared_request) { + TestSharedTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >(); + TestSharedTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +TEST_F( serial, team_scratch_request) { + TestScratchTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >(); + TestScratchTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) +TEST_F( serial , team_lambda_shared_request) { + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >(); +} +#endif + +TEST_F( serial, shmem_size) { + TestShmemSize< Kokkos::Serial >(); +} + +TEST_F( serial, multi_level_scratch) { + TestMultiLevelScratchTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >(); + TestMultiLevelScratchTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +TEST_F( serial , team_vector ) +{ + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial 
>(0) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(1) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(2) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(3) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(4) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(5) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(6) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(7) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(8) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(9) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(10) ) ); +} + +#ifdef KOKKOS_COMPILER_GNU +#if ( KOKKOS_COMPILER_GNU == 472 ) +#define SKIP_TEST +#endif +#endif + +#ifndef SKIP_TEST +TEST_F( serial, triple_nested_parallelism ) +{ + TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048 , 32 , 32 ); + TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048 , 32 , 16 ); + TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048 , 16 , 16 ); +} +#endif + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4c655fe770f26fd8d6b239251c5d6301140faa09 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_a.cpp @@ -0,0 +1,53 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial , impl_view_mapping_a ) { + test_view_mapping< Kokkos::Serial >(); + test_view_mapping_operator< Kokkos::Serial >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4947f2eaaef607b04d680a7c9c64ae6f2d8e6087 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp @@ -0,0 +1,121 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <serial/TestSerial.hpp> + +namespace Test { + +TEST_F( serial , impl_shared_alloc ) { + test_shared_alloc< Kokkos::HostSpace , Kokkos::Serial >(); +} + +TEST_F( serial , impl_view_mapping_b ) { + test_view_mapping_subview< Kokkos::Serial >(); + TestViewMappingAtomic< Kokkos::Serial >::run(); +} + +TEST_F( serial, view_api) { + TestViewAPI< double , Kokkos::Serial >(); +} + +TEST_F( serial , view_nested_view ) +{ + ::Test::view_nested_view< Kokkos::Serial >(); +} + + + +TEST_F( serial , view_remap ) +{ + enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; + + typedef Kokkos::View< double*[N1][N2][N3] , + Kokkos::LayoutRight , + Kokkos::Serial > output_type ; + + typedef Kokkos::View< int**[N2][N3] , + Kokkos::LayoutLeft , + Kokkos::Serial > input_type ; + + typedef Kokkos::View< int*[N0][N2][N3] , + Kokkos::LayoutLeft , + Kokkos::Serial > diff_type ; + + output_type output( "output" , N0 ); + input_type input ( "input" , N0 , N1 ); + diff_type diff ( "diff" , N0 ); + + int value = 0 ; + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { + input(i0,i1,i2,i3) = ++value ; + }}}} + + // Kokkos::deep_copy( diff , input ); // throw with incompatible shape + 
Kokkos::deep_copy( output , input ); + + value = 0 ; + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { + ++value ; + ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); + }}}} +} + +//---------------------------------------------------------------------------- + +TEST_F( serial , view_aggregate ) +{ + TestViewAggregate< Kokkos::Serial >(); +} + +TEST_F( serial , template_meta_functions ) +{ + TestTemplateMetaFunctions<int, Kokkos::Serial >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads.hpp b/lib/kokkos/core/unit_test/threads/TestThreads.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bb9f36581aa753f15c789d2a7592031ed70caa57 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads.hpp @@ -0,0 +1,114 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef KOKKOS_TEST_THREADSHPP +#define KOKKOS_TEST_THREADSHPP +#include <gtest/gtest.h> + +#include <Kokkos_Macros.hpp> +#ifdef KOKKOS_LAMBDA +#undef KOKKOS_LAMBDA +#endif +#define KOKKOS_LAMBDA [=] + +#include <Kokkos_Core.hpp> + +#include <TestTile.hpp> + +//---------------------------------------------------------------------------- + +#include <TestSharedAlloc.hpp> +#include <TestViewMapping.hpp> + + +#include <TestViewAPI.hpp> +#include <TestViewOfClass.hpp> +#include <TestViewSubview.hpp> +#include <TestAtomic.hpp> +#include <TestAtomicOperations.hpp> +#include <TestRange.hpp> +#include <TestTeam.hpp> +#include <TestReduce.hpp> +#include <TestScan.hpp> +#include <TestAggregate.hpp> +#include <TestCompilerMacros.hpp> +#include <TestTaskScheduler.hpp> +#include <TestMemoryPool.hpp> + + +#include <TestCXX11.hpp> +#include <TestCXX11Deduction.hpp> +#include <TestTeamVector.hpp> +#include <TestTemplateMetaFunctions.hpp> + +#include <TestPolicyConstruction.hpp> + +#include <TestMDRange.hpp> + +namespace Test { + +class threads : 
public ::testing::Test { +protected: + static void SetUpTestCase() + { + const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); + const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); + const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); + + unsigned threads_count = 0 ; + + threads_count = std::max( 1u , numa_count ) + * std::max( 2u , cores_per_numa * threads_per_core ); + + Kokkos::Threads::initialize( threads_count ); + Kokkos::Threads::print_configuration( std::cout , true /* detailed */ ); + } + + static void TearDownTestCase() + { + Kokkos::Threads::finalize(); + } +}; + + +} +#endif diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8ce32fc3385b28f1be58aa82606a59bffc192bc1 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp @@ -0,0 +1,168 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads , atomics ) +{ + const int loop_count = 1e4 ; + + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> 
,Kokkos::Threads>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,3) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Threads>(100,1) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Threads>(100,2) ) ); + ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Threads>(100,3) ) ); +} + +TEST_F( threads , atomic_operations ) +{ + const int start = 1; //Avoid zero for division + const int end = 11; + for (int i = start; i < end; ++i) + { + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned 
int,Kokkos::Threads>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long 
int,Kokkos::Threads>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 4 ) ) ); + + 
ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 4 ) ) ); + } + +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d9f17cc88a85105e41c887be2531261a58f8f436 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp @@ -0,0 +1,189 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads , init ) { + ; +} + +TEST_F( threads , md_range ) { + TestMDRange_2D< Kokkos::Threads >::test_for2(100,100); + + TestMDRange_3D< Kokkos::Threads >::test_for3(100,100,100); +} + +TEST_F( threads, policy_construction) { + TestRangePolicyConstruction< Kokkos::Threads >(); + TestTeamPolicyConstruction< Kokkos::Threads >(); +} + +TEST_F( threads , range_tag ) +{ + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(0); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_scan(0); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(0); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(0); + + TestRange< Kokkos::Threads , 
Kokkos::Schedule<Kokkos::Static> >::test_for(2); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_scan(2); + + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(3); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(3); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(3); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(3); + + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000); + + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001); + TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000); +} + + +//---------------------------------------------------------------------------- + +TEST_F( threads , compiler_macros ) +{ + ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Threads >() ) ); +} + +//---------------------------------------------------------------------------- + +TEST_F( threads , memory_pool ) +{ + bool val = TestMemoryPool::test_mempool< Kokkos::Threads >( 128, 128000000 ); + ASSERT_TRUE( val ); + + TestMemoryPool::test_mempool2< Kokkos::Threads >( 64, 4, 1000000, 2000000 ); + + TestMemoryPool::test_memory_exhaustion< Kokkos::Threads >(); +} + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_ENABLE_TASKDAG ) +/* +TEST_F( threads , task_fib ) +{ + for ( int i = 0 ; i < 25 ; ++i ) { + TestTaskScheduler::TestFib< Kokkos::Threads >::run(i); + } +} + 
+TEST_F( threads , task_depend ) +{ + for ( int i = 0 ; i < 25 ; ++i ) { + TestTaskScheduler::TestTaskDependence< Kokkos::Threads >::run(i); + } +} + +TEST_F( threads , task_team ) +{ + TestTaskScheduler::TestTaskTeam< Kokkos::Threads >::run(1000); + //TestTaskScheduler::TestTaskTeamValue< Kokkos::Threads >::run(1000); //put back after testing +} +*/ +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) +TEST_F( threads , cxx11 ) +{ + if ( std::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Threads >::value ) { + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(1) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(2) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(3) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(4) ) ); + } +} +#endif + +TEST_F( threads, tile_layout ) +{ + TestTile::test< Kokkos::Threads , 1 , 1 >( 1 , 1 ); + TestTile::test< Kokkos::Threads , 1 , 1 >( 2 , 3 ); + TestTile::test< Kokkos::Threads , 1 , 1 >( 9 , 10 ); + + TestTile::test< Kokkos::Threads , 2 , 2 >( 1 , 1 ); + TestTile::test< Kokkos::Threads , 2 , 2 >( 2 , 3 ); + TestTile::test< Kokkos::Threads , 2 , 2 >( 4 , 4 ); + TestTile::test< Kokkos::Threads , 2 , 2 >( 9 , 9 ); + + TestTile::test< Kokkos::Threads , 2 , 4 >( 9 , 9 ); + TestTile::test< Kokkos::Threads , 4 , 2 >( 9 , 9 ); + + TestTile::test< Kokkos::Threads , 4 , 4 >( 1 , 1 ); + TestTile::test< Kokkos::Threads , 4 , 4 >( 4 , 4 ); + TestTile::test< Kokkos::Threads , 4 , 4 >( 9 , 9 ); + TestTile::test< Kokkos::Threads , 4 , 4 >( 9 , 11 ); + + TestTile::test< Kokkos::Threads , 8 , 8 >( 1 , 1 ); + TestTile::test< Kokkos::Threads , 8 , 8 >( 4 , 4 ); + TestTile::test< Kokkos::Threads , 8 , 8 >( 9 , 9 ); + TestTile::test< Kokkos::Threads , 8 , 8 >( 9 , 11 ); +} + + +TEST_F( threads , dispatch ) +{ + const int repeat = 100 ; + for ( int i = 0 ; i < repeat ; ++i ) { + for ( int j 
= 0 ; j < repeat ; ++j ) { + Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Threads >(0,j) + , KOKKOS_LAMBDA( int ) {} ); + }} +} + + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a637d1e3ab654b402e49b7d3aec582e425d2592a --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp @@ -0,0 +1,138 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads, long_reduce) { + TestReduce< long , Kokkos::Threads >( 0 ); + TestReduce< long , Kokkos::Threads >( 1000000 ); +} + +TEST_F( threads, double_reduce) { + TestReduce< double , Kokkos::Threads >( 0 ); + TestReduce< double , Kokkos::Threads >( 1000000 ); +} + +TEST_F( threads , reducers ) +{ + TestReducers<int, Kokkos::Threads>::execute_integer(); + TestReducers<size_t, Kokkos::Threads>::execute_integer(); + TestReducers<double, Kokkos::Threads>::execute_float(); + TestReducers<Kokkos::complex<double>, Kokkos::Threads>::execute_basic(); +} + +TEST_F( threads, long_reduce_dynamic ) { + TestReduceDynamic< long , Kokkos::Threads >( 0 ); + TestReduceDynamic< long , Kokkos::Threads >( 1000000 ); +} + +TEST_F( threads, double_reduce_dynamic ) { + TestReduceDynamic< double , Kokkos::Threads >( 0 ); + TestReduceDynamic< double , Kokkos::Threads >( 1000000 ); +} + +TEST_F( threads, long_reduce_dynamic_view ) { + TestReduceDynamicView< long , Kokkos::Threads >( 0 ); + TestReduceDynamicView< long , Kokkos::Threads >( 1000000 ); +} + +TEST_F( threads , scan ) +{ + TestScan< Kokkos::Threads >::test_range( 1 , 1000 ); + TestScan< Kokkos::Threads >( 0 ); + TestScan< Kokkos::Threads >( 100000 ); + TestScan< 
Kokkos::Threads >( 10000000 ); + Kokkos::Threads::fence(); +} + +#if 0 +TEST_F( threads , scan_small ) +{ + typedef TestScan< Kokkos::Threads , Kokkos::Impl::ThreadsExecUseScanSmall > TestScanFunctor ; + for ( int i = 0 ; i < 1000 ; ++i ) { + TestScanFunctor( 10 ); + TestScanFunctor( 10000 ); + } + TestScanFunctor( 1000000 ); + TestScanFunctor( 10000000 ); + + Kokkos::Threads::fence(); +} +#endif + +TEST_F( threads , team_scan ) +{ + TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 10 ); + TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); + TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 10000 ); + TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); +} + +TEST_F( threads , team_long_reduce) { + TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +TEST_F( threads , team_double_reduce) { + TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< double , Kokkos::Threads , 
Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +TEST_F( threads , reduction_deduction ) +{ + TestCXX11::test_reduction_deduction< Kokkos::Threads >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2df9e19deb0130359d81b8c3cc001bb85ee7cb2f --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp @@ -0,0 +1,92 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads, view_subview_auto_1d_left ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_auto_1d_right ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_auto_1d_stride ) { + TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_assign_strided ) { + TestViewSubview::test_1d_strided_assignment< Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_left_0 ) { + TestViewSubview::test_left_0< Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_left_1 ) { + TestViewSubview::test_left_1< Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_left_2 ) { + TestViewSubview::test_left_2< Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_left_3 ) { + TestViewSubview::test_left_3< Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_right_0 ) { + TestViewSubview::test_right_0< Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_right_1 ) { + TestViewSubview::test_right_1< Kokkos::Threads >(); +} + +TEST_F( threads, view_subview_right_3 ) { + TestViewSubview::test_right_3< Kokkos::Threads >(); +} + +} // namespace test 
+ diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d57dbe97c0d38aaa6a2e48816eb9872a8585afb7 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp @@ -0,0 +1,60 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads, view_subview_layoutleft_to_layoutleft) { + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +TEST_F( threads, view_subview_layoutright_to_layoutright) { + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp new file mode 100644 index 0000000000000000000000000000000000000000..67d998c0e86488df0023cc0138ffe022cdc52d94 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads, view_subview_1d_assign ) { + TestViewSubview::test_1d_assign< Kokkos::Threads >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e340240c48d6d28c9bc4c79b777a3e1a4a8c4ddc --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads, view_subview_1d_assign_atomic ) { + TestViewSubview::test_1d_assign< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ad27fa0fa6cee9db3eb63c581a175eee0cdd6e4e --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads, view_subview_1d_assign_randomaccess ) { + TestViewSubview::test_1d_assign< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6fca47cc4ce41b56155fac8ce1d4b158d5e99c82 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads, view_subview_2d_from_3d ) { + TestViewSubview::test_2d_subview_3d< Kokkos::Threads >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c7dfca941582dee3d667f60152854ea30b393548 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads, view_subview_2d_from_3d_atomic ) { + TestViewSubview::test_2d_subview_3d< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp new file mode 100644 index 0000000000000000000000000000000000000000..38e8394918614fdb528e9111d7fc1f54c7ff4d83 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads, view_subview_2d_from_3d_randomaccess ) { + TestViewSubview::test_2d_subview_3d< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1f01fe6b5e6104416bb1f2f680cafeab48cac1ad --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads, view_subview_3d_from_5d_left ) { + TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e9a1ccbe30edcf7f512a5c20462df83cf52c3ac4 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads, view_subview_3d_from_5d_left_atomic ) { + TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c8b6c8743dd25a97db5f00e5bc7157c9f040c5d9 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads, view_subview_3d_from_5d_left_randomaccess ) { + TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7cef6fa07be88859c063470857d775964c74f2fa --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads, view_subview_3d_from_5d_right ) { + TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d67bf3157e337fef0af36dbba934f8bc22d74d0c --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads, view_subview_3d_from_5d_right_atomic ) { + TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e8a2c825cf3a9474d149d81a225cbadb16338cd7 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads, view_subview_3d_from_5d_right_randomaccess ) { + TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp new file mode 100644 index 0000000000000000000000000000000000000000..03f31b78c0bca12ef085d67b59a8f5ea45a5d614 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp @@ -0,0 +1,122 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads , team_tag ) +{ + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(0); + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0); + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0); + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0); + + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(2); + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2); + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(2); + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(2); + + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000); + TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000); +} + +TEST_F( threads , team_shared_request) { + TestSharedTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >(); + TestSharedTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +TEST_F( threads, team_scratch_request) { + TestScratchTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >(); + TestScratchTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) +TEST_F( threads , team_lambda_shared_request) { + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >(); +} +#endif + +TEST_F( 
threads, shmem_size) { + TestShmemSize< Kokkos::Threads >(); +} + +TEST_F( threads, multi_level_scratch) { + TestMultiLevelScratchTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >(); + TestMultiLevelScratchTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +TEST_F( threads , team_vector ) +{ + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(0) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(1) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(2) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(3) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(4) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(5) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(6) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(7) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(8) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(9) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(10) ) ); +} + +#ifdef KOKKOS_COMPILER_GNU +#if ( KOKKOS_COMPILER_GNU == 472 ) +#define SKIP_TEST +#endif +#endif + +#ifndef SKIP_TEST +TEST_F( threads, triple_nested_parallelism ) +{ + TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048 , 32 , 32 ); + TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048 , 32 , 16 ); + TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048 , 16 , 16 ); +} +#endif + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..46a576b027fb2149302239ba31d6e53bd001e3ce --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp @@ -0,0 +1,53 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads , impl_view_mapping_a ) { + test_view_mapping< Kokkos::Threads >(); + test_view_mapping_operator< Kokkos::Threads >(); +} + +} // namespace test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b5d6ac843d8177149d53fe1cb52528c6ef760f3d --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp @@ -0,0 +1,121 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include <threads/TestThreads.hpp> + +namespace Test { + +TEST_F( threads , impl_shared_alloc ) { + test_shared_alloc< Kokkos::HostSpace , Kokkos::Threads >(); +} + +TEST_F( threads , impl_view_mapping_b ) { + test_view_mapping_subview< Kokkos::Threads >(); + TestViewMappingAtomic< Kokkos::Threads >::run(); +} + +TEST_F( threads, view_api) { + TestViewAPI< double , Kokkos::Threads >(); +} + +TEST_F( threads , view_nested_view ) +{ + ::Test::view_nested_view< Kokkos::Threads >(); +} + + + +TEST_F( threads , view_remap ) +{ + enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; + + typedef Kokkos::View< double*[N1][N2][N3] , + Kokkos::LayoutRight , + Kokkos::Threads > output_type ; + + typedef Kokkos::View< int**[N2][N3] , + Kokkos::LayoutLeft , + Kokkos::Threads > input_type ; + + typedef Kokkos::View< int*[N0][N2][N3] , + Kokkos::LayoutLeft , + Kokkos::Threads > diff_type ; + + output_type output( "output" , N0 ); + input_type input ( "input" , N0 , N1 ); + diff_type diff ( "diff" , N0 ); + + int value = 0 ; + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { + input(i0,i1,i2,i3) = ++value ; + }}}} + + // Kokkos::deep_copy( diff , input ); // throw with 
incompatible shape + Kokkos::deep_copy( output , input ); + + value = 0 ; + for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { + for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { + for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { + for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { + ++value ; + ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); + }}}} +} + +//---------------------------------------------------------------------------- + +TEST_F( threads , view_aggregate ) +{ + TestViewAggregate< Kokkos::Threads >(); +} + +TEST_F( threads , template_meta_functions ) +{ + TestTemplateMetaFunctions<int, Kokkos::Threads >(); +} + +} // namespace test + diff --git a/lib/kokkos/doc/README b/lib/kokkos/doc/README deleted file mode 100644 index 31e75f365c21a116a1fb736097f4f524e8d1e021..0000000000000000000000000000000000000000 --- a/lib/kokkos/doc/README +++ /dev/null @@ -1,32 +0,0 @@ -Kokkos uses the Doxygen tool for providing three documentation -sources: -- man pages -- Latex User Guide -- HTML Online User Guide. - -Man Pages - -Man pages are available for all files and functions in the directory -TRILINOS_HOME/doc/kokkos/man, where TRILINOS_HOME is the location of your -copy of Trilinos. To use these pages with the Unix man utility, add -the directory to your man path as follows: - -setenv MANPATH `echo $MANPATH`:TRILINOS_HOME/doc/kokkos/man - - -LaTeX User Guide - -A postscript version of this guide is in -TRILINOS_HOME/doc/kokkos/latex/user_guide.ps. The LaTeX source is in the -directory TRILINOS_HOME/doc/kokkos/latex. - -HTML Online User Guide - -The online guide is initiated by pointing your browser to -TRILINOS_HOME/doc/kokkos/html/index.html - -Any question, comments or suggestions are welcome. 
Please send to -Mike Heroux at - -320-845-7695 -maherou@sandia.gov diff --git a/lib/kokkos/doc/design_notes_space_instances.md b/lib/kokkos/doc/design_notes_space_instances.md new file mode 100644 index 0000000000000000000000000000000000000000..487fa25bcb32875ed3ba90821aba006a13cd506e --- /dev/null +++ b/lib/kokkos/doc/design_notes_space_instances.md @@ -0,0 +1,166 @@ +# Design Notes for Execution and Memory Space Instances + + +## Execution Spaces + + * Work is *dispatched* to an execution space instance + + + +## Host Associated Execution Space Instances + +Vocabulary and examples assuming C++11 Threads Support Library + + * A host-side *control* thread dispatches work to an instance + + * `this_thread` is the control thread + + * `main` is the initial control thread + + * An execution space instance is a pool of threads + + * All instances are disjoint thread pools + + * Exactly one control thread is associated with + an instance and only that control thread may + dispatch work to that instance + + * A control thread may be a member of an instance, + if so then it is also the control thread associated + with that instance + + * The pool of threads associated with an instance is not mutable + + * The pool of threads associated with an instance may be masked + + - Allows work to be dispatched to a subset of the pool + + - Example: only one hyperthread per core of the instance + + - When a mask is applied to an instance that mask + remains until cleared or another mask is applied + + - Masking is portable by defining it as using a fraction + of the available resources (threads) + + * Instances are shared (reference counted) objects, + just like `Kokkos::View` + +``` +struct StdThread { + void mask( float fraction ); + void unmask() { mask( 1.0 ); } +}; +``` + + + +### Requesting an Execution Space Instance + + * `Space::request(` *who* `,` *what* `,` *control-opt* `)` + + * *who* is an identifier for subsequent queries regarding + who requested each instance
+ + * *what* is the number of threads and how they should be placed + + - Placement within locality-topology hierarchy; e.g., HWLOC + + - Compact within a level of hierarchy, or striped across that level; + e.g., socket or NUMA region + + - Granularity of request is core + + * *control-opt* optionally specifies whether the instance + has a new control thread + + - *control-opt* includes a control function / closure + + - The new control thread is a member of the instance + + - The control function is called by the new control thread + and is passed a `const` instance + + - The instance is **not** returned to the creating control thread + + * `std::thread` that is not a member of an instance is + *hard blocked* on a `std::mutex` + + - One global mutex or one mutex per thread? + + * `std::thread` that is a member of an instance is + *spinning* waiting for work, or is working + +``` +struct StdThread { + + struct Resource ; + + static StdThread request(); // default + + static StdThread request( const std::string & , const Resource & ); + + // If the instance can be reserved then + // allocate a copy of ControlClosure and invoke + // ControlClosure::operator()( const StdThread instance ) const + template< class ControlClosure > + static bool request( const std::string & , const Resource & + , const ControlClosure & ); +}; +``` + +### Relinquishing an Execution Space Instance + + * De-referencing the last reference-counted instance + relinquishes the pool of threads + + * If a control thread was created for the instance then + it is relinquished when that control thread returns + from the control function + + - Requires the reference count to be zero, an error if not + + * No *forced* relinquish + + + +## CUDA Associated Execution Space Instances + + * Only a single CUDA architecture + + * An instance is a device + stream + + * A stream is exclusive to an instance + + * Only a host-side control thread can dispatch work to an instance + + * Finite number of streams per
device + + * ISSUE: How to use CUDA `const` memory with multiple streams? + + * Masking can be mapped to restricting the number of CUDA blocks + to the fraction of available resources; e.g., maximum resident blocks + + +### Requesting an Execution Space Instance + + * `Space::request(` *who* `,` *what* `)` + + * *who* is an identifier for subsequent queries regarding + who requested each instance + + * *what* is which device, the stream is a requested/relinquished resource + + +``` +struct Cuda { + + struct Resource ; + + static Cuda request(); + + static Cuda request( const std::string & , const Resource & ); +}; +``` + + diff --git a/lib/kokkos/example/common/VectorImport.hpp b/lib/kokkos/example/common/VectorImport.hpp index 8ecd74d463c08f3624cf2be2d44b0ca1e4d008ad..48b28f8c2c2556c676993e2b259e68f0eb0abf73 100644 --- a/lib/kokkos/example/common/VectorImport.hpp +++ b/lib/kokkos/example/common/VectorImport.hpp @@ -112,13 +112,13 @@ private: // rank == 1 or array_layout == LayoutRight enum { OK = Kokkos::Impl::StaticAssert< ( VectorType::rank == 1 ) || - Kokkos::Impl::is_same< typename VectorType::array_layout , Kokkos::LayoutRight >::value + std::is_same< typename VectorType::array_layout , Kokkos::LayoutRight >::value >::value }; typedef typename VectorType::HostMirror HostVectorType ; enum { ReceiveInPlace = - Kokkos::Impl::is_same< typename VectorType::memory_space , + std::is_same< typename VectorType::memory_space , typename HostVectorType::memory_space >::value }; const CommMessageType recv_msg ; diff --git a/lib/kokkos/example/feint/ElemFunctor.hpp b/lib/kokkos/example/feint/ElemFunctor.hpp index 651e34c2eed247f37986886c86f04ce24d76c551..583c4fda12a96a6c061ddb99d13e979a21f01a01 100644 --- a/lib/kokkos/example/feint/ElemFunctor.hpp +++ b/lib/kokkos/example/feint/ElemFunctor.hpp @@ -337,11 +337,7 @@ struct LumpElemToNode { // In this example we know that the ViewElemValue // array specification is < double*[nNode][nValue] > -#if KOKKOS_USING_EXP_VIEW enum {
value_count = ViewElemValue::dimension::N2 }; -#else - enum { value_count = ViewElemValue::shape_type::N2 }; -#endif ViewNodeValue m_node_value ; ///< Integrated values at nodes ViewElemValue m_elem_value ; ///< Values apportioned to nodes diff --git a/lib/kokkos/example/feint/Makefile b/lib/kokkos/example/feint/Makefile index f198a974c1e34d4014323eb34d03e7aa1f7445ba..9abf51d107c5cfd9ae1184d4cfac606f3f6e1629 100644 --- a/lib/kokkos/example/feint/Makefile +++ b/lib/kokkos/example/feint/Makefile @@ -1,30 +1,28 @@ KOKKOS_PATH = ../.. +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +vpath %.cpp ${KOKKOS_SRC_PATH}/example/fixture ${KOKKOS_SRC_PATH}/example/feint -vpath %.cpp ${KOKKOS_PATH}/example/fixture ${KOKKOS_PATH}/example/feint - -EXAMPLE_HEADERS = $(wildcard $(KOKKOS_PATH)/example/common/*.hpp ${KOKKOS_PATH}/example/fixture/*.hpp ${KOKKOS_PATH}/example/feint/*.hpp) +EXAMPLE_HEADERS = $(wildcard $(KOKKOS_SRC_PATH)/example/common/*.hpp ${KOKKOS_SRC_PATH}/example/fixture/*.hpp ${KOKKOS_SRC_PATH}/example/feint/*.hpp) default: build_all echo "End Build" - -include $(KOKKOS_PATH)/Makefile.kokkos -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - CXX = $(NVCC_WRAPPER) - CXXFLAGS ?= -O3 - LINK = $(CXX) - LDFLAGS ?= -lpthread +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) + CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper else - CXX ?= g++ - CXXFLAGS ?= -O3 - LINK ?= $(CXX) - LDFLAGS ?= -lpthread + CXX = g++ endif +CXXFLAGS = -O3 +LINK ?= $(CXX) +LDFLAGS ?= + +include $(KOKKOS_PATH)/Makefile.kokkos + KOKKOS_CXXFLAGS += \ - -I${KOKKOS_PATH}/example/common \ - -I${KOKKOS_PATH}/example/fixture \ - -I${KOKKOS_PATH}/example/feint + -I${KOKKOS_SRC_PATH}/example/common \ + -I${KOKKOS_SRC_PATH}/example/fixture \ + -I${KOKKOS_SRC_PATH}/example/feint EXE_EXAMPLE_FEINT = KokkosExample_Feint OBJ_EXAMPLE_FEINT = BoxElemPart.o main.o diff --git a/lib/kokkos/example/fenl/Makefile b/lib/kokkos/example/fenl/Makefile index 5d8e6fd3034ec7c20044552a5688fc6751e374fb..24a0e61c18c4ce9efa1568534cfb4ad8bfccde9a 100644 --- 
a/lib/kokkos/example/fenl/Makefile +++ b/lib/kokkos/example/fenl/Makefile @@ -10,22 +10,18 @@ EXAMPLE_HEADERS = $(wildcard $(SRC_DIR)/../common/*.hpp ${SRC_DIR}/../fixture/*. default: build_all echo "End Build" -include $(KOKKOS_PATH)/Makefile.kokkos - -# KOKKOS_INTERNAL_USE_CUDA is not exported to installed Makefile.kokkos -# use KOKKOS_DEVICE here ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) - CXX = $(NVCC_WRAPPER) - CXXFLAGS ?= -O3 - LINK = $(CXX) - LDFLAGS ?= -lpthread + CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper else - CXX ?= g++ - CXXFLAGS ?= -O3 - LINK ?= $(CXX) - LDFLAGS ?= -lpthread + CXX = g++ endif +CXXFLAGS = -O3 +LINK ?= $(CXX) +LDFLAGS ?= + +include $(KOKKOS_PATH)/Makefile.kokkos + KOKKOS_CXXFLAGS += \ -I${SRC_DIR}/../common \ -I${SRC_DIR}/../fixture \ diff --git a/lib/kokkos/example/fenl/fenl_impl.hpp b/lib/kokkos/example/fenl/fenl_impl.hpp index 64070ce55fdc1cf7b94d631a0f29b32eecfab357..15583c10e9f5568e921d838284aa28cc8521f3f4 100644 --- a/lib/kokkos/example/fenl/fenl_impl.hpp +++ b/lib/kokkos/example/fenl/fenl_impl.hpp @@ -192,7 +192,7 @@ Perf fenl( //------------------------------------ - const int print_flag = use_print && Kokkos::Impl::is_same< Kokkos::HostSpace , typename Space::memory_space >::value ; + const int print_flag = use_print && std::is_same< Kokkos::HostSpace , typename Space::memory_space >::value ; int comm_rank ; int comm_size ; diff --git a/lib/kokkos/example/fixture/Makefile b/lib/kokkos/example/fixture/Makefile index 990f4f18e7d420f2cb7c991ba2d9732f50ef1c56..5e684e344056cde31aec46c2a088e39c1c3bc2f9 100644 --- a/lib/kokkos/example/fixture/Makefile +++ b/lib/kokkos/example/fixture/Makefile @@ -1,29 +1,27 @@ KOKKOS_PATH = ../.. 
+KOKKOS_SRC_PATH = ${KOKKOS_PATH} +vpath %.cpp ${KOKKOS_SRC_PATH}/example/fixture -vpath %.cpp ${KOKKOS_PATH}/example/fixture - -EXAMPLE_HEADERS = $(wildcard $(KOKKOS_PATH)/example/common/*.hpp ${KOKKOS_PATH}/example/fixture/*.hpp ) +EXAMPLE_HEADERS = $(wildcard $(KOKKOS_SRC_PATH)/example/common/*.hpp ${KOKKOS_SRC_PATH}/example/fixture/*.hpp ) default: build_all echo "End Build" - -include $(KOKKOS_PATH)/Makefile.kokkos - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - CXX = $(NVCC_WRAPPER) - CXXFLAGS ?= -O3 - LINK = $(CXX) - LDFLAGS ?= -lpthread + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) + CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper else - CXX ?= g++ - CXXFLAGS ?= -O3 - LINK ?= $(CXX) - LDFLAGS ?= -lpthread + CXX = g++ endif +CXXFLAGS = -O3 +LINK ?= $(CXX) +LDFLAGS ?= + +include $(KOKKOS_PATH)/Makefile.kokkos + KOKKOS_CXXFLAGS += \ - -I${KOKKOS_PATH}/example/common \ - -I${KOKKOS_PATH}/example/fixture + -I${KOKKOS_SRC_PATH}/example/common \ + -I${KOKKOS_SRC_PATH}/example/fixture EXE_EXAMPLE_FIXTURE = KokkosExample_Fixture OBJ_EXAMPLE_FIXTURE = Main.o TestFixture.o BoxElemPart.o diff --git a/lib/kokkos/example/global_2_local_ids/Makefile b/lib/kokkos/example/global_2_local_ids/Makefile index bf8fbea3e09a5d71f900de85ff2100cf41bd5738..42b376ec7c5cf73537bf2d49340ce1ca963e3ad1 100644 --- a/lib/kokkos/example/global_2_local_ids/Makefile +++ b/lib/kokkos/example/global_2_local_ids/Makefile @@ -12,27 +12,20 @@ OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o) default: build echo "Start Build" -# use installed Makefile.kokkos -include $(KOKKOS_PATH)/Makefile.kokkos - ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = $(NVCC_WRAPPER) -CXXFLAGS = -I$(SRC_DIR) -O3 -LINK = $(CXX) -LINKFLAGS = -EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR))) -#KOKKOS_DEVICES = "Cuda,OpenMP" -#KOKKOS_ARCH = "SNB,Kepler35" + CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper + EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR))) else -CXX = g++ -CXXFLAGS = -I$(SRC_DIR) -O3 -LINK = $(CXX) -LINKFLAGS = -EXE = $(addsuffix 
.host, $(shell basename $(SRC_DIR))) -#KOKKOS_DEVICES = "OpenMP" -#KOKKOS_ARCH = "SNB" + CXX = g++ + EXE = $(addsuffix .host, $(shell basename $(SRC_DIR))) endif +CXXFLAGS = -O3 -I$(SRC_DIR) +LINK ?= $(CXX) +LDFLAGS ?= + +include $(KOKKOS_PATH)/Makefile.kokkos + DEPFLAGS = -M LIB = diff --git a/lib/kokkos/example/grow_array/Makefile b/lib/kokkos/example/grow_array/Makefile index bf8fbea3e09a5d71f900de85ff2100cf41bd5738..42b376ec7c5cf73537bf2d49340ce1ca963e3ad1 100644 --- a/lib/kokkos/example/grow_array/Makefile +++ b/lib/kokkos/example/grow_array/Makefile @@ -12,27 +12,20 @@ OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o) default: build echo "Start Build" -# use installed Makefile.kokkos -include $(KOKKOS_PATH)/Makefile.kokkos - ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = $(NVCC_WRAPPER) -CXXFLAGS = -I$(SRC_DIR) -O3 -LINK = $(CXX) -LINKFLAGS = -EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR))) -#KOKKOS_DEVICES = "Cuda,OpenMP" -#KOKKOS_ARCH = "SNB,Kepler35" + CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper + EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR))) else -CXX = g++ -CXXFLAGS = -I$(SRC_DIR) -O3 -LINK = $(CXX) -LINKFLAGS = -EXE = $(addsuffix .host, $(shell basename $(SRC_DIR))) -#KOKKOS_DEVICES = "OpenMP" -#KOKKOS_ARCH = "SNB" + CXX = g++ + EXE = $(addsuffix .host, $(shell basename $(SRC_DIR))) endif +CXXFLAGS = -O3 -I$(SRC_DIR) +LINK ?= $(CXX) +LDFLAGS ?= + +include $(KOKKOS_PATH)/Makefile.kokkos + DEPFLAGS = -M LIB = diff --git a/lib/kokkos/example/ichol/Makefile b/lib/kokkos/example/ichol/Makefile deleted file mode 100644 index 57e972f042d94c337e8d6b73fffcec2e0d40ad90..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/Makefile +++ /dev/null @@ -1,63 +0,0 @@ -SCOTCH_PATH = /home/hcedwar/scotch/6.0.0 -KOKKOS_PATH = ../.. 
- -vpath %.cpp ${KOKKOS_PATH}/example/ichol/src ${KOKKOS_PATH}/example/ichol/example - -EXAMPLE_HEADERS = $(wildcard $(KOKKOS_PATH)/example/ichol/src/*.hpp ${KOKKOS_PATH}/example/ichol/example/*.hpp ) - -default: build_all - echo "End Build" - -include $(KOKKOS_PATH)/Makefile.kokkos - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - CXX = $(NVCC_WRAPPER) - CXXFLAGS ?= -O3 - LINK = $(CXX) - LDFLAGS ?= -lpthread -else - CXX ?= g++ - CXXFLAGS ?= -O3 - LINK ?= $(CXX) - LDFLAGS ?= -lpthread -endif - -KOKKOS_CXXFLAGS += \ - -I${KOKKOS_PATH}/example/ichol/src \ - -I${KOKKOS_PATH}/example/ichol/example \ - -I${SCOTCH_PATH}/include - -EXE_EXAMPLE_ICHOL_THREADS = KokkosExample_ichol_threads -OBJ_EXAMPLE_ICHOL_THREADS = example_chol_performance_device_pthread.o - -EXE_EXAMPLE_ICHOL_CUDA = KokkosExample_ichol_cuda -OBJ_EXAMPLE_ICHOL_CUDA = example_chol_performance_device_cuda.o - -TARGETS = $(EXE_EXAMPLE_ICHOL_THREADS) $(EXE_EXAMPLE_ICHOL_CUDA) - -#TEST_TARGETS = - -$(EXE_EXAMPLE_ICHOL_THREADS) : $(OBJ_EXAMPLE_ICHOL_THREADS) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) \ - $(OBJ_EXAMPLE_ICHOL_THREADS) $(KOKKOS_LIBS) $(LIB) \ - -L${SCOTCH_PATH}/lib -lscotch -lscotcherr -lscotcherrexit \ - -o $(EXE_EXAMPLE_ICHOL_THREADS) - -$(EXE_EXAMPLE_ICHOL_CUDA) : $(OBJ_EXAMPLE_ICHOL_CUDA) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) \ - $(OBJ_EXAMPLE_ICHOL_CUDA) $(KOKKOS_LIBS) $(LIB) \ - -L${SCOTCH_PATH}/lib -lscotch -lscotcherr -lscotcherrexit \ - -o $(EXE_EXAMPLE_ICHOL_CUDA) - -build_all : $(TARGETS) - -test : build_all - -clean: kokkos-clean - rm -f *.o $(TARGETS) - -# Compilation rules - -%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(EXAMPLE_HEADERS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< - diff --git a/lib/kokkos/example/ichol/example/example_chol_performance_device.hpp b/lib/kokkos/example/ichol/example/example_chol_performance_device.hpp deleted file mode 100644 index 
ca819e4f97028eb0782c7e6c5638945d40f7597b..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/example/example_chol_performance_device.hpp +++ /dev/null @@ -1,240 +0,0 @@ -#pragma once -#ifndef __EXAMPLE_CHOL_PERFORMANCE_DEVICE_HPP__ -#define __EXAMPLE_CHOL_PERFORMANCE_DEVICE_HPP__ - -#include <Kokkos_Core.hpp> -#include <impl/Kokkos_Timer.hpp> - -#include "util.hpp" - -#include "crs_matrix_base.hpp" -#include "crs_matrix_view.hpp" -#include "crs_row_view.hpp" - -#include "graph_helper_scotch.hpp" -#include "symbolic_factor_helper.hpp" -#include "crs_matrix_helper.hpp" - -#include "task_view.hpp" - -#include "task_factory.hpp" - -#include "chol.hpp" - -namespace Tacho { - - using namespace std; - - template<typename ValueType, - typename OrdinalType, - typename SizeType = OrdinalType, - typename SpaceType = void> - int exampleCholPerformanceDevice(const string file_input, - const int treecut, - const int prunecut, - const int seed, - const int nthreads, - const int max_task_dependence, - const int max_concurrency, - const int team_size, - const int fill_level, - const int league_size, - const bool skip_serial, - const bool verbose) { - typedef ValueType value_type; - typedef OrdinalType ordinal_type; - typedef SizeType size_type; - typedef typename - Kokkos::Impl::is_space< SpaceType >::host_mirror_space::execution_space - HostSpaceType ; - - typedef TaskFactory<Kokkos::Experimental::TaskPolicy<SpaceType>, - Kokkos::Experimental::Future<int,SpaceType> > TaskFactoryType; - - typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType> - CrsMatrixBaseType; - - typedef CrsMatrixBase<value_type,ordinal_type,size_type,HostSpaceType> - CrsMatrixBaseHostType; - - typedef Kokkos::MemoryUnmanaged MemoryUnmanaged ; - - typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryUnmanaged > - CrsMatrixNestedType; - - - typedef GraphHelper_Scotch<CrsMatrixBaseHostType> GraphHelperType; - typedef SymbolicFactorHelper<CrsMatrixBaseHostType> 
SymbolicFactorHelperType; - - typedef CrsMatrixView<CrsMatrixNestedType> CrsMatrixViewType; - typedef TaskView<CrsMatrixViewType,TaskFactoryType> CrsTaskViewType; - - typedef CrsMatrixBase<CrsTaskViewType,ordinal_type,size_type,SpaceType> CrsHierMatrixBaseType; - - typedef CrsMatrixView<CrsHierMatrixBaseType> CrsHierMatrixViewType; - typedef TaskView<CrsHierMatrixViewType,TaskFactoryType> CrsHierTaskViewType; - - int r_val = 0; - - Kokkos::Timer timer; - double - t_import = 0.0, - t_reorder = 0.0, - t_symbolic = 0.0, - t_flat2hier = 0.0, - t_factor_task = 0.0; - - cout << "CholPerformanceDevice:: import input file = " << file_input << endl; - CrsMatrixBaseHostType AA("AA"); - { - timer.reset(); - - ifstream in; - in.open(file_input); - if (!in.good()) { - cout << "Failed in open the file: " << file_input << endl; - return ++r_val; - } - AA.importMatrixMarket(in); - - t_import = timer.seconds(); - - if (verbose) { - AA.showMe( std::cout ); - std::cout << endl; - } - } - cout << "CholPerformanceDevice:: import input file::time = " << t_import << endl; - - cout << "CholPerformanceDevice:: reorder the matrix" << endl; - CrsMatrixBaseHostType PA("Permuted AA"); - - // '*_UU' is the permuted base upper triangular matrix - CrsMatrixBaseHostType host_UU("host_UU"); - CrsMatrixBaseType device_UU("UU"); - CrsHierMatrixBaseType device_HU("HU");; - - // typename CrsMatrixBaseHostType host_UU("host_UU"); - - { - typename GraphHelperType::size_type_array rptr("Graph::RowPtrArray", AA.NumRows() + 1); - typename GraphHelperType::ordinal_type_array cidx("Graph::ColIndexArray", AA.NumNonZeros()); - - AA.convertGraph(rptr, cidx); - GraphHelperType S("ScotchHelper", - AA.NumRows(), - rptr, - cidx, - seed); - { - timer.reset(); - - S.computeOrdering(treecut, 0); - S.pruneTree(prunecut); - - PA.copy(S.PermVector(), S.InvPermVector(), AA); - - t_reorder = timer.seconds(); - - if (verbose) { - S.showMe( std::cout ); - std::cout << std::endl ; - PA.showMe( std::cout ); - std::cout << 
std::endl ; - } - } - - // Symbolic factorization adds non-zero entries - // for factorization levels. - // Runs on the host process and currently requires std::sort. - - cout << "CholPerformanceDevice:: reorder the matrix::time = " << t_reorder << endl; - { - SymbolicFactorHelperType F(PA, league_size); - timer.reset(); - F.createNonZeroPattern(fill_level, Uplo::Upper, host_UU); - t_symbolic = timer.seconds(); - cout << "CholPerformanceDevice:: AA (nnz) = " << AA.NumNonZeros() << ", host_UU (nnz) = " << host_UU.NumNonZeros() << endl; - - if (verbose) { - F.showMe( std::cout ); - std::cout << std::endl ; - host_UU.showMe( std::cout ); - std::cout << std::endl ; - } - } - cout << "CholPerformanceDevice:: symbolic factorization::time = " << t_symbolic << endl; - - //---------------------------------------------------------------------- - // Allocate device_UU conformal to host_UU - // and deep_copy host_UU arrays to device_UU arrays. - // Set up device_HU referencing blocks of device_UU - - { - timer.reset(); - - device_UU.copy( host_UU ); - - CrsMatrixHelper::flat2hier(Uplo::Upper, device_UU, device_HU, - S.NumBlocks(), - S.RangeVector(), - S.TreeVector()); - - // Filling non-zero block matrixes' row ranges within block view. - // This is performed entirely in the 'device_HU' space. - - CrsMatrixHelper::fillRowViewArray( device_HU ); - - t_flat2hier = timer.seconds(); - - cout << "CholPerformanceDevice:: Hier (dof, nnz) = " << device_HU.NumRows() << ", " << device_HU.NumNonZeros() << endl; - } - cout << "CholPerformanceDevice:: copy base matrix and construct hierarchical matrix::time = " << t_flat2hier << endl; - } - - cout << "CholPerformanceDevice:: max concurrency = " << max_concurrency << endl; - - const size_t max_task_size = 4*sizeof(CrsTaskViewType)+128; - cout << "CholPerformanceDevice:: max task size = " << max_task_size << endl; - - //---------------------------------------------------------------------- - // From here onward all work is on the device. 
- //---------------------------------------------------------------------- - - { - typename TaskFactoryType::policy_type policy(max_concurrency, - max_task_size, - max_task_dependence, - team_size); - - cout << "CholPerformanceDevice:: ByBlocks factorize the matrix:: team_size = " << team_size << endl; - CrsHierTaskViewType H( device_HU ); - { - timer.reset(); - { - // auto future = policy.proc_create_team(Chol<Uplo::Upper,AlgoChol::ByBlocks>:: - auto future = policy.proc_create_team(Chol<Uplo::Upper,AlgoChol::ByBlocks,Variant::Two>:: - TaskFunctor<CrsHierTaskViewType>(policy,H), 0); - policy.spawn(future); - Kokkos::Experimental::wait(policy); - } - t_factor_task += timer.seconds(); - - cout << "CholPerformanceDevice:: policy.allocated_task_count = " - << policy.allocated_task_count() - << endl ; - - if (verbose) { - host_UU.copy( device_UU ); - host_UU.showMe( std::cout ); - std::cout << endl; - } - } - cout << "CholPerformanceDevice:: ByBlocks factorize the matrix::time = " << t_factor_task << endl; - } - - return r_val; - } -} - -#endif diff --git a/lib/kokkos/example/ichol/example/example_chol_performance_device_cuda.cpp b/lib/kokkos/example/ichol/example/example_chol_performance_device_cuda.cpp deleted file mode 100644 index 3a0df586b5af15a9c56582d216ecac6e5221853d..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/example/example_chol_performance_device_cuda.cpp +++ /dev/null @@ -1,70 +0,0 @@ -#include <Kokkos_Core.hpp> - -#include <Cuda/Kokkos_Cuda_TaskPolicy.hpp> - -using namespace std; - -typedef double value_type; -typedef int ordinal_type; -typedef int size_type; - -#include "example_chol_performance_device.hpp" - -using namespace Tacho; - -int main (int argc, char *argv[]) { - - string file_input = "test.mtx"; - int nthreads = 1; - int max_task_dependence = 3; - int max_concurrency = 1024; - int team_size = 1; - int fill_level = 0; - int treecut = 0; - int prunecut = 0; - int seed = 0; - int league_size = 1; - bool verbose = 
false; - for (int i=0;i<argc;++i) { - if ((strcmp(argv[i],"--file-input") ==0)) { file_input = argv[++i]; continue;} - if ((strcmp(argv[i],"--nthreads") ==0)) { nthreads = atoi(argv[++i]); continue;} - if ((strcmp(argv[i],"--max-task-dependence")==0)) { max_task_dependence = atoi(argv[++i]); continue;} - if ((strcmp(argv[i],"--max-concurrency") ==0)) { max_concurrency = atoi(argv[++i]); continue;} - if ((strcmp(argv[i],"--team-size") ==0)) { team_size = atoi(argv[++i]); continue;} - - if ((strcmp(argv[i],"--fill-level") ==0)) { fill_level = atoi(argv[++i]); continue;} - if ((strcmp(argv[i],"--league-size") ==0)) { league_size = atoi(argv[++i]); continue;} - if ((strcmp(argv[i],"--treecut") ==0)) { treecut = atoi(argv[++i]); continue;} - if ((strcmp(argv[i],"--prunecut") ==0)) { prunecut = atoi(argv[++i]); continue;} - if ((strcmp(argv[i],"--seed") ==0)) { seed = atoi(argv[++i]); continue;} - if ((strcmp(argv[i],"--enable-verbose") ==0)) { verbose = true; continue;} - } - - int r_val = 0; - { - typedef Kokkos::Cuda exec_space; - - Kokkos::DefaultHostExecutionSpace::initialize(nthreads); - - exec_space::initialize(); - exec_space::print_configuration(cout, true); - - r_val = exampleCholPerformanceDevice - <value_type,ordinal_type,size_type,exec_space> - (file_input, - treecut, - prunecut, - seed, - nthreads, - max_task_dependence, max_concurrency, team_size, - fill_level, league_size, - (nthreads != 1), // skip_serial - verbose); - - exec_space::finalize(); - - Kokkos::DefaultHostExecutionSpace::finalize(); - } - - return r_val; -} diff --git a/lib/kokkos/example/ichol/example/example_chol_performance_device_pthread.cpp b/lib/kokkos/example/ichol/example/example_chol_performance_device_pthread.cpp deleted file mode 100644 index 68f520cf6620888c2a8de2f8cabe06a5b9e8b607..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/example/example_chol_performance_device_pthread.cpp +++ /dev/null @@ -1,67 +0,0 @@ -#include <Kokkos_Core.hpp> - -#include 
<Kokkos_Threads.hpp> -#include <Threads/Kokkos_Threads_TaskPolicy.hpp> - -using namespace std; - -typedef double value_type; -typedef int ordinal_type; -typedef int size_type; - -typedef Kokkos::Threads exec_space; - -#include "example_chol_performance_device.hpp" - -using namespace Tacho; - -int main (int argc, char *argv[]) { - - string file_input = "test.mtx"; - int nthreads = 1; - int max_task_dependence = 3; - int max_concurrency = 1024; - int team_size = 1; - int fill_level = 0; - int treecut = 0; - int prunecut = 0; - int seed = 0; - int league_size = 1; - bool verbose = false; - for (int i=0;i<argc;++i) { - if ((strcmp(argv[i],"--file-input") ==0)) { file_input = argv[++i]; continue;} - if ((strcmp(argv[i],"--nthreads") ==0)) { nthreads = atoi(argv[++i]); continue;} - if ((strcmp(argv[i],"--max-task-dependence")==0)) { max_task_dependence = atoi(argv[++i]); continue;} - if ((strcmp(argv[i],"--max-concurrency") ==0)) { max_concurrency = atoi(argv[++i]); continue;} - if ((strcmp(argv[i],"--team-size") ==0)) { team_size = atoi(argv[++i]); continue;} - - if ((strcmp(argv[i],"--fill-level") ==0)) { fill_level = atoi(argv[++i]); continue;} - if ((strcmp(argv[i],"--league-size") ==0)) { league_size = atoi(argv[++i]); continue;} - if ((strcmp(argv[i],"--treecut") ==0)) { treecut = atoi(argv[++i]); continue;} - if ((strcmp(argv[i],"--prunecut") ==0)) { prunecut = atoi(argv[++i]); continue;} - if ((strcmp(argv[i],"--seed") ==0)) { seed = atoi(argv[++i]); continue;} - if ((strcmp(argv[i],"--enable-verbose") ==0)) { verbose = true; continue;} - } - - int r_val = 0; - { - exec_space::initialize(nthreads); - exec_space::print_configuration(cout, true); - - r_val = exampleCholPerformanceDevice - <value_type,ordinal_type,size_type,exec_space> - (file_input, - treecut, - prunecut, - seed, - nthreads, - max_task_dependence, max_concurrency, team_size, - fill_level, league_size, - (nthreads != 1), // skip_serial - verbose); - - exec_space::finalize(); - } - - return r_val; -} 
diff --git a/lib/kokkos/example/ichol/src/chol.hpp b/lib/kokkos/example/ichol/src/chol.hpp deleted file mode 100644 index e8aa4e9189ffb607c91cc2b86811084b69a45393..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/chol.hpp +++ /dev/null @@ -1,92 +0,0 @@ -#pragma once -#ifndef __CHOL_HPP__ -#define __CHOL_HPP__ - -/// \file chol.hpp -/// \brief Incomplete Cholesky factorization front interface. -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -#include "util.hpp" -#include "control.hpp" -#include "partition.hpp" - -namespace Tacho { - - using namespace std; - - // tasking interface - // * default behavior is for non-by-blocks tasks - // * control is only used for by-blocks algorithms - // =============================================== - template<int ArgUplo, int ArgAlgo, - int ArgVariant = Variant::One, - template<int,int> class ControlType = Control> - class Chol { - public: - - // function interface - // ================== - template<typename ExecViewType> - KOKKOS_INLINE_FUNCTION - static int invoke(typename ExecViewType::policy_type &policy, - const typename ExecViewType::policy_type::member_type &member, - typename ExecViewType::matrix_type &A); - - // task-data parallel interface - // ============================ - template<typename ExecViewType> - class TaskFunctor { - public: - typedef typename ExecViewType::policy_type policy_type; - typedef typename policy_type::member_type member_type; - typedef int value_type; - - private: - typename ExecViewType::matrix_type _A; - - policy_type _policy; - - public: - KOKKOS_INLINE_FUNCTION - TaskFunctor(const policy_type & P , - const typename ExecViewType::matrix_type & A) - : _A(A), - _policy(P) - { } - - string Label() const { return "Chol"; } - - // task execution - KOKKOS_INLINE_FUNCTION - void apply(value_type &r_val) { - r_val = Chol::invoke<ExecViewType>(_policy, _policy.member_single(), _A); - } - - // task-data execution - KOKKOS_INLINE_FUNCTION - void apply(const member_type &member, 
value_type &r_val) { - - const int result = Chol::invoke<ExecViewType>(_policy, member, _A); - - if ( 0 == member.team_rank() ) { r_val = result ; } - - } - - }; - - }; -} - - -// unblocked version blas operations -#include "scale.hpp" - -// blocked version blas operations -#include "gemm.hpp" -#include "trsm.hpp" -#include "herk.hpp" - -// cholesky -#include "chol_u.hpp" - -#endif diff --git a/lib/kokkos/example/ichol/src/chol_u.hpp b/lib/kokkos/example/ichol/src/chol_u.hpp deleted file mode 100644 index 0465ef8f35c8574189c767b6f97dfc7a6344f2cb..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/chol_u.hpp +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once -#ifndef __CHOL_U_HPP__ -#define __CHOL_U_HPP__ - -/// \file chol_u.hpp -/// \brief Upper Cholesky factorization variations -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -// testing task-data parallelism -// #include "chol_u_unblocked_dummy.hpp" - -// flame style implementation -//#include "chol_unblocked.hpp" -//#include "chol_u_blocked.hpp" - -// triple for loop -#include "chol_u_unblocked_opt1.hpp" -#include "chol_u_unblocked_opt2.hpp" - -// partitioned block algorithms: see control.hpp -#include "chol_u_right_look_by_blocks.hpp" - -#endif diff --git a/lib/kokkos/example/ichol/src/chol_u_right_look_by_blocks.hpp b/lib/kokkos/example/ichol/src/chol_u_right_look_by_blocks.hpp deleted file mode 100644 index e21bafa9f1db5e9dda1a0e24f21a4552f011d27a..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/chol_u_right_look_by_blocks.hpp +++ /dev/null @@ -1,394 +0,0 @@ -#pragma once -#ifndef __CHOL_U_RIGHT_LOOK_BY_BLOCKS_HPP__ -#define __CHOL_U_RIGHT_LOOK_BY_BLOCKS_HPP__ - -/// \file chol_u_right_look_by_blocks.hpp -/// \brief Cholesky factorization by-blocks -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -/// The Partitioned-Block Matrix (PBM) is sparse and a block itself is a view of a sparse matrix. 
-/// The algorithm generates tasks with a given sparse block matrix structure. - -// basic utils -#include "util.hpp" -#include "control.hpp" -#include "partition.hpp" - -namespace Tacho { - - using namespace std; - - template< typename CrsTaskViewType > - KOKKOS_INLINE_FUNCTION - int releaseFutures( typename CrsTaskViewType::matrix_type & A ) - { - typedef typename CrsTaskViewType::ordinal_type ordinal_type; - typedef typename CrsTaskViewType::row_view_type row_view_type; - typedef typename CrsTaskViewType::future_type future_type; - - row_view_type a(A,0); - - const ordinal_type nnz = a.NumNonZeros(); - - for (ordinal_type j=0;j<nnz;++j) { - a.Value(j).setFuture( future_type() ); - } - - return nnz ; - } - - // ======================================== - // detailed workflow of by-blocks algorithm - // ======================================== - template<int ArgVariant, - template<int,int> class ControlType, - typename CrsTaskViewType> - class CholUpperRightLookByBlocks { - public: - KOKKOS_INLINE_FUNCTION - static int genScalarTask(typename CrsTaskViewType::policy_type &policy, - typename CrsTaskViewType::matrix_type &A) { - typedef typename CrsTaskViewType::value_type value_type; - typedef typename CrsTaskViewType::row_view_type row_view_type; - - typedef typename CrsTaskViewType::future_type future_type; - typedef typename CrsTaskViewType::task_factory_type task_factory_type; - - row_view_type a(A, 0); - value_type &aa = a.Value(0); - - // construct a task - future_type f = task_factory_type::create(policy, - typename Chol<Uplo::Upper, - CtrlDetail(ControlType,AlgoChol::ByBlocks,ArgVariant,Chol)> - ::template TaskFunctor<value_type>(policy,aa)); - - -if ( false ) { - printf("Chol [%d +%d)x[%d +%d) spawn depend %d\n" - , aa.OffsetRows() - , aa.NumRows() - , aa.OffsetCols() - , aa.NumCols() - , int( ! 
aa.Future().is_null() ) - ); -} - - // manage dependence - task_factory_type::addDependence(policy, f, aa.Future()); - aa.setFuture(f); - - // spawn a task - task_factory_type::spawn(policy, f, true /* high priority */ ); - - return 1; - } - - KOKKOS_INLINE_FUNCTION - static int genTrsmTasks(typename CrsTaskViewType::policy_type &policy, - typename CrsTaskViewType::matrix_type &A, - typename CrsTaskViewType::matrix_type &B) { - typedef typename CrsTaskViewType::ordinal_type ordinal_type; - typedef typename CrsTaskViewType::row_view_type row_view_type; - typedef typename CrsTaskViewType::value_type value_type; - - typedef typename CrsTaskViewType::future_type future_type; - typedef typename CrsTaskViewType::task_factory_type task_factory_type; - - row_view_type a(A,0), b(B,0); - value_type &aa = a.Value(0); - -if ( false ) { - printf("genTrsmTasks after aa.Future().reference_count = %d\n" - , aa.Future().reference_count()); -} - const ordinal_type nnz = b.NumNonZeros(); - for (ordinal_type j=0;j<nnz;++j) { - typedef typename - Trsm< Side::Left,Uplo::Upper,Trans::ConjTranspose, - CtrlDetail(ControlType,AlgoChol::ByBlocks,ArgVariant,Trsm)> - ::template TaskFunctor<double,value_type,value_type> - FunctorType ; - - value_type &bb = b.Value(j); - - future_type f = task_factory_type - ::create(policy, FunctorType(policy,Diag::NonUnit, 1.0, aa, bb)); - -if ( false ) { - printf("Trsm [%d +%d)x[%d +%d) spawn depend %d %d\n" - , bb.OffsetRows() - , bb.NumRows() - , bb.OffsetCols() - , bb.NumCols() - , int( ! aa.Future().is_null() ) - , int( ! 
bb.Future().is_null() ) - ); -} - - // trsm dependence - task_factory_type::addDependence(policy, f, aa.Future()); - - // self - task_factory_type::addDependence(policy, f, bb.Future()); - - // place task signature on b - bb.setFuture(f); - - // spawn a task - task_factory_type::spawn(policy, f, true /* high priority */); - } - -if ( false ) { - printf("genTrsmTasks after aa.Future().reference_count = %d\n" - , aa.Future().reference_count()); -} - - return nnz ; - } - - KOKKOS_INLINE_FUNCTION - static int genHerkTasks(typename CrsTaskViewType::policy_type &policy, - typename CrsTaskViewType::matrix_type &A, - typename CrsTaskViewType::matrix_type &C) { - typedef typename CrsTaskViewType::ordinal_type ordinal_type; - typedef typename CrsTaskViewType::value_type value_type; - typedef typename CrsTaskViewType::row_view_type row_view_type; - - typedef typename CrsTaskViewType::future_type future_type; - typedef typename CrsTaskViewType::task_factory_type task_factory_type; - - // case that X.transpose, A.no_transpose, Y.no_transpose - - row_view_type a(A,0), c; - - const ordinal_type nnz = a.NumNonZeros(); - ordinal_type herk_count = 0 ; - ordinal_type gemm_count = 0 ; - - // update herk - for (ordinal_type i=0;i<nnz;++i) { - const ordinal_type row_at_i = a.Col(i); - value_type &aa = a.Value(i); - - c.setView(C, row_at_i); - - ordinal_type idx = 0; - for (ordinal_type j=i;j<nnz && (idx > -2);++j) { - const ordinal_type col_at_j = a.Col(j); - value_type &bb = a.Value(j); - - if (row_at_i == col_at_j) { - idx = c.Index(row_at_i, idx); - if (idx >= 0) { - ++herk_count ; - value_type &cc = c.Value(idx); - future_type f = task_factory_type - ::create(policy, - typename Herk<Uplo::Upper,Trans::ConjTranspose, - CtrlDetail(ControlType,AlgoChol::ByBlocks,ArgVariant,Herk)> - ::template TaskFunctor<double,value_type,value_type>(policy,-1.0, aa, 1.0, cc)); - - -if ( false ) { - printf("Herk [%d +%d)x[%d +%d) spawn %d %d\n" - , cc.OffsetRows() - , cc.NumRows() - , cc.OffsetCols() - 
, cc.NumCols() - , int( ! aa.Future().is_null() ) - , int( ! cc.Future().is_null() ) - ); -} - - // dependence - task_factory_type::addDependence(policy, f, aa.Future()); - - // self - task_factory_type::addDependence(policy, f, cc.Future()); - - // place task signature on y - cc.setFuture(f); - - // spawn a task - task_factory_type::spawn(policy, f); - } - } else { - idx = c.Index(col_at_j, idx); - if (idx >= 0) { - ++gemm_count ; - value_type &cc = c.Value(idx); - future_type f = task_factory_type - ::create(policy, - typename Gemm<Trans::ConjTranspose,Trans::NoTranspose, - CtrlDetail(ControlType,AlgoChol::ByBlocks,ArgVariant,Gemm)> - ::template TaskFunctor<double,value_type,value_type,value_type>(policy,-1.0, aa, bb, 1.0, cc)); - - -if ( false ) { - printf("Gemm [%d +%d)x[%d +%d) spawn %d %d %d\n" - , cc.OffsetRows() - , cc.NumRows() - , cc.OffsetCols() - , cc.NumCols() - , int( ! aa.Future().is_null() ) - , int( ! bb.Future().is_null() ) - , int( ! cc.Future().is_null() ) - ); -} - - // dependence - task_factory_type::addDependence(policy, f, aa.Future()); - task_factory_type::addDependence(policy, f, bb.Future()); - - // self - task_factory_type::addDependence(policy, f, cc.Future()); - - // place task signature on y - cc.setFuture(f); - - // spawn a task - task_factory_type::spawn(policy, f); - } - } - } - } - -if ( false ) { -printf("genHerkTask Herk(%ld) Gemm(%ld)\n",(long)herk_count,(long)gemm_count); -} - - return herk_count + gemm_count ; - } - - }; - - // specialization for different task generation in right looking by-blocks algorithm - // ================================================================================= - template<int ArgVariant, template<int,int> class ControlType> - class Chol<Uplo::Upper,AlgoChol::RightLookByBlocks,ArgVariant,ControlType> { - public: - - // function interface - // ================== - template<typename ExecViewType> - KOKKOS_INLINE_FUNCTION - static int invoke(typename ExecViewType::policy_type &policy, - const 
typename ExecViewType::policy_type::member_type &member, - typename ExecViewType::matrix_type & A, - int checkpoint ) - { - typedef typename ExecViewType::row_view_type row_view_type ; - - enum { CYCLE = 2 }; - - typename ExecViewType::matrix_type - ATL, ATR, A00, A01, A02, - ABL, ABR, A10, A11, A12, - A20, A21, A22; - - Part_2x2(A, ATL, ATR, - /**/ABL, ABR, - checkpoint, checkpoint, Partition::TopLeft); - - int tasks_spawned = 0 ; - int futures_released = 0 ; - - for ( int i = 0 ; i < CYCLE && ATL.NumRows() < A.NumRows() ; ++i ) { - Part_2x2_to_3x3(ATL, ATR, /**/ A00, A01, A02, - /*******/ /**/ A10, A11, A12, - ABL, ABR, /**/ A20, A21, A22, - 1, 1, Partition::BottomRight); - // ----------------------------------------------------- - // Spawning tasks: - - // A11 = chol(A11) : #task = 1 - tasks_spawned += - CholUpperRightLookByBlocks<ArgVariant,ControlType,ExecViewType> - ::genScalarTask(policy, A11); - - // A12 = inv(triu(A11)') * A12 : #tasks = non-zero row blocks - tasks_spawned += - CholUpperRightLookByBlocks<ArgVariant,ControlType,ExecViewType> - ::genTrsmTasks(policy, A11, A12); - - // A22 = A22 - A12' * A12 : #tasks = highly variable - tasks_spawned += - CholUpperRightLookByBlocks<ArgVariant,ControlType,ExecViewType> - ::genHerkTasks(policy, A12, A22); - - // ----------------------------------------------------- - // Can release futures of A11 and A12 - - futures_released += releaseFutures<ExecViewType>( A11 ); - futures_released += releaseFutures<ExecViewType>( A12 ); - -if ( false ) { - printf("Chol iteration(%d) task_count(%d) cumulative: spawn(%d) release(%d)\n" - , int(ATL.NumRows()) - , policy.allocated_task_count() - , tasks_spawned , futures_released - ); -} - - // ----------------------------------------------------- - Merge_3x3_to_2x2(A00, A01, A02, /**/ ATL, ATR, - A10, A11, A12, /**/ /******/ - A20, A21, A22, /**/ ABL, ABR, - Partition::TopLeft); - - } - - return ATL.NumRows(); - } - - // task-data parallel interface - // 
============================ - template<typename ExecViewType> - class TaskFunctor { - public: - typedef typename ExecViewType::policy_type policy_type; - typedef typename ExecViewType::future_type future_type; - typedef typename policy_type::member_type member_type; - typedef int value_type; - - private: - typename ExecViewType::matrix_type _A; - - policy_type _policy; - int _checkpoint ; - - public: - KOKKOS_INLINE_FUNCTION - TaskFunctor(const policy_type & P , - const typename ExecViewType::matrix_type & A) - : _A(A), - _policy(P), - _checkpoint(0) - { } - - string Label() const { return "Chol"; } - - // task-data execution - KOKKOS_INLINE_FUNCTION - void apply(const member_type &member, value_type &r_val) - { - if (member.team_rank() == 0) { - // Clear out previous dependence - _policy.clear_dependence( this ); - - _checkpoint = Chol::invoke<ExecViewType>(_policy, member, _A, _checkpoint); - - if ( _checkpoint < _A.NumRows() ) _policy.respawn_needing_memory(this); - - r_val = 0 ; - } - return ; - } - - }; - - }; -} - -#endif diff --git a/lib/kokkos/example/ichol/src/chol_u_unblocked_opt1.hpp b/lib/kokkos/example/ichol/src/chol_u_unblocked_opt1.hpp deleted file mode 100644 index 3bb99c71424f491bbb5bea712475fcac116ad24e..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/chol_u_unblocked_opt1.hpp +++ /dev/null @@ -1,90 +0,0 @@ -#pragma once -#ifndef __CHOL_U_UNBLOCKED_OPT1_HPP__ -#define __CHOL_U_UNBLOCKED_OPT1_HPP__ - -/// \file chol_u_unblocked_opt1.hpp -/// \brief Unblocked incomplete Chloesky factorization. 
-/// \author Kyungjoo Kim (kyukim@sandia.gov) - -#include "util.hpp" -#include "partition.hpp" - -namespace Tacho { - - using namespace std; - - template<> - template<typename CrsExecViewType> - KOKKOS_INLINE_FUNCTION - int - Chol<Uplo::Upper,AlgoChol::UnblockedOpt,Variant::One> - ::invoke(typename CrsExecViewType::policy_type &policy, - const typename CrsExecViewType::policy_type::member_type &member, - typename CrsExecViewType::matrix_type &A) { - - typedef typename CrsExecViewType::value_type value_type; - typedef typename CrsExecViewType::ordinal_type ordinal_type; - typedef typename CrsExecViewType::row_view_type row_view_type; - - // row_view_type r1t, r2t; - - for (ordinal_type k=0;k<A.NumRows();++k) { - //r1t.setView(A, k); - row_view_type &r1t = A.RowView(k); - - // extract diagonal from alpha11 - value_type &alpha = r1t.Value(0); - - if (member.team_rank() == 0) { - // if encounter null diag or wrong index, return -(row + 1) - if (abs(alpha) == 0.0 || r1t.Col(0) != k) - return -(k + 1); - - // error handling should be more carefully designed - - // sqrt on diag - // alpha = sqrt(real(alpha)); - alpha = sqrt(alpha); - } - member.team_barrier(); - - const ordinal_type nnz_r1t = r1t.NumNonZeros(); - - if (nnz_r1t) { - // inverse scale - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 1, nnz_r1t), - [&](const ordinal_type j) { - r1t.Value(j) /= alpha; - }); - - member.team_barrier(); - - // hermitian rank update - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 1, nnz_r1t), - [&](const ordinal_type i) { - const ordinal_type row_at_i = r1t.Col(i); - // const value_type val_at_i = conj(r1t.Value(i)); - const value_type val_at_i = r1t.Value(i); - - //r2t.setView(A, row_at_i); - row_view_type &r2t = A.RowView(row_at_i); - ordinal_type idx = 0; - - for (ordinal_type j=i;j<nnz_r1t && (idx > -2);++j) { - const ordinal_type col_at_j = r1t.Col(j); - idx = r2t.Index(col_at_j, idx); - - if (idx >= 0) { - const value_type val_at_j = r1t.Value(j); - 
r2t.Value(idx) -= val_at_i*val_at_j; - } - } - }); - } - } - return 0; - } - -} - -#endif diff --git a/lib/kokkos/example/ichol/src/chol_u_unblocked_opt2.hpp b/lib/kokkos/example/ichol/src/chol_u_unblocked_opt2.hpp deleted file mode 100644 index e7d1dc826235120a84af25ff239fb705c65489f0..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/chol_u_unblocked_opt2.hpp +++ /dev/null @@ -1,154 +0,0 @@ -#pragma once -#ifndef __CHOL_U_UNBLOCKED_OPT2_HPP__ -#define __CHOL_U_UNBLOCKED_OPT2_HPP__ - -/// \file chol_u_unblocked_opt2.hpp -/// \brief Unblocked incomplete Chloesky factorization; version for data parallel sharing L1 cache. -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -#include "util.hpp" -#include "partition.hpp" - -namespace Tacho { - - using namespace std; - - template<> - template<typename CrsExecViewType> - KOKKOS_INLINE_FUNCTION - int - Chol<Uplo::Upper,AlgoChol::UnblockedOpt,Variant::Two> - ::invoke(typename CrsExecViewType::policy_type &policy, - const typename CrsExecViewType::policy_type::member_type &member, - typename CrsExecViewType::matrix_type &A) { - - typedef typename CrsExecViewType::value_type value_type; - typedef typename CrsExecViewType::ordinal_type ordinal_type; - typedef typename CrsExecViewType::row_view_type row_view_type; - -if ( false && member.team_rank() == 0 ) { - printf("Chol [%d +%d)x[%d +%d) begin\n" - , A.OffsetRows() - , A.NumRows() - , A.OffsetCols() - , A.NumCols() - ); -} - - // row_view_type r1t, r2t; - - for (ordinal_type k=0;k<A.NumRows();++k) { - //r1t.setView(A, k); - row_view_type &r1t = A.RowView(k); - - // extract diagonal from alpha11 - value_type &alpha = r1t.Value(0); - - if (member.team_rank() == 0) { - // if encounter null diag or wrong index, return -(row + 1) - if (abs(alpha) == 0.0 || r1t.Col(0) != k) - return -(k + 1); - - // error handling should be more carefully designed - - // sqrt on diag - // alpha = sqrt(real(alpha)); - alpha = sqrt(alpha); - } - member.team_barrier(); - - -if 
( false && member.team_rank() == 0 ) { - printf("Chol [%d +%d)x[%d +%d) local row %d\n" - , A.OffsetRows() - , A.NumRows() - , A.OffsetCols() - , A.NumCols() - , int(k) - ); -} - - - const ordinal_type nnz_r1t = r1t.NumNonZeros(); - - if (nnz_r1t) { - // inverse scale - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 1, nnz_r1t), - [&](const ordinal_type j) { - r1t.Value(j) /= alpha; - }); - - member.team_barrier(); - - -if ( false && member.team_rank() == 0 ) { - printf("Chol [%d +%d)x[%d +%d) local row %d nnz_r1t\n" - , A.OffsetRows() - , A.NumRows() - , A.OffsetCols() - , A.NumCols() - , int(k) - ); -} - - // hermitian rank update - for (ordinal_type i=1;i<nnz_r1t;++i) { - const ordinal_type row_at_i = r1t.Col(i); - // const value_type val_at_i = conj(r1t.Value(i)); - const value_type val_at_i = r1t.Value(i); - - //r2t.setView(A, row_at_i); - row_view_type &r2t = A.RowView(row_at_i); - - ordinal_type member_idx = 0 ; - - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, i, nnz_r1t), - [&](const ordinal_type j) { - if (member_idx > -2) { - const ordinal_type col_at_j = r1t.Col(j); - member_idx = r2t.Index(col_at_j, member_idx); - if (member_idx >= 0) { - const value_type val_at_j = r1t.Value(j); - r2t.Value(member_idx) -= val_at_i*val_at_j; - } - } - }); - } - } - - -if ( false ) { -member.team_barrier(); -if ( member.team_rank() == 0 ) { - printf("Chol [%d +%d)x[%d +%d) local row %d end\n" - , A.OffsetRows() - , A.NumRows() - , A.OffsetCols() - , A.NumCols() - , int(k) - ); -} -} - - } - - -if ( false ) { -member.team_barrier(); -if ( member.team_rank() == 0 ) { - printf("Chol [%d +%d)x[%d +%d) end\n" - , A.OffsetRows() - , A.NumRows() - , A.OffsetCols() - , A.NumCols() - ); -} -} - - - return 0; - } - -} - -#endif diff --git a/lib/kokkos/example/ichol/src/control.hpp b/lib/kokkos/example/ichol/src/control.hpp deleted file mode 100644 index bf5efef9fded8685f646d81855469f6f363b1e73..0000000000000000000000000000000000000000 --- 
a/lib/kokkos/example/ichol/src/control.hpp +++ /dev/null @@ -1,110 +0,0 @@ -#pragma once -#ifndef __CONTROL_HPP__ -#define __CONTROL_HPP__ - -#include "util.hpp" - -/// \file control.hpp -/// \brief A collection of control trees composing high-level variants of algorithms. -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -/// description is a bit wrong - -using namespace std; - -namespace Tacho { - - // forward declaration for control tree - template<int ArgAlgo, int ArgVariant> - struct Control { - static constexpr int Self[2] = { ArgAlgo, ArgVariant }; - }; - - // ---------------------------------------------------------------------------------- - - // - CholByblocks Variant 1 - // * partitioned block matrix (blocks are sparse) - template<> struct Control<AlgoChol::ByBlocks,Variant::One> { - // chol var 1 : nested data parallel for is applied in the second inner loop - // chol var 2 : nested data parallel for is applied in the most inner loop - static constexpr int Chol[2] = { AlgoChol::UnblockedOpt, Variant::Two }; - static constexpr int Trsm[2] = { AlgoTrsm::ForFactorBlocked, Variant::One }; - static constexpr int Herk[2] = { AlgoHerk::ForFactorBlocked, Variant::One }; - static constexpr int Gemm[2] = { AlgoGemm::ForFactorBlocked, Variant::One }; - }; - - // - CholByBlocks Variant 2 - // * diagonal blocks have nested dense blocks - template<> struct Control<AlgoChol::ByBlocks,Variant::Two> { - static constexpr int Chol[2] = { AlgoChol::UnblockedOpt, Variant::One }; - static constexpr int Trsm[2] = { AlgoTrsm::ForFactorBlocked, Variant::One }; - static constexpr int Herk[2] = { AlgoHerk::ForFactorBlocked, Variant::One }; - static constexpr int Gemm[2] = { AlgoGemm::ForFactorBlocked, Variant::One }; - }; - - // - CholByBlocks Variant 3 - // * all blocks have nested dense blocks (full supernodal algorithm) - // template<> struct Control<AlgoChol::ByBlocks,Variant::Three> { - // static constexpr int Chol[2] = { AlgoChol::NestedDenseBlock, Variant::One }; - // static 
constexpr int Trsm[2] = { AlgoTrsm::NestedDenseBlock, Variant::One }; - // static constexpr int Herk[2] = { AlgoHerk::NestedDenseBlock, Variant::One }; - // static constexpr int Gemm[2] = { AlgoGemm::NestedDenseBlock, Variant::One }; - // }; - - // - CholByBlocks Variant 4 - // * diagonal blocks have nested hier dense blocks (hierarchical task scheduling) - // template<> struct Control<AlgoChol::ByBlocks,Variant::Four> { - // static constexpr int Chol[2] = { AlgoChol::NestedDenseByBlocks, Variant::One }; - // static constexpr int Trsm[2] = { AlgoTrsm::ForFactorBlocked, Variant::One }; - // static constexpr int Herk[2] = { AlgoHerk::ForFactorBlocked, Variant::One }; - // static constexpr int Gemm[2] = { AlgoGemm::ForFactorBlocked, Variant::One }; - //}; - - // - CholByBlocks Variant 5 - // * diagonal blocks have nested hier dense blocks (hierarchical task scheduling) - // template<> struct Control<AlgoChol::ByBlocks,Variant::Four> { - // static constexpr int Chol[2] = { AlgoChol::NestedDenseByBlocks, Variant::One }; - // static constexpr int Trsm[2] = { AlgoTrsm::NestedDenseByBlocks, Variant::One }; - // static constexpr int Herk[2] = { AlgoHerk::NestedDenseByBlocks, Variant::One }; - // static constexpr int Gemm[2] = { AlgoGemm::NestedDenseByBlocks, Variant::One }; - // }; - - // ---------------------------------------------------------------------------------- - - // - CholNestedDenseBlock - // * branch control between sparse and dense operations - template<> struct Control<AlgoChol::NestedDenseBlock,Variant::One> { - static constexpr int CholSparse[2] = { AlgoChol::UnblockedOpt, Variant::One }; - static constexpr int CholDense[2] = { AlgoChol::ExternalLapack, Variant::One }; - }; - - // - CholNestedDenseBlock - // * branch control between sparse and dense operations - template<> struct Control<AlgoChol::NestedDenseByBlocks,Variant::One> { - static constexpr int CholSparse[2] = { AlgoChol::UnblockedOpt, Variant::One }; - static constexpr int CholDenseByBlocks[2] = 
{ AlgoChol::DenseByBlocks, Variant::One }; - }; - - // ---------------------------------------------------------------------------------- - - // - CholDenseBlock - // * dense matrix Cholesky-by-blocks - template<> struct Control<AlgoChol::DenseByBlocks,Variant::One> { - static constexpr int Chol[2] = { AlgoChol::ExternalLapack, Variant::One }; - static constexpr int Trsm[2] = { AlgoTrsm::ExternalBlas, Variant::One }; - static constexpr int Herk[2] = { AlgoHerk::ExternalBlas, Variant::One }; - static constexpr int Gemm[2] = { AlgoGemm::ExternalBlas, Variant::One }; - }; - - template<> struct Control<AlgoGemm::DenseByBlocks,Variant::One> { - static constexpr int Gemm[2] = { AlgoGemm::ExternalBlas, Variant::One }; - }; - - template<> struct Control<AlgoTrsm::DenseByBlocks,Variant::One> { - static constexpr int Gemm[2] = { AlgoGemm::ExternalBlas, Variant::One }; - static constexpr int Trsm[2] = { AlgoTrsm::ExternalBlas, Variant::One }; - }; - -} - -#endif diff --git a/lib/kokkos/example/ichol/src/coo.hpp b/lib/kokkos/example/ichol/src/coo.hpp deleted file mode 100644 index 977f17e5c5fb2d9ce520548cc04bc15c107a4c60..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/coo.hpp +++ /dev/null @@ -1,75 +0,0 @@ -#pragma once -#ifndef __COO_HPP__ -#define __COO_HPP__ - -/// \file coo.hpp -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -namespace Tacho { - - using namespace std; - - /// \class Coo - /// \brief Sparse coordinate format; (i, j, val). 
- template<typename CrsMatType> - class Coo { - public: - typedef typename CrsMatType::ordinal_type ordinal_type; - typedef typename CrsMatType::value_type value_type; - - public: - ordinal_type _i,_j; - value_type _val; - - public: - ordinal_type& Row() { return _i; } - ordinal_type& Col() { return _j; } - value_type& Val() { return _val; } - - ordinal_type Row() const { return _i; } - ordinal_type Col() const { return _j; } - value_type Val() const { return _val; } - - Coo() {} - - Coo(const ordinal_type i, - const ordinal_type j, - const value_type val) - : _i(i), - _j(j), - _val(val) - { } - - Coo(const Coo& b) - : _i(b._i), - _j(b._j), - _val(b._val) - { } - - Coo<CrsMatType>& operator=(const Coo<CrsMatType> &y) { - this->_i = y._i; - this->_j = y._j; - this->_val = y._val; - - return *this; - } - - /// \brief Compare "less" index i and j only. - bool operator<(const Coo<CrsMatType> &y) const { - ordinal_type r_val = (this->_i - y._i); - return (r_val == 0 ? this->_j < y._j : r_val < 0); - } - - /// \brief Compare "equality" only index i and j. - bool operator==(const Coo<CrsMatType> &y) const { - return (this->_i == y._i) && (this->_j == y._j); - } - - /// \brief Compare "in-equality" only index i and j. - bool operator!=(const Coo<CrsMatType> &y) const { - return !(*this == y); - } - }; - -} -#endif diff --git a/lib/kokkos/example/ichol/src/crs_matrix_base.hpp b/lib/kokkos/example/ichol/src/crs_matrix_base.hpp deleted file mode 100644 index ad08b8757e83c68b8a9224a1d41c3087930a2eb4..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/crs_matrix_base.hpp +++ /dev/null @@ -1,598 +0,0 @@ -#pragma once -#ifndef __CRS_MATRIX_BASE_HPP__ -#define __CRS_MATRIX_BASE_HPP__ - -/// \file crs_matrix_base.hpp -/// \brief CRS matrix base object interfaces to user provided input matrices. 
-/// \author Kyungjoo Kim (kyukim@sandia.gov) - -#include "util.hpp" -#include "coo.hpp" - -namespace Tacho { - - using namespace std; - - template< typename , typename > class TaskView ; - - template < typename CrsMatrixType > - struct GetCrsMatrixRowViewType { - typedef int type ; - }; - - - template < typename CrsMatrixViewType , typename TaskFactoryType > - struct GetCrsMatrixRowViewType - < TaskView<CrsMatrixViewType,TaskFactoryType> > - { - typedef typename CrsMatrixViewType::row_view_type type ; - }; - - /// \class CrsMatrixBase - /// \breif CRS matrix base object using Kokkos view and subview - template<typename ValueType, - typename OrdinalType, - typename SizeType = OrdinalType, - typename SpaceType = void, - typename MemoryTraits = void> - class CrsMatrixBase { - public: - typedef ValueType value_type; - typedef OrdinalType ordinal_type; - typedef SpaceType space_type; - typedef SizeType size_type; - typedef MemoryTraits memory_traits; - - // 1D view, layout does not matter; no template parameters for that - typedef Kokkos::View<size_type*, space_type,memory_traits> size_type_array; - typedef Kokkos::View<ordinal_type*,space_type,memory_traits> ordinal_type_array; - typedef Kokkos::View<value_type*, space_type,memory_traits> value_type_array; - - typedef typename size_type_array::value_type* size_type_array_ptr; - typedef typename ordinal_type_array::value_type* ordinal_type_array_ptr; - typedef typename value_type_array::value_type* value_type_array_ptr; - - // range type - template<typename T> using range_type = pair<T,T>; - - // external interface - typedef Coo<CrsMatrixBase> ijv_type; - - friend class CrsMatrixHelper; - - private: - - ordinal_type _m; //!< # of rows - ordinal_type _n; //!< # of cols - size_type _nnz; //!< # of nonzeros - size_type_array _ap; //!< pointers to column index and values - ordinal_type_array _aj; //!< column index compressed format - value_type_array _ax; //!< values - - public: - - typedef typename 
GetCrsMatrixRowViewType< ValueType >::type row_view_type ; - typedef Kokkos::View<row_view_type*,space_type> row_view_type_array; - - row_view_type_array _all_row_views ; - - protected: - - void createInternalArrays(const ordinal_type m, - const ordinal_type n, - const size_type nnz) { - _m = m; - _n = n; - _nnz = nnz; - - if (static_cast<ordinal_type>(_ap.dimension_0()) < m+1) - _ap = size_type_array("CrsMatrixBase::RowPtrArray", m+1); - - if (static_cast<size_type>(_aj.dimension_0()) < nnz) - _aj = ordinal_type_array("CrsMatrixBase::ColsArray", nnz); - - if (static_cast<size_type>(_ax.dimension_0()) < nnz) - _ax = value_type_array("CrsMatrixBase::ValuesArray", nnz); - } - - // Copy sparse matrix structure from coordinate format in 'mm' - // to CRS format in Views _ap, _aj, a_x. - void ijv2crs(const vector<ijv_type> &mm) { - - ordinal_type ii = 0; - size_type jj = 0; - - ijv_type prev = mm[0]; - _ap[ii++] = 0; - _aj[jj] = prev.Col(); - _ax[jj] = prev.Val(); - ++jj; - - for (typename vector<ijv_type>::const_iterator it=(mm.begin()+1);it<mm.end();++it) { - ijv_type aij = (*it); - - // row index - if (aij.Row() != prev.Row()) { - _ap[ii++] = jj; - } - - if (aij == prev) { - --jj; - _aj[jj] = aij.Col(); - _ax[jj] += aij.Val(); - } else { - _aj[jj] = aij.Col(); - _ax[jj] = aij.Val(); - } - ++jj; - - prev = aij; - } - - // add the last index to terminate the storage - _ap[ii++] = jj; - _nnz = jj; - } - - public: - - KOKKOS_INLINE_FUNCTION - void setNumNonZeros() { - if (_m) - _nnz = _ap[_m]; - } - - KOKKOS_INLINE_FUNCTION - ordinal_type NumRows() const { return _m; } - - KOKKOS_INLINE_FUNCTION - ordinal_type NumCols() const { return _n; } - - KOKKOS_INLINE_FUNCTION - size_type NumNonZeros() const { return _nnz; } - - KOKKOS_INLINE_FUNCTION - size_type_array_ptr RowPtr() const { return &_ap[0]; } - - KOKKOS_INLINE_FUNCTION - ordinal_type_array_ptr ColPtr() const { return &_aj[0]; } - - KOKKOS_INLINE_FUNCTION - value_type_array_ptr ValuePtr() const { return &_ax[0];} - - 
KOKKOS_INLINE_FUNCTION - size_type RowPtr(const ordinal_type i) const { return _ap[i]; } - - KOKKOS_INLINE_FUNCTION - ordinal_type_array_ptr ColsInRow(const ordinal_type i) const { return _aj.data() + _ap[i] ; } - - KOKKOS_INLINE_FUNCTION - value_type_array_ptr ValuesInRow(const ordinal_type i) const { return _ax.data() + _ap[i] ; } - - KOKKOS_INLINE_FUNCTION - ordinal_type NumNonZerosInRow(const ordinal_type i) const { return (_ap[i+1] - _ap[i]); } - - KOKKOS_INLINE_FUNCTION - value_type& Value(const ordinal_type k) { return _ax[k]; } - - KOKKOS_INLINE_FUNCTION - value_type Value(const ordinal_type k) const { return _ax[k]; } - - /// \brief Default constructor. - KOKKOS_INLINE_FUNCTION - CrsMatrixBase() - : _m(0), - _n(0), - _nnz(0), - _ap(), - _aj(), - _ax() - { } - - /// \brief Constructor with label - CrsMatrixBase(const string & ) - : _m(0), - _n(0), - _nnz(0), - _ap(), - _aj(), - _ax() - { } - - /// \brief Copy constructor (shallow copy), for deep-copy use a method copy - template<typename VT, - typename OT, - typename ST, - typename SpT, - typename MT> - CrsMatrixBase(const CrsMatrixBase<VT,OT,ST,SpT,MT> &b) - : _m(b._m), - _n(b._n), - _nnz(b._nnz), - _ap(b._ap), - _aj(b._aj), - _ax(b._ax) - { } - - /// \brief Constructor to allocate internal data structures. - CrsMatrixBase(const string & , - const ordinal_type m, - const ordinal_type n, - const ordinal_type nnz) - : _m(m), - _n(n), - _nnz(nnz), - _ap("CrsMatrixBase::RowPtrArray", m+1), - _aj("CrsMatrixBase::ColsArray", nnz), - _ax("CrsMatrixBase::ValuesArray", nnz) - { } - - /// \brief Constructor to attach external arrays to the matrix. - CrsMatrixBase(const string &, - const ordinal_type m, - const ordinal_type n, - const ordinal_type nnz, - const size_type_array &ap, - const ordinal_type_array &aj, - const value_type_array &ax) - : _m(m), - _n(n), - _nnz(nnz), - _ap(ap), - _aj(aj), - _ax(ax) - { } - - // Allow the copy function access to the input CrsMatrixBase - // private data. 
- template<typename, typename, typename, typename, typename> - friend class CrsMatrixBase ; - - public: - /// \brief deep copy of matrix b, potentially different spaces - template< typename SpT > - int - copy(const CrsMatrixBase<ValueType,OrdinalType,SizeType,SpT,MemoryTraits> &b) { - - space_type::execution_space::fence(); - - createInternalArrays(b._m, b._n, b._nnz); - - space_type::execution_space::fence(); - - const auto ap_range = range_type<ordinal_type>(0, min(_ap.dimension_0(), b._ap.dimension_0())); - const auto aj_range = range_type<size_type> (0, min(_aj.dimension_0(), b._aj.dimension_0())); - const auto ax_range = range_type<size_type> (0, min(_ax.dimension_0(), b._ax.dimension_0())); - - Kokkos::deep_copy(Kokkos::subview( _ap, ap_range), - Kokkos::subview(b._ap, ap_range)); - Kokkos::deep_copy(Kokkos::subview( _aj, aj_range), - Kokkos::subview(b._aj, aj_range)); - - Kokkos::deep_copy(Kokkos::subview( _ax, ax_range), - Kokkos::subview(b._ax, ax_range)); - - space_type::execution_space::fence(); - - return 0; - } - - /// \brief deep copy of lower/upper triangular of matrix b - int - copy(const int uplo, - const CrsMatrixBase &b) { - - createInternalArrays(b._m, b._n, b._nnz); - - // assume that matrix b is sorted. 
- switch (uplo) { - case Uplo::Lower: { - _nnz = 0; - for (ordinal_type i=0;i<_m;++i) { - size_type jbegin = b._ap[i]; - size_type jend = b._ap[i+1]; - _ap[i] = _nnz; - for (size_type j=jbegin;j<jend && (i >= b._aj[j]);++j,++_nnz) { - _aj[_nnz] = b._aj[j]; - _ax[_nnz] = b._ax[j]; - } - } - _ap[_m] = _nnz; - break; - } - case Uplo::Upper: { - _nnz = 0; - for (ordinal_type i=0;i<_m;++i) { - size_type j = b._ap[i]; - size_type jend = b._ap[i+1]; - _ap[i] = _nnz; - for ( ;j<jend && (i > b._aj[j]);++j) ; - for ( ;j<jend;++j,++_nnz) { - _aj[_nnz] = b._aj[j]; - _ax[_nnz] = b._ax[j]; - } - } - _ap[_m] = _nnz; - break; - } - } - - return 0; - } - - /// \brief deep copy of matrix b with given permutation vectors - template<typename VT, - typename OT, - typename ST, - typename SpT, - typename MT> - int - copy(const typename CrsMatrixBase<VT,OT,ST,SpT,MT>::ordinal_type_array &p, - const typename CrsMatrixBase<VT,OT,ST,SpT,MT>::ordinal_type_array &ip, - const CrsMatrixBase<VT,OT,ST,SpT,MT> &b) { - - createInternalArrays(b._m, b._n, b._nnz); - - // Question:: do I need to use Kokkos::vector ? - // in other words, where do we permute matrix in factoriztion ? - // permuting a matrix is a kernel ? - vector<ijv_type> tmp; - - // any chance to use parallel_for ? 
- _nnz = 0; - for (ordinal_type i=0;i<_m;++i) { - ordinal_type ii = ip[i]; - - size_type jbegin = b._ap[ii]; - size_type jend = b._ap[ii+1]; - - _ap[i] = _nnz; - for (size_type j=jbegin;j<jend;++j) { - ordinal_type jj = p[b._aj[j]]; - ijv_type aij(i, jj, b._ax[j]); - tmp.push_back(aij); - } - - sort(tmp.begin(), tmp.end(), less<ijv_type>()); - for (auto it=tmp.begin();it<tmp.end();++it) { - ijv_type aij = (*it); - - _aj[_nnz] = aij.Col(); - _ax[_nnz] = aij.Val(); - ++_nnz; - } - tmp.clear(); - } - _ap[_m] = _nnz; - - return 0; - } - - /// \brief add the matrix b into this non-zero entires - template<typename VT, - typename OT, - typename ST, - typename SpT, - typename MT> - int - add(const CrsMatrixBase<VT,OT,ST,SpT,MT> &b) { - - const ordinal_type m = min(b._m, _m); - for (ordinal_type i=0;i<m;++i) { - const size_type jaend = _ap[i+1]; - const size_type jbend = b._ap[i+1]; - - size_type ja = _ap[i]; - size_type jb = b._ap[i]; - - for ( ;jb<jbend;++jb) { - for ( ;(_aj[ja]<b._aj[jb] && ja<jaend);++ja); - _ax[ja] += (_aj[ja] == b._aj[jb])*b._ax[jb]; - } - } - - return 0; - } - - int symmetrize(const int uplo, - const bool conjugate = false) { - vector<ijv_type> mm; - mm.reserve(_nnz*2); - - for (ordinal_type i=0;i<_m;++i) { - const size_type jbegin = _ap[i]; - const size_type jend = _ap[i+1]; - for (size_type jj=jbegin;jj<jend;++jj) { - const ordinal_type j = _aj[jj]; - const value_type val = (conjugate ? 
conj(_ax[j]) : _ax[j]); - if (uplo == Uplo::Lower && i > j) { - mm.push_back(ijv_type(i, j, val)); - mm.push_back(ijv_type(j, i, val)); - } else if (uplo == Uplo::Upper && i < j) { - mm.push_back(ijv_type(i, j, val)); - mm.push_back(ijv_type(j, i, val)); - } else if (i == j) { - mm.push_back(ijv_type(i, i, val)); - } - } - } - sort(mm.begin(), mm.end(), less<ijv_type>()); - - createInternalArrays(_m, _n, mm.size()); - - ijv2crs(mm); - - return 0; - } - - int hermitianize(int uplo) { - return symmetrize(uplo, true); - } - - ostream& showMe(ostream &os) const { - streamsize prec = os.precision(); - os.precision(8); - os << scientific; - - os << " -- CrsMatrixBase -- " << endl - << " # of Rows = " << _m << endl - << " # of Cols = " << _n << endl - << " # of NonZeros = " << _nnz << endl - << endl - << " RowPtrArray length = " << _ap.dimension_0() << endl - << " ColArray length = " << _aj.dimension_0() << endl - << " ValueArray length = " << _ax.dimension_0() << endl - << endl; - - const int w = 10; - if (_ap.size() && _aj.size() && _ax.size()) { - os << setw(w) << "Row" << " " - << setw(w) << "Col" << " " - << setw(w) << "Val" << endl; - for (ordinal_type i=0;i<_m;++i) { - size_type jbegin = _ap[i], jend = _ap[i+1]; - for (size_type j=jbegin;j<jend;++j) { - value_type val = _ax[j]; - os << setw(w) << i << " " - << setw(w) << _aj[j] << " " - << setw(w) << val << endl; - } - } - } - - os.unsetf(ios::scientific); - os.precision(prec); - - return os; - } - - int importMatrixMarket(ifstream &file) { - - vector<ijv_type> mm; - const ordinal_type mm_base = 1; - - { - string header; - if (file.is_open()) { - getline(file, header); - while (file.good()) { - char c = file.peek(); - if (c == '%' || c == '\n') { - file.ignore(256, '\n'); - continue; - } - break; - } - } else { - ERROR(MSG_INVALID_INPUT(file)); - } - - // check the header - bool symmetry = (header.find("symmetric") != string::npos); - - // read matrix specification - ordinal_type m, n; - size_type nnz; - - file >> 
m >> n >> nnz; - - mm.reserve(nnz*(symmetry ? 2 : 1)); - for (size_type i=0;i<nnz;++i) { - ordinal_type row, col; - value_type val; - file >> row >> col >> val; - - row -= mm_base; - col -= mm_base; - - mm.push_back(ijv_type(row, col, val)); - if (symmetry && row != col) - mm.push_back(ijv_type(col, row, val)); - } - sort(mm.begin(), mm.end(), less<ijv_type>()); - - // construct workspace and set variables - createInternalArrays(m, n, mm.size()); - } - - // change mm to crs - ijv2crs(mm); - - return 0; - } - - int exportMatrixMarket(ofstream &file, - const string comment, - const int uplo = 0) { - streamsize prec = file.precision(); - file.precision(8); - file << scientific; - - file << "%%MatrixMarket matrix coordinate " - << (is_fundamental<value_type>::value ? "real " : "complex ") - << ((uplo == Uplo::Upper || uplo == Uplo::Lower) ? "symmetric " : "general ") - << endl; - - file << comment << endl; - - // cnt nnz - size_type nnz = 0; - for (ordinal_type i=0;i<_m;++i) { - const size_type jbegin = _ap[i], jend = _ap[i+1]; - for (size_type j=jbegin;j<jend;++j) { - if (uplo == Uplo::Upper && i <= _aj[j]) ++nnz; - if (uplo == Uplo::Lower && i >= _aj[j]) ++nnz; - if (!uplo) ++nnz; - } - } - file << _m << " " << _n << " " << nnz << endl; - - const int w = 10; - for (ordinal_type i=0;i<_m;++i) { - const size_type jbegin = _ap[i], jend = _ap[i+1]; - for (size_type j=jbegin;j<jend;++j) { - bool flag = false; - if (uplo == Uplo::Upper && i <= _aj[j]) flag = true; - if (uplo == Uplo::Lower && i >= _aj[j]) flag = true; - if (!uplo) flag = true; - if (flag) { - value_type val = _ax[j]; - file << setw(w) << ( i+1) << " " - << setw(w) << (_aj[j]+1) << " " - << setw(w) << val << endl; - } - } - } - - file.unsetf(ios::scientific); - file.precision(prec); - - return 0; - } - - //---------------------------------------------------------------------- - - int convertGraph(size_type_array rptr, - ordinal_type_array cidx) const { - ordinal_type ii = 0; - size_type jj = 0; - - for 
(ordinal_type i=0;i<_m;++i) { - size_type jbegin = _ap[i], jend = _ap[i+1]; - rptr[ii++] = jj; - for (size_type j=jbegin;j<jend;++j) - if (i != _aj[j]) - cidx[jj++] = _aj[j]; - } - rptr[ii] = jj; - - return 0; - } - - //---------------------------------------------------------------------- - - }; - -} - -#endif diff --git a/lib/kokkos/example/ichol/src/crs_matrix_base_import.hpp b/lib/kokkos/example/ichol/src/crs_matrix_base_import.hpp deleted file mode 100644 index e1ff0f3a9fd403ae51d68f77358409e1e3cd5cca..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/crs_matrix_base_import.hpp +++ /dev/null @@ -1,104 +0,0 @@ -#pragma once -#ifndef __CRS_MATRIX_BASE_IMPL_HPP__ -#define __CRS_MATRIX_BASE_IMPL_HPP__ - -/// \file crs_matrix_base_impl.hpp -/// \brief Implementation of external interfaces to CrsMatrixBase -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -namespace Tacho { - - using namespace std; - - template<typename VT, - typename OT, - typename ST, - typename SpT, - typename MT> - inline int - CrsMatrixBase<VT,OT,ST,SpT,MT>::importMatrixMarket(ifstream &file) { - // skip initial title comments - { - ordinal_type m, n; - size_type nnz; - - while (file.good()) { - char c = file.peek(); - if (c == '%' || c == '\n') { - file.ignore(256, '\n'); - continue; - } - break; - } - - // read matrix specification - file >> m >> n >> nnz; - - // construct workspace and set variables - createInternalArrays(m, n, nnz); - } - - // read the coordinate format (matrix-market) - vector<ijv_type> mm; - mm.reserve(_nnz); - { - // matrix market use one base index - const ordinal_type mm_base = 1; - - for (size_type i=0;i<_nnz;++i) { - ijv_type aij; - file >> aij.Row() >> aij.Col() >> aij.Val(); - - // one base to zero base - aij.Row() -= mm_base; - aij.Col() -= mm_base; - - mm.push_back(aij); - } - sort(mm.begin(), mm.end(), less<ijv_type>()); - } - - // change mm to crs - { - ordinal_type ii = 0; - size_type jj = 0; - - ijv_type prev = mm[0]; - _ap[ii++] = 0; 
- _aj[jj] = prev.Col(); - _ax[jj] = prev.Val(); - ++jj; - - for (typename vector<ijv_type>::iterator it=(mm.begin()+1);it<mm.end();++it) { - ijv_type aij = (*it); - - // row index - if (aij.Row() != prev.Row()) { - _ap[ii++] = jj; - } - - if (aij == prev) { - --jj; - _aj[jj] = aij.Col(); - _ax[jj] += aij.Val(); - } else { - _aj[jj] = aij.Col(); - _ax[jj] = aij.Val(); - } - ++jj; - - prev = aij; - } - - // add the last index to terminate the storage - _ap[ii++] = jj; - _nnz = jj; - } - - return 0; - } - -} - - -#endif diff --git a/lib/kokkos/example/ichol/src/crs_matrix_helper.hpp b/lib/kokkos/example/ichol/src/crs_matrix_helper.hpp deleted file mode 100644 index 5b80e77935fcb968bff8f05e9876a10299a82182..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/crs_matrix_helper.hpp +++ /dev/null @@ -1,71 +0,0 @@ -#pragma once -#ifndef __CRS_MATRIX_HELPER_HPP__ -#define __CRS_MATRIX_HELPER_HPP__ - -/// \file crs_matrix_helper.hpp -/// \brief This file includes utility functions to convert between flat and hierarchical matrices. -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -#include "util.hpp" - -namespace Tacho { - - using namespace std; - - class CrsMatrixHelper { - public: - - template< typename CrsHierBase > - static int fillRowViewArray( CrsHierBase & HU ); - - template<typename CrsFlatBase> - static int - filterZeros(CrsFlatBase &flat); - - /// \brief Transform a scalar flat matrix to hierarchical matrix of matrices 1x1; testing only. - template<typename CrsFlatBase, - typename CrsHierBase> - static int - flat2hier(CrsFlatBase &flat, - CrsHierBase &hier); - - /// \brief Transform a scalar flat matrix to upper hierarchical matrix given scotch info. 
- template<typename CrsFlatBase, - typename CrsHierBase, - typename HostOrdinalTypeArray > - static int - flat2hier(int uplo, - CrsFlatBase &flat, - CrsHierBase &hier, - const typename CrsHierBase::ordinal_type nblks, - const HostOrdinalTypeArray range, - const HostOrdinalTypeArray tree); - - /// \brief Transform a scalar flat matrix to upper hierarchical matrix given scotch info. - template<typename CrsFlatBase, - typename CrsHierBase, - typename HostOrdinalTypeArray > - static int - flat2hier_upper(CrsFlatBase &flat, - CrsHierBase &hier, - const typename CrsHierBase::ordinal_type nblks, - const HostOrdinalTypeArray range, - const HostOrdinalTypeArray tree); - - /// \brief Transform a scalar flat matrix to lower hierarchical matrix given scotch info. - template<typename CrsFlatBase, - typename CrsHierBase, - typename HostOrdinalTypeArray > - static int - flat2hier_lower(CrsFlatBase &flat, - CrsHierBase &hier, - const typename CrsHierBase::ordinal_type nblks, - const HostOrdinalTypeArray range, - const HostOrdinalTypeArray tree); - }; - -} - -#include "crs_matrix_helper_impl.hpp" - -#endif diff --git a/lib/kokkos/example/ichol/src/crs_matrix_helper_impl.hpp b/lib/kokkos/example/ichol/src/crs_matrix_helper_impl.hpp deleted file mode 100644 index 0fc4c9f1b83c0e48d3e42eb61e8e1cea12b1c187..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/crs_matrix_helper_impl.hpp +++ /dev/null @@ -1,364 +0,0 @@ - -#ifndef __CRS_MATRIX_HELPER_IMPL_HPP__ -#define __CRS_MATRIX_HELPER_IMPL_HPP__ - -/// \file crs_matrix_helper_impl.hpp -/// \brief This file includes utility functions to convert between flat and hierarchical matrices. 
-/// \author Kyungjoo Kim (kyukim@sandia.gov) - -#include "util.hpp" - -namespace Tacho { - - using namespace std; - - template< typename CrsHierBase > - struct FunctorFillRowViewArray { - - typedef typename CrsHierBase::ordinal_type ordinal_type ; - typedef typename CrsHierBase::row_view_type_array row_view_type_array ; - typedef typename CrsHierBase::value_type_array ax_type ; - - typedef ordinal_type value_type ; - - row_view_type_array _all_row_views ; - ax_type _ax ; - - FunctorFillRowViewArray( const row_view_type_array & arg_all_row_views - , const ax_type & arg_ax ) - : _all_row_views( arg_all_row_views ) - , _ax( arg_ax ) - {} - - KOKKOS_INLINE_FUNCTION - void operator()( ordinal_type k , ordinal_type & value ) const - { value += _ax(k).NumRows(); } - - KOKKOS_INLINE_FUNCTION - void operator()( ordinal_type k , ordinal_type & value , bool final ) const - { - if ( final ) { - const int begin = value ; - const int end = begin + _ax(k).NumRows(); - - auto sub = Kokkos::subview( _all_row_views, Kokkos::pair<int,int>(begin,end) ); - - _ax(k).setRowViewArray( sub ); - } - - value += _ax(k).NumRows(); - } - }; - - template< typename CrsHierBase > - int CrsMatrixHelper::fillRowViewArray( CrsHierBase & device_HU ) - { - typedef typename CrsHierBase::row_view_type_array row_view_type_array ; - typedef typename CrsHierBase::space_type space_type ; - - ordinal_type total_row_view_count = 0 ; - - Kokkos::RangePolicy< space_type > - range_policy( 0 , device_HU.NumNonZeros() ); - - space_type::fence(); - - { - FunctorFillRowViewArray< CrsHierBase > - functor( row_view_type_array() , device_HU._ax ); - - - Kokkos::parallel_reduce( range_policy , functor , total_row_view_count ); - } - - device_HU._all_row_views = - row_view_type_array("RowViews",total_row_view_count); - - space_type::fence(); - - { - FunctorFillRowViewArray< CrsHierBase > - functor( device_HU._all_row_views , device_HU._ax ); - - Kokkos::parallel_scan( range_policy , functor ); - } - - 
space_type::fence(); - - return 0 ; - } - - template<typename CrsFlatBase> - int - CrsMatrixHelper::filterZeros(CrsFlatBase &flat) { - typedef typename CrsFlatBase::ordinal_type ordinal_type; - typedef typename CrsFlatBase::size_type size_type; - typedef typename CrsFlatBase::value_type value_type; - - typedef typename CrsFlatBase::ordinal_type_array_ptr ordinal_type_array_ptr; - typedef typename CrsFlatBase::value_type_array_ptr value_type_array_ptr; - - size_type nz = 0; - const value_type zero(0); - - for (ordinal_type k=0;k<flat.NumNonZeros();++k) - nz += (flat.Value(k) == zero) ; - - if (nz) { - CrsFlatBase resized(flat.Label() + "::ZeroFiltered", - flat.NumRows(), - flat.NumCols(), - flat.NumNonZeros() - nz); - - ordinal_type_array_ptr rows = resized.RowPtr(); rows[0] = 0; - ordinal_type_array_ptr cols = resized.ColPtr(); - value_type_array_ptr vals = resized.ValuePtr(); - - size_type nnz = 0; - for (ordinal_type i=0;i<flat.NumRows();++i) { - const ordinal_type nnz_in_row = flat.NumNonZerosInRow(i); - const ordinal_type_array_ptr cols_in_row = flat.ColsInRow(i); - const value_type_array_ptr vals_in_row = flat.ValuesInRow(i); - - for (ordinal_type j=0;j<nnz_in_row;++j) { - if (vals_in_row[j] != zero) { - cols[nnz] = cols_in_row[j]; - vals[nnz] = vals_in_row[j]; - ++nnz; - } - } - rows[i+1] = nnz; - } - flat = resized; - resized.setNumNonZeros(); - } - - return 0; - } - - - template<typename CrsFlatBase, - typename CrsHierBase> - int - CrsMatrixHelper::flat2hier(CrsFlatBase &flat, - CrsHierBase &hier) { - typedef typename CrsHierBase::ordinal_type ordinal_type; - typedef typename CrsHierBase::size_type size_type; - typedef typename CrsHierBase::ordinal_type_array_ptr ordinal_type_array_ptr; - - size_type nnz = 0; - - hier.createInternalArrays(flat.NumRows(), flat.NumCols(), flat.NumNonZeros()); - - for (ordinal_type i=0;i<flat.NumRows();++i) { - ordinal_type jsize = flat.NumNonZerosInRow(i); - - hier._ap[i] = nnz; - ordinal_type_array_ptr ci = 
flat.ColsInRow(i); - for (ordinal_type j=0;j<jsize;++j,++nnz) { - hier._aj[nnz] = ci[j]; - hier._ax[nnz].setView( flat, i, 1, - /**/ ci[j], 1); - } - } - - hier._ap[flat.NumRows()] = nnz; - hier._nnz = nnz; - - return 0; - } - - template< typename CrsFlatBase , - typename CrsHierBase , - typename HostOrdinalTypeArray > - int - CrsMatrixHelper::flat2hier(int uplo, - CrsFlatBase &flat, - CrsHierBase &hier, - const typename CrsHierBase::ordinal_type nblks, - const HostOrdinalTypeArray range , - const HostOrdinalTypeArray tree) { - switch(uplo) { - case Uplo::Upper: return flat2hier_upper(flat, hier, nblks, range, tree); - case Uplo::Lower: return flat2hier_lower(flat, hier, nblks, range, tree); - } - return -1; - } - - template<typename CrsFlatBase, - typename CrsHierBase, - typename HostOrdinalTypeArray > - int - CrsMatrixHelper::flat2hier_upper(CrsFlatBase & device_flat, - CrsHierBase & device_hier, - const typename CrsHierBase::ordinal_type nblks, - const HostOrdinalTypeArray range, - const HostOrdinalTypeArray tree) { - typedef typename CrsHierBase::ordinal_type ordinal_type; - typedef typename CrsHierBase::size_type size_type; - - //typedef typename CrsHierBase::ordinal_type_array ordinal_type_array; - //typedef typename CrsHierBase::ordinal_type_array_ptr ordinal_type_array_ptr; - //typedef typename CrsHierBase::value_type_array_ptr value_type_array_ptr; - - size_type nnz = 0; - - // count nnz and nnz in rows for the upper triangular hier matrix - for (ordinal_type i=0;i<nblks;++i) - for (ordinal_type j=i;j != -1;++nnz,j=tree[j]) ; - - // create upper triangular block matrix - device_hier.createInternalArrays(nblks, nblks, nnz); - - typename CrsHierBase::size_type_array::HostMirror - host_ap = Kokkos::create_mirror_view( device_hier._ap ); - - typename CrsHierBase::ordinal_type_array::HostMirror - host_aj = Kokkos::create_mirror_view( device_hier._aj ); - - typename CrsHierBase::value_type_array::HostMirror - host_ax = Kokkos::create_mirror_view( device_hier._ax 
); - - nnz = 0; - for (ordinal_type i=0;i<nblks;++i) { - host_ap[i] = nnz; - for (ordinal_type j=i;j != -1;++nnz,j=tree[j]) { - host_aj[nnz] = j; - host_ax[nnz].setView( device_flat, range[i], (range[i+1] - range[i]), - /**/ range[j], (range[j+1] - range[j])); - - // this checking might more expensive - // and attempts to access device memory from the host - // if (!host_ax[nnz].countNumNonZeros()) - // --nnz; - } - } - - host_ap[nblks] = nnz; - - Kokkos::deep_copy( device_hier._ap , host_ap ); - Kokkos::deep_copy( device_hier._aj , host_aj ); - Kokkos::deep_copy( device_hier._ax , host_ax ); - - device_hier._nnz = nnz; - - return 0; - } - - // template<typename CrsFlatBase, - // typename CrsHierBase> - // int - // CrsMatrixHelper::flat2hier_upper(CrsFlatBase &flat, - // CrsHierBase &hier, - // const typename CrsHierBase::ordinal_type nblks, - // const typename CrsHierBase::ordinal_type_array range, - // const typename CrsHierBase::ordinal_type_array tree) { - // typedef typename CrsHierBase::ordinal_type ordinal_type; - // typedef typename CrsHierBase::size_type size_type; - - // typedef typename CrsHierBase::ordinal_type_array ordinal_type_array; - // //typedef typename CrsHierBase::ordinal_type_array_ptr ordinal_type_array_ptr; - // //typedef typename CrsHierBase::value_type_array_ptr value_type_array_ptr; - - // ordinal_type_array sibling("CrsMatrixHelper::flat2hier_upper::sibling", nblks); - - // // check the end of adjacent siblings (if not adjacent, they are separators) - // ordinal_type p = tree[0]; - // for (ordinal_type i=1;i<nblks;++i) { - // const ordinal_type j = tree[i]; - // if (p != j) { - // p = j; - // sibling[i-1] = -1; - // } - // } - // sibling[nblks-1] = -1; - - // size_type nnz = 0; - - // // count nnz and nnz in rows for the upper triangular hier matrix - // for (ordinal_type i=0;i<nblks;++i) { // search for all rows - // for (ordinal_type j=i;j != -1;j=tree[j]) { // move up - // ordinal_type k=j; - // do { - // ++nnz; - // } while 
(sibling[k++] != -1); - // } - // } - - // // create upper triangular block matrix - // hier.createInternalArrays(nblks, nblks, nnz); - - // nnz = 0; - // for (ordinal_type i=0;i<nblks;++i) { - // hier._ap[i] = nnz; - // for (ordinal_type j=i;j != -1;j=tree[j]) { - // ordinal_type k=j; - // do { - // hier._aj[nnz] = k; - // hier._ax[nnz].setView( flat, range[i], (range[i+1] - range[i]), - // /**/ range[k], (range[k+1] - range[k])); - - // // this checking might more expensive - // if (hier._ax[nnz].hasNumNonZeros()) - // ++nnz; - // } while (sibling[k++] != -1); - // } - // } - // hier._ap[nblks] = nnz; - // hier._nnz = nnz; - - // return 0; - // } - - template<typename CrsFlatBase, - typename CrsHierBase, - typename HostOrdinalTypeArray > - int - CrsMatrixHelper::flat2hier_lower(CrsFlatBase &flat, - CrsHierBase &hier, - const typename CrsHierBase::ordinal_type nblks, - const HostOrdinalTypeArray range, - const HostOrdinalTypeArray tree) { - ERROR(MSG_NOT_YET_IMPLEMENTED); - - // typedef typename CrsHierBase::ordinal_type ordinal_type; - // typedef typename CrsHierBase::size_type size_type; - - // typedef typename CrsHierBase::ordinal_type_array ordinal_type_array; - // //typedef typename CrsHierBase::ordinal_type_array_ptr ordinal_type_array_ptr; - // //typedef typename CrsHierBase::value_type_array_ptr value_type_array_ptr; - - // ordinal_type_array tmp = ordinal_type_array("flat2hier:tmp", nblks+1); - // size_type nnz = 0; - - // // count nnz and nnz in rows for lower triangular matrix - // for (ordinal_type i=0;i<nblks;++i) - // for (ordinal_type j=i;j != -1;++nnz) { - // ++tmp[j]; - // j = tree[j]; - // } - - // // count nnz and nnz in rows for lower triangular matrix - // hier.createInternalArrays(nblks, nblks, nnz); - // for (ordinal_type i=1;i<(nblks+1);++i) - // hier._ap[i] = hier._ap[i-1] + tmp[i-1]; - - // for (ordinal_type i=0;i<(nblks+1);++i) - // tmp[i] = hier._ap[i]; - - // for (ordinal_type i=0;i<nblks;++i) - // for (ordinal_type j=i;j != 
-1;j=tree[j]) { - // hier._aj[tmp[j]] = i; - // hier._ax[tmp[j]].setView( flat, range[j], (range[j+1] - range[j]), - // /**/ range[i], (range[i+1] - range[i])); - // ++tmp[j]; - // } - - return 0; - } - -} - - -#endif - diff --git a/lib/kokkos/example/ichol/src/crs_matrix_view.hpp b/lib/kokkos/example/ichol/src/crs_matrix_view.hpp deleted file mode 100644 index 2a55e6fac9b64eca3eade412a1511913baafab85..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/crs_matrix_view.hpp +++ /dev/null @@ -1,226 +0,0 @@ -#pragma once -#ifndef __CRS_MATRIX_VIEW_HPP__ -#define __CRS_MATRIX_VIEW_HPP__ - -/// \file crs_matrix_view.hpp -/// \brief CRS matrix view object creates 2D view to setup a computing region. -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -#include "util.hpp" - -namespace Tacho { - - using namespace std; - - template<typename CrsMatBaseType> - class CrsRowView; - - template<typename CrsMatBaseType> - class CrsMatrixView { - public: - typedef typename CrsMatBaseType::space_type space_type; - - typedef typename CrsMatBaseType::value_type value_type; - typedef typename CrsMatBaseType::ordinal_type ordinal_type; - typedef typename CrsMatBaseType::size_type size_type; - - typedef CrsMatBaseType mat_base_type; - typedef CrsRowView<mat_base_type> row_view_type; - - // be careful this use rcp and atomic operation - // - use setView to create a view if _rows is not necessary - // - copy constructor and assignment operator will do soft copy of the object - typedef Kokkos::View<row_view_type*,space_type,Kokkos::MemoryUnmanaged> row_view_type_array; - - private: - CrsMatBaseType _base; // shallow copy of the base object - ordinal_type _offm; // offset in rows - ordinal_type _offn; // offset in cols - ordinal_type _m; // # of rows - ordinal_type _n; // # of cols - - row_view_type_array _rows; - - public: - - KOKKOS_INLINE_FUNCTION - void setRowViewArray( const row_view_type_array & arg_rows ) - { - _rows = arg_rows ; - - for (ordinal_type i=0;i<_m;++i) 
{ - _rows[i].setView(*this, i); - } - } - - KOKKOS_INLINE_FUNCTION - row_view_type& RowView(const ordinal_type i) const { return _rows[i]; } - - KOKKOS_INLINE_FUNCTION - void setView(const CrsMatBaseType &base, - const ordinal_type offm, const ordinal_type m, - const ordinal_type offn, const ordinal_type n) { - _base = base; - - _offm = offm; _m = m; - _offn = offn; _n = n; - } - - KOKKOS_INLINE_FUNCTION - const CrsMatBaseType & BaseObject() const { return _base; } - - KOKKOS_INLINE_FUNCTION - ordinal_type OffsetRows() const { return _offm; } - - KOKKOS_INLINE_FUNCTION - ordinal_type OffsetCols() const { return _offn; } - - KOKKOS_INLINE_FUNCTION - ordinal_type NumRows() const { return _m; } - - KOKKOS_INLINE_FUNCTION - ordinal_type NumCols() const { return _n; } - - KOKKOS_INLINE_FUNCTION - bool hasNumNonZeros() const { - const ordinal_type m = NumRows(); - for (ordinal_type i=0;i<m;++i) { - row_view_type row; - row.setView(*this, i); - if (row.NumNonZeros()) return true; - } - return false; - } - - inline - size_type countNumNonZeros() const { - size_type nnz = 0; - const ordinal_type m = NumRows(); - for (ordinal_type i=0;i<m;++i) { - row_view_type row; - row.setView(*this, i); - nnz += row.NumNonZeros(); - } - return nnz; - } - - KOKKOS_INLINE_FUNCTION - CrsMatrixView() - : _base(), - _offm(0), - _offn(0), - _m(0), - _n(0), - _rows() - { } - - KOKKOS_INLINE_FUNCTION - CrsMatrixView(const CrsMatrixView &b) - : _base(b._base), - _offm(b._offm), - _offn(b._offn), - _m(b._m), - _n(b._n), - _rows(b._rows) - { } - - KOKKOS_INLINE_FUNCTION - CrsMatrixView(const CrsMatBaseType & b) - : _base(b), - _offm(0), - _offn(0), - _m(b.NumRows()), - _n(b.NumCols()), - _rows() - { } - - CrsMatrixView(const CrsMatBaseType & b, - const ordinal_type offm, const ordinal_type m, - const ordinal_type offn, const ordinal_type n) - : _base(b), - _offm(offm), - _offn(offn), - _m(m), - _n(n), - _rows() - { } - - ostream& showMe(ostream &os) const { - const int w = 4; - os << 
"CrsMatrixView, " - << " Offs ( " << setw(w) << _offm << ", " << setw(w) << _offn << " ); " - << " Dims ( " << setw(w) << _m << ", " << setw(w) << _n << " ); " - << " NumNonZeros = " << countNumNonZeros() << ";"; - - return os; - } - - }; -} - - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#if ! KOKKOS_USING_EXP_VIEW - -namespace Kokkos { - namespace Impl { - - // The Kokkos::View allocation will by default assign each allocated datum to zero. - // This is not the required initialization behavior when - // Tacho::CrsRowView and Tacho::CrsMatrixView - // are used within a Kokkos::View. - // Create a partial specialization of the Kokkos::Impl::AViewDefaultConstruct - // to replace the assignment initialization with placement new initialization. - // - // This work-around is necessary until a TBD design refactorization of Kokkos::View. - - template< class ExecSpace , typename T > - struct ViewDefaultConstruct< ExecSpace , Tacho::CrsRowView<T> , true > - { - typedef Tacho::CrsRowView<T> type ; - type * const m_ptr ; - - KOKKOS_FORCEINLINE_FUNCTION - void operator()( const typename ExecSpace::size_type& i ) const - { new(m_ptr+i) type(); } - - ViewDefaultConstruct( type * pointer , size_t capacity ) - : m_ptr( pointer ) - { - Kokkos::RangePolicy< ExecSpace > range( 0 , capacity ); - parallel_for( range , *this ); - ExecSpace::fence(); - } - }; - - template< class ExecSpace , typename T > - struct ViewDefaultConstruct< ExecSpace , Tacho::CrsMatrixView<T> , true > - { - typedef Tacho::CrsMatrixView<T> type ; - type * const m_ptr ; - - KOKKOS_FORCEINLINE_FUNCTION - void operator()( const typename ExecSpace::size_type& i ) const - { new(m_ptr+i) type(); } - - ViewDefaultConstruct( type * pointer , size_t capacity ) - : m_ptr( pointer ) - { - Kokkos::RangePolicy< ExecSpace > range( 0 , capacity ); - parallel_for( range , *this ); - ExecSpace::fence(); - } - 
}; - - } // namespace Impl -} // namespace Kokkos - -#endif /* #if ! KOKKOS_USING_EXP_VIEW */ - - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif diff --git a/lib/kokkos/example/ichol/src/crs_row_view.hpp b/lib/kokkos/example/ichol/src/crs_row_view.hpp deleted file mode 100644 index 8556bcb9e637dd64afdf92f4ef6b526a14562d09..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/crs_row_view.hpp +++ /dev/null @@ -1,185 +0,0 @@ -#pragma once -#ifndef __CRS_ROW_VIEW_HPP__ -#define __CRS_ROW_VIEW_HPP__ - -/// \file crs_row_view.hpp -/// \brief A view to a row extracted from CrsMatrixView. -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -namespace Tacho { - - using namespace std; - - /// \class CrsRowView - template<typename CrsMatBaseType> - class CrsRowView { - public: - typedef typename CrsMatBaseType::ordinal_type ordinal_type; - typedef typename CrsMatBaseType::value_type value_type; - typedef typename CrsMatBaseType::ordinal_type_array_ptr ordinal_type_array_ptr; - typedef typename CrsMatBaseType::value_type_array_ptr value_type_array_ptr; - - private: - // row info - ordinal_type _offn, _n; - - // this assumes a contiguous memory buffer - ordinal_type_array_ptr _aj, _ajn; // column index compressed format in row - value_type_array_ptr _ax; // values - - static KOKKOS_INLINE_FUNCTION - typename CrsMatBaseType::ordinal_type_array_ptr - lower_bound( typename CrsMatBaseType::ordinal_type_array_ptr begin , - typename CrsMatBaseType::ordinal_type_array_ptr const end , - typename CrsMatBaseType::ordinal_type const val ) - { - typename CrsMatBaseType::ordinal_type_array_ptr it = begin ; - int count = end - begin ; - int step = 0 ; - while (count>0) { - it = begin ; - it += ( step = (count >> 1) ); - if (*it<val) { - begin=++it; - count-=step+1; - } - else { count=step; } - } - return begin; - } - - public: - KOKKOS_INLINE_FUNCTION 
- ordinal_type OffsetCols() const { return _offn; } - - KOKKOS_INLINE_FUNCTION - ordinal_type NumCols() const { return _n; } - - KOKKOS_INLINE_FUNCTION - ordinal_type NumNonZeros() const { return _ajn - _aj; } - - KOKKOS_INLINE_FUNCTION - ordinal_type Col(const ordinal_type j) const { return _aj[j] - _offn; } - - KOKKOS_INLINE_FUNCTION - value_type& Value(const ordinal_type j) { return _ax[j]; } - - KOKKOS_INLINE_FUNCTION - value_type Value(const ordinal_type j) const { return _ax[j]; } - - KOKKOS_INLINE_FUNCTION - ordinal_type Index(const ordinal_type col ) const { - const ordinal_type loc = _offn + col ; - // binary search - ordinal_type_array_ptr aj = CrsRowView::lower_bound(_aj, _ajn, loc); - - // if found, return index for the location, - // otherwise return -1 (not found), -2 (end of array) - return (aj < _ajn ? (*aj == loc ? aj - _aj : -1) : -2); - } - - KOKKOS_INLINE_FUNCTION - ordinal_type Index(const ordinal_type col, - const ordinal_type prev ) const { - const ordinal_type loc = _offn + col; - ordinal_type_array_ptr aj = _aj + prev; - - // binary search - // aj = lower_bound(aj, _ajn, loc); - - // linear search from prev: this gains about 45 % faster - for ( ;aj < _ajn && *aj<loc; ++aj); - - // if found, return index for the location, - // otherwise return -1 (not found), -2 (end of array) - return (aj < _ajn ? (*aj == loc ? aj - _aj : -1) : -2); - } - - KOKKOS_INLINE_FUNCTION - value_type ValueAtColumn(const ordinal_type col) const { - const ordinal_type j = Index(col); - return (j < 0 ? 
value_type(0) : _ax[j]); - } - - KOKKOS_INLINE_FUNCTION - CrsRowView() - : _offn(0), - _n(0), - _aj(), - _ajn(), - _ax() - { } - - - KOKKOS_INLINE_FUNCTION - CrsRowView(const ordinal_type offn, - const ordinal_type n, - const ordinal_type_array_ptr aj, - const ordinal_type_array_ptr ajn, - const value_type_array_ptr ax) - : _offn(offn), - _n(n), - _aj(aj), - _ajn(ajn), - _ax(ax) - { } - - KOKKOS_INLINE_FUNCTION - CrsRowView(const CrsMatrixView<CrsMatBaseType> &A, - const ordinal_type i) { - this->setView(A, i); - } - - KOKKOS_INLINE_FUNCTION - CrsRowView(const CrsMatBaseType &A, - const ordinal_type i) { - this->setView(A, i); - } - - KOKKOS_INLINE_FUNCTION - void setView(const CrsMatrixView<CrsMatBaseType> &A, - const ordinal_type i) { - _offn = A.OffsetCols(); - _n = A.NumCols(); - - const ordinal_type ii = A.OffsetRows() + i; - - const typename CrsMatBaseType::ordinal_type_array_ptr cols = A.BaseObject().ColsInRow(ii); - const typename CrsMatBaseType::ordinal_type_array_ptr next = A.BaseObject().ColsInRow(ii+1); - const typename CrsMatBaseType::value_type_array_ptr vals = A.BaseObject().ValuesInRow(ii); - - // [cols..next) is sorted so a log(N) search could performed - _aj = CrsRowView::lower_bound(cols, next, _offn); - _ajn = CrsRowView::lower_bound(_aj, next, _offn+_n); - - _ax = &vals[_aj - cols]; - } - - KOKKOS_INLINE_FUNCTION - void setView(const CrsMatBaseType &A, - const ordinal_type i) { - _offn = 0; - _n = A.NumCols(); - _aj = A.ColsInRow(i); - _ajn = A.ColsInRow(i+1); - _ax = A.ValuesInRow(i); - } - - ostream& showMe(ostream &os) const { - const ordinal_type nnz = NumNonZeros(); - const ordinal_type offset = OffsetCols(); - os << " offset = " << offset - << ", nnz = " << nnz - << endl; - for (ordinal_type j=0;j<nnz;++j) { - const value_type val = _ax[j]; - os << "(" << Col(j) << ", " - << val << ")" - << endl; - } - return os; - } - }; -} - -#endif diff --git a/lib/kokkos/example/ichol/src/dot.hpp b/lib/kokkos/example/ichol/src/dot.hpp deleted file 
mode 100644 index acf927e0689759873b441012e187131a54055f88..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/dot.hpp +++ /dev/null @@ -1,74 +0,0 @@ -#pragma once -#ifndef __DOT_HPP__ -#define __DOT_HPP__ - -/// \file dot.hpp -/// \brief Sparse dot product. -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -/// dot_type result = x^H y - -namespace Tacho { - - using namespace std; - - template<typename T> struct DotTraits { - typedef T dot_type; - - static KOKKOS_FORCEINLINE_FUNCTION - dot_type - // dot(const T &x, const T &y) { return conj<T>(x)*y; } - dot(const T &x, const T &y) { return x*y; } - }; - - template<typename CrsRowViewType> - KOKKOS_INLINE_FUNCTION - typename CrsRowViewType::value_type - dot(const CrsRowViewType x, const CrsRowViewType y) { - typedef typename CrsRowViewType::ordinal_type ordinal_type; - typedef typename CrsRowViewType::value_type value_type; - - typedef DotTraits<value_type> dot_traits; - - value_type r_val(0); - - const ordinal_type nnz_x = x.NumNonZeros(); - const ordinal_type nnz_y = y.NumNonZeros(); - - for (ordinal_type jx=0, jy=0;jx<nnz_x && jy<nnz_y;) { - const ordinal_type diff = x.Col(jx) - y.Col(jy); - const ordinal_type sign = (0 < diff) - (diff < 0); - switch (sign) { - case 0: - r_val += dot_traits::dot(x.Value(jx++), y.Value(jy++)); - break; - case -1: ++jx; break; - case 1: ++jy; break; - } - } - - return r_val; - } - - template<typename CrsRowViewType> - KOKKOS_INLINE_FUNCTION - typename CrsRowViewType::value_type - dot(const CrsRowViewType x) { - typedef typename CrsRowViewType::ordinal_type ordinal_type; - typedef typename CrsRowViewType::value_type value_type; - - typedef DotTraits<value_type> dot_traits; - - value_type r_val(0); - - const ordinal_type nnz = x.NumNonZeros(); - - for (ordinal_type j=0;j<nnz;++j) - r_val += dot_traits::dot(x.Value(j), x.Value(j)); - - return r_val; - } - -} - -#endif diff --git a/lib/kokkos/example/ichol/src/gemm.hpp b/lib/kokkos/example/ichol/src/gemm.hpp 
deleted file mode 100644 index 33c6058ec6fc6727dc62a320cab7bbb1855ea93f..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/gemm.hpp +++ /dev/null @@ -1,99 +0,0 @@ -#pragma once -#ifndef __GEMM_HPP__ -#define __GEMM_HPP__ - -/// \file gemm.hpp -/// \brief Sparse matrix-matrix multiplication on given sparse patterns. -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -#include "util.hpp" -#include "control.hpp" -#include "partition.hpp" - -namespace Tacho { - - using namespace std; - - template<int ArgTransA, int ArgTransB, int ArgAlgo, - int ArgVariant = Variant::One, - template<int,int> class ControlType = Control> - struct Gemm { - - // data-parallel interface - // ======================= - template<typename ScalarType, - typename ExecViewTypeA, - typename ExecViewTypeB, - typename ExecViewTypeC> - KOKKOS_INLINE_FUNCTION - static int invoke(typename ExecViewTypeA::policy_type &policy, - const typename ExecViewTypeA::policy_type::member_type &member, - const ScalarType alpha, - typename ExecViewTypeA::matrix_type &A, - typename ExecViewTypeB::matrix_type &B, - const ScalarType beta, - typename ExecViewTypeC::matrix_type &C); - - // task-data parallel interface - // ============================ - template<typename ScalarType, - typename ExecViewTypeA, - typename ExecViewTypeB, - typename ExecViewTypeC> - class TaskFunctor { - public: - typedef typename ExecViewTypeA::policy_type policy_type; - typedef typename policy_type::member_type member_type; - typedef int value_type; - - private: - ScalarType _alpha, _beta; - typename ExecViewTypeA::matrix_type _A; - typename ExecViewTypeB::matrix_type _B; - typename ExecViewTypeC::matrix_type _C; - - policy_type _policy; - - public: - KOKKOS_INLINE_FUNCTION - TaskFunctor(const policy_type & P, - const ScalarType alpha, - const typename ExecViewTypeA::matrix_type & A, - const typename ExecViewTypeB::matrix_type & B, - const ScalarType beta, - const typename ExecViewTypeC::matrix_type & C) - : 
_alpha(alpha), - _beta(beta), - _A(A), - _B(B), - _C(C), - _policy(P) - { } - - string Label() const { return "Gemm"; } - - // task execution - KOKKOS_INLINE_FUNCTION - void apply(value_type &r_val) { - r_val = Gemm::invoke<ScalarType,ExecViewTypeA,ExecViewTypeB,ExecViewTypeC>(_policy, _policy.member_single(), - _alpha, _A, _B, _beta, _C); - } - - // task-data execution - KOKKOS_INLINE_FUNCTION - void apply(const member_type &member, value_type &r_val) { - r_val = Gemm::invoke<ScalarType,ExecViewTypeA,ExecViewTypeB,ExecViewTypeC>(_policy, member, - _alpha, _A, _B, _beta, _C); - } - - }; - - }; - -} - - -// #include "gemm_nt_nt.hpp" -#include "gemm_ct_nt.hpp" - -#endif diff --git a/lib/kokkos/example/ichol/src/gemm_ct_nt.hpp b/lib/kokkos/example/ichol/src/gemm_ct_nt.hpp deleted file mode 100644 index 13d2518cab90896929ecb58645e61aeb51849394..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/gemm_ct_nt.hpp +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once -#ifndef __GEMM_CT_NT_HPP__ -#define __GEMM_CT_NT_HPP__ - -/// \file gemm_ct_nt.hpp -/// \brief Sparse matrix-matrix multiplication on given sparse patterns. -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -#include "gemm_ct_nt_for_factor_blocked.hpp" -// #include "gemm_ct_nt_for_tri_solve_blocked.hpp" - -#endif diff --git a/lib/kokkos/example/ichol/src/gemm_ct_nt_for_factor_blocked.hpp b/lib/kokkos/example/ichol/src/gemm_ct_nt_for_factor_blocked.hpp deleted file mode 100644 index 88a4658482a1504ab6ad6334d65bd34a7dea055f..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/gemm_ct_nt_for_factor_blocked.hpp +++ /dev/null @@ -1,108 +0,0 @@ -#pragma once -#ifndef __GEMM_CT_NT_FOR_FACTOR_BLOCKED_HPP__ -#define __GEMM_CT_NT_FOR_FACTOR_BLOCKED_HPP__ - -/// \file gemm_ct_nt_for_factor_blocked.hpp -/// \brief Sparse matrix-matrix multiplication on given sparse patterns. 
-/// \author Kyungjoo Kim (kyukim@sandia.gov) - -namespace Tacho { - - using namespace std; - - // Gemm used in the factorization phase - // ==================================== - template<> - template<typename ScalarType, - typename CrsExecViewTypeA, - typename CrsExecViewTypeB, - typename CrsExecViewTypeC> - KOKKOS_INLINE_FUNCTION - int - Gemm<Trans::ConjTranspose,Trans::NoTranspose, - AlgoGemm::ForFactorBlocked> - ::invoke(typename CrsExecViewTypeA::policy_type &policy, - const typename CrsExecViewTypeA::policy_type::member_type &member, - const ScalarType alpha, - typename CrsExecViewTypeA::matrix_type &A, - typename CrsExecViewTypeB::matrix_type &B, - const ScalarType beta, - typename CrsExecViewTypeC::matrix_type &C) { - typedef typename CrsExecViewTypeA::ordinal_type ordinal_type; - typedef typename CrsExecViewTypeA::value_type value_type; - typedef typename CrsExecViewTypeA::row_view_type row_view_type; - - -if ( false && member.team_rank() == 0 ) { - printf("Gemm [%d +%d)x[%d +%d)\n" - , C.OffsetRows() - , C.NumRows() - , C.OffsetCols() - , C.NumCols() - ); -} - - // scale the matrix C with beta - scaleCrsMatrix<ScalarType,CrsExecViewTypeC>(member, beta, C); - - // Sparse matrix-matrix multiply: - // C(i,j) += alpha*A'(i,k)*B(k,j) - - const ordinal_type mA = A.NumRows(); - for (ordinal_type k=0;k<mA;++k) { - row_view_type &a = A.RowView(k); - const ordinal_type nnz_a = a.NumNonZeros(); - - row_view_type &b = B.RowView(k); - const ordinal_type nnz_b = b.NumNonZeros(); - - if (nnz_a > 0 && nnz_b > 0 ) { -#if 0 - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, nnz_a), - [&](const ordinal_type i) { - const ordinal_type row_at_i = a.Col(i); - const value_type val_at_ik = a.Value(i); - // const value_type val_at_ik = conj(a.Value(i)); - - row_view_type &c = C.RowView(row_at_i); - - ordinal_type idx = 0; - for (ordinal_type j=0;j<nnz_b && (idx > -2);++j) { - const ordinal_type col_at_j = b.Col(j); - const value_type val_at_kj = b.Value(j); - - idx = 
c.Index(col_at_j, idx); - if (idx >= 0) - c.Value(idx) += alpha*val_at_ik*val_at_kj; - } - }); -#else - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, nnz_a * nnz_b ), - [&](const ordinal_type ii) { - const ordinal_type i = ii / nnz_a ; - const ordinal_type j = ii % nnz_a ; - - row_view_type &c = C.RowView( a.Col(i) ); - - // Binary search for c's index of b.Col(j) - const ordinal_type idx = c.Index( b.Col(j) ); - - if (idx >= 0) { - // const value_type val_at_ik = conj(a.Value(i)); - c.Value(idx) += alpha * a.Value(i) * b.Value(j); - } - }); -#endif - - member.team_barrier(); - } - } - - return 0; - } - -} - -#endif diff --git a/lib/kokkos/example/ichol/src/graph_helper_scotch.hpp b/lib/kokkos/example/ichol/src/graph_helper_scotch.hpp deleted file mode 100644 index d2dd004579a507439b457a12a6f0de909bf33acd..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/graph_helper_scotch.hpp +++ /dev/null @@ -1,427 +0,0 @@ -#pragma once -#ifndef __GRAPH_HELPER_SCOTCH_HPP__ -#define __GRAPH_HELPER_SCOTCH_HPP__ - -/// \file graph_helper_scotch.hpp -/// \brief Interface to scotch reordering -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -#include "scotch.h" -#include "util.hpp" - -namespace Tacho { - - using namespace std; - - template<class CrsMatBaseType> - class GraphHelper_Scotch : public Disp { - public: - typedef typename CrsMatBaseType::ordinal_type ordinal_type; - typedef typename CrsMatBaseType::size_type size_type; - - typedef typename CrsMatBaseType::ordinal_type_array ordinal_type_array; - typedef typename CrsMatBaseType::size_type_array size_type_array; - - private: - string _label; - - // scotch main data structure - SCOTCH_Graph _graph; - SCOTCH_Num _strat; - int _level; - - // scotch input has no diagonal contribution - ordinal_type _base,_m; - ordinal_type_array _cidx; - - size_type _nnz; - size_type_array _rptr; - - // scotch output - ordinal_type _cblk; - ordinal_type_array _perm,_peri,_range,_tree; - - // status flag - 
bool _is_ordered; - - public: - - void setLabel(string label) { _label = label; } - string Label() const { return _label; } - - size_type NumNonZeros() const { return _nnz; } - ordinal_type NumRows() const { return _m; } - - size_type_array RowPtrVector() const { return _rptr; } - ordinal_type_array ColIndexVector() const { return _cidx; } - - ordinal_type_array PermVector() const { return _perm; } - ordinal_type_array InvPermVector() const { return _peri; } - - ordinal_type_array RangeVector() const { return _range; } - ordinal_type_array TreeVector() const { return _tree; } - - ordinal_type NumBlocks() const { return _cblk; } - - GraphHelper_Scotch() = default; - - // convert graph first - GraphHelper_Scotch(const string label, - const ordinal_type m, - const size_type_array rptr, - const ordinal_type_array cidx, - const int seed = GraphHelper::DefaultRandomSeed) { - - _label = "GraphHelper_Scotch::" + label; - - _is_ordered = false; - _cblk = 0; - - // scotch does not allow self-contribution (diagonal term in sparse matrix) - _base = 0; //A.BaseVal(); - _m = m; // A.NumRows(); - _nnz = rptr[m]; //A.NumNonZeros(); - - _rptr = rptr; //size_type_array(_label+"::RowPtrArray", _m+1); - _cidx = cidx; //ordinal_type_array(_label+"::ColIndexArray", _nnz); - - _perm = ordinal_type_array(_label+"::PermutationArray", _m); - _peri = ordinal_type_array(_label+"::InvPermutationArray", _m); - _range = ordinal_type_array(_label+"::RangeArray", _m); - _tree = ordinal_type_array(_label+"::TreeArray", _m); - - // create a graph structure without diagonals - _strat = 0; - _level = 0; - - //A.convertGraph(_nnz, _rptr, _cidx); - - int ierr = 0; - ordinal_type *rptr_ptr = reinterpret_cast<ordinal_type*>(_rptr.ptr_on_device()); - ordinal_type *cidx_ptr = reinterpret_cast<ordinal_type*>(_cidx.ptr_on_device()); - - if (seed != GraphHelper::DefaultRandomSeed) { - SCOTCH_randomSeed(seed); - SCOTCH_randomReset(); - } - - ierr = SCOTCH_graphInit(&_graph);CHKERR(ierr); - ierr = 
SCOTCH_graphBuild(&_graph, // scotch graph - _base, // base value - _m, // # of vertices - rptr_ptr, // column index array pointer begin - rptr_ptr+1, // column index array pointer end - NULL, // weights on vertices (optional) - NULL, // label array on vertices (optional) - _nnz, // # of nonzeros - cidx_ptr, // column index array - NULL);CHKERR(ierr); // edge load array (optional) - ierr = SCOTCH_graphCheck(&_graph);CHKERR(ierr); - } - GraphHelper_Scotch(const GraphHelper_Scotch &b) = default; - - virtual~GraphHelper_Scotch() { - SCOTCH_graphFree(&_graph); - } - - void setStratGraph(const SCOTCH_Num strat = 0) { - _strat = strat; - } - - void setTreeLevel(const int level = 0) { - _level = level; - } - - int computeOrdering(const ordinal_type treecut = 0, - const ordinal_type minblksize = 0) { - int ierr = 0; - - // pointers for global graph ordering - ordinal_type *perm = _perm.ptr_on_device(); - ordinal_type *peri = _peri.ptr_on_device(); - ordinal_type *range = _range.ptr_on_device(); - ordinal_type *tree = _tree.ptr_on_device(); - - { - const int level = (_level ? 
_level : max(1, int(log2(_m)-treecut))); // level = log2(_nnz)+10; - SCOTCH_Strat stradat; - SCOTCH_Num straval = _strat; - //(SCOTCH_STRATLEVELMAX));// | - //SCOTCH_STRATLEVELMIN | - //SCOTCH_STRATLEAFSIMPLE | - //SCOTCH_STRATSEPASIMPLE); - - ierr = SCOTCH_stratInit(&stradat);CHKERR(ierr); - - // if both are zero, do not run strategy - if (_strat || _level) { - cout << "GraphHelper_Scotch:: User provide a strategy and/or level" << endl - << " strategy = " << _strat << ", level = " << _level << endl; - ierr = SCOTCH_stratGraphOrderBuild (&stradat, straval, level, 0.2);CHKERR(ierr); - } - ierr = SCOTCH_graphOrder(&_graph, - &stradat, - perm, - peri, - &_cblk, - range, - tree);CHKERR(ierr); - SCOTCH_stratExit(&stradat); - } - -#if 0 - { - // assume there are multiple roots - range[_cblk+1] = range[_cblk]; // dummy range - tree[_cblk] = -1; // dummy root - for (ordinal_type i=0;i<_cblk;++i) - if (tree[i] == -1) // multiple roots becomes children of the hummy root - tree[i] = (_cblk+1); - ++_cblk; // include the dummy root - } -#endif - - // provided blksize is greater than 0, reorder internally - // if (treecut > 0 && minblksize > 0) { - // // graph array - // ordinal_type *rptr_ptr = reinterpret_cast<ordinal_type*>(_rptr.ptr_on_device()); - // ordinal_type *cidx_ptr = reinterpret_cast<ordinal_type*>(_cidx.ptr_on_device()); - - // // create workspace in - // size_type_array rptr_work = size_type_array(_label+"::Block::RowPtrArray", _m+1); - // ordinal_type_array cidx_work = ordinal_type_array(_label+"::Block::ColIndexArray", _nnz); - - // // create workspace output - // ordinal_type_array perm_work = ordinal_type_array(_label+"::Block::PermutationArray", _m); - // ordinal_type_array peri_work = ordinal_type_array(_label+"::Block::InvPermutationArray", _m); - // ordinal_type_array range_work = ordinal_type_array(_label+"::Block::RangeArray", _m); - // ordinal_type_array tree_work = ordinal_type_array(_label+"::Block::TreeArray", _m); - - // // scotch input - // 
ordinal_type *rptr_blk = reinterpret_cast<ordinal_type*>(rptr_work.ptr_on_device()); - // ordinal_type *cidx_blk = reinterpret_cast<ordinal_type*>(cidx_work.ptr_on_device()); - - // size_type nnz = 0; - // rptr_blk[0] = nnz; - - // for (ordinal_type iblk=0;iblk<_cblk;++iblk) { - // // allocate graph - // SCOTCH_Graph graph; - - // ierr = SCOTCH_graphInit(&graph);CHKERR(ierr); - - // SCOTCH_Strat stradat; - // SCOTCH_Num straval = (/*SCOTCH_STRATLEVELMAX | - // SCOTCH_STRATLEVELMIN |*/ - // SCOTCH_STRATLEAFSIMPLE | - // SCOTCH_STRATSEPASIMPLE); - - // ierr = SCOTCH_stratInit(&stradat);CHKERR(ierr); - // ierr = SCOTCH_stratGraphOrderBuild(&stradat, straval, 0, 0.2);CHKERR(ierr); - - // const ordinal_type ibegin = range[iblk], iend = range[iblk+1], m = iend - ibegin; - - // // scotch output - // ordinal_type cblk_blk = 0; - - // ordinal_type *perm_blk = perm_work.ptr_on_device() + ibegin; - // ordinal_type *peri_blk = peri_work.ptr_on_device() + ibegin; - // ordinal_type *range_blk = range_work.ptr_on_device() + ibegin; - // ordinal_type *tree_blk = tree_work.ptr_on_device() + ibegin; - - // // if each blk is greater than the given minblksize, reorder internally - // if (m > minblksize) { - // for (int i=ibegin;i<iend;++i) { - // const ordinal_type ii = peri[i]; - // const ordinal_type jbegin = rptr_ptr[ii]; - // const ordinal_type jend = rptr_ptr[ii+1]; - - // for (int j=jbegin;j<jend;++j) { - // const ordinal_type jj = perm[cidx_ptr[j]]; - // if (ibegin <= jj && jj < iend) - // cidx_blk[nnz++] = (jj - ibegin); - // } - // rptr_blk[i+1] = nnz; - // } - // const size_type nnz_blk = nnz - rptr_blk[ibegin]; - - // ierr = SCOTCH_graphBuild(&graph, // scotch graph - // 0, // base value - // m, // # of vertices - // &rptr_blk[ibegin], // column index array pointer begin - // &rptr_blk[ibegin]+1,// column index array pointer end - // NULL, // weights on vertices (optional) - // NULL, // label array on vertices (optional) - // nnz_blk, // # of nonzeros - // cidx_blk, // 
column index array - // NULL);CHKERR(ierr); // edge load array (optional) - // ierr = SCOTCH_graphCheck(&graph);CHKERR(ierr); - // ierr = SCOTCH_graphOrder(&graph, - // &stradat, - // perm_blk, - // peri_blk, - // &cblk_blk, - // range_blk, - // tree_blk);CHKERR(ierr); - // } else { - // for (ordinal_type i=0;i<m;++i) { - // perm_blk[i] = i; - // peri_blk[i] = i; - // } - // range_blk[1] = m; - // tree_blk[0] = -1; - // } - - // SCOTCH_stratExit(&stradat); - // SCOTCH_graphFree(&graph); - - // for (ordinal_type i=0;i<m;++i) { - // const ordinal_type ii = peri_blk[i] + ibegin; - // peri_blk[i] = peri[ii]; - // } - // for (ordinal_type i=0;i<m;++i) { - // const ordinal_type ii = i + ibegin; - // peri[ii] = peri_blk[i]; - // } - - // } - - // for (ordinal_type i=0;i<_m;++i) - // perm[peri[i]] = i; - // } - - _is_ordered = true; - - //cout << "SCOTCH level = " << level << endl; - //cout << "Range Tree " << endl; - //for (int i=0;i<_cblk;++i) - // cout << _range[i] << " :: " << i << " " << _tree[i] << endl; - - return 0; - } - - int pruneTree(const ordinal_type cut) { - if (cut <=0 ) return 0; - - ordinal_type_array work = ordinal_type_array(_label+"::WorkArray", _cblk+1); - for (ordinal_type iter=0;iter<cut && _cblk > 1;++iter) { - // horizontal merging - { - ordinal_type cnt = 0; - ordinal_type parent = _tree[0]; - work[0] = cnt; - for (ordinal_type i=1;i<_cblk;++i) { - const ordinal_type myparent = _tree[i]; - if (myparent == parent) { - work[i] = cnt; - } else { - parent = _tree[i]; - work[i] = ++cnt; - } - } - work[_cblk] = ++cnt; - - ordinal_type prev = -2; - const ordinal_type root = _cblk - 1; - for (ordinal_type i=0;i<root;++i) { - const ordinal_type myparent = _tree[i]; - const ordinal_type me = work[i]; - - _tree[me] = work[myparent]; - if (prev != me) { - _range[me] = _range[i]; - prev = me; - } - } - { - const ordinal_type me = work[root]; - _tree[me] = -1; - _range[me] = _range[root]; - - _range[work[root+1]] = _range[root+1]; - _cblk = cnt; - } - } - - // 
vertical merging - if (_cblk == 2) { - _tree[0] = -1; - _range[0] = 0; - _range[1] = _range[2]; - _cblk = 1; - } else { - ordinal_type cnt = 0; - for (ordinal_type i=0;i<_cblk;++i) { - const ordinal_type diff = _tree[i+1] - _tree[i]; - work[i] = (diff == 1 ? cnt : cnt++); - } - work[_cblk] = cnt; - - ordinal_type prev = -2; - const ordinal_type root = _cblk - 1; - for (ordinal_type i=0;i<root;++i) { - const ordinal_type myparent = _tree[i]; - const ordinal_type me = work[i]; - - _tree[me] = work[myparent]; - if (prev != me) { - _range[me] = _range[i]; - prev = me; - } - } - { - const ordinal_type me = work[root]; - _tree[me] = -1; - _range[me] = _range[root]; - - _range[work[root+1]] = _range[root+1]; - _cblk = cnt; - } - } - } - - // cleaning - { - for (ordinal_type i=(_cblk+1);i<_m;++i) { - _tree[i] = 0; - _range[i] = 0; - } - _tree[_cblk] = 0; - } - - return 0; - } - - ostream& showMe(ostream &os) const { - streamsize prec = os.precision(); - os.precision(15); - os << scientific; - - os << " -- Scotch input -- " << endl - << " Base Value = " << _base << endl - << " # of Rows = " << _m << endl - << " # of NonZeros = " << _nnz << endl; - - if (_is_ordered) - os << " -- Ordering -- " << endl - << " CBLK = " << _cblk << endl - << " PERM PERI RANG TREE" << endl; - - const int w = 6; - for (ordinal_type i=0;i<_m;++i) - os << setw(w) << _perm[i] << " " - << setw(w) << _peri[i] << " " - << setw(w) << _range[i] << " " - << setw(w) << _tree[i] << endl; - - os.unsetf(ios::scientific); - os.precision(prec); - - return os; - } - - }; - -} - -#endif diff --git a/lib/kokkos/example/ichol/src/herk.hpp b/lib/kokkos/example/ichol/src/herk.hpp deleted file mode 100644 index 548c495c448604d2bffd7a5dd1d9745ce440fc9e..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/herk.hpp +++ /dev/null @@ -1,91 +0,0 @@ -#pragma once -#ifndef __HERK_HPP__ -#define __HERK_HPP__ - -/// \file herk.hpp -/// \brief Sparse hermitian rank one update on given sparse patterns. 
-/// \author Kyungjoo Kim (kyukim@sandia.gov) - -#include "util.hpp" -#include "control.hpp" -#include "partition.hpp" - -namespace Tacho { - - using namespace std; - - template<int ArgUplo, int ArgTrans, int ArgAlgo, - int ArgVariant = Variant::One, - template<int,int> class ControlType = Control> - struct Herk { - - // data-parallel interface - // ======================= - template<typename ScalarType, - typename ExecViewTypeA, - typename ExecViewTypeC> - KOKKOS_INLINE_FUNCTION - static int invoke(typename ExecViewTypeA::policy_type &policy, - const typename ExecViewTypeA::policy_type::member_type &member, - const ScalarType alpha, - typename ExecViewTypeA::matrix_type &A, - const ScalarType beta, - typename ExecViewTypeC::matrix_type &C); - - // task-data parallel interface - // ============================ - template<typename ScalarType, - typename ExecViewTypeA, - typename ExecViewTypeC> - class TaskFunctor { - public: - typedef typename ExecViewTypeA::policy_type policy_type; - typedef typename policy_type::member_type member_type; - typedef int value_type; - - private: - ScalarType _alpha, _beta; - typename ExecViewTypeA::matrix_type _A; - typename ExecViewTypeC::matrix_type _C; - - policy_type _policy; - - public: - KOKKOS_INLINE_FUNCTION - TaskFunctor(const policy_type & P, - const ScalarType alpha, - const typename ExecViewTypeA::matrix_type & A, - const ScalarType beta, - const typename ExecViewTypeC::matrix_type & C) - : _alpha(alpha), - _beta(beta), - _A(A), - _C(C), - _policy(P) - { } - - string Label() const { return "Herk"; } - - // task execution - KOKKOS_INLINE_FUNCTION - void apply(value_type &r_val) { - r_val = Herk::invoke<ScalarType,ExecViewTypeA,ExecViewTypeC>(_policy, _policy.member_single(), - _alpha, _A, _beta, _C); - } - - // task-data execution - KOKKOS_INLINE_FUNCTION - void apply(const member_type &member, value_type &r_val) { - r_val = Herk::invoke<ScalarType,ExecViewTypeA,ExecViewTypeC>(_policy, member, - _alpha, _A, _beta, _C); - } 
- - }; - - }; - -} - -#include "herk_u_ct.hpp" - -#endif diff --git a/lib/kokkos/example/ichol/src/herk_u_ct.hpp b/lib/kokkos/example/ichol/src/herk_u_ct.hpp deleted file mode 100644 index 6de4a2fa5628f0bdd77da6fdfc916ad112569fce..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/herk_u_ct.hpp +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once -#ifndef __HERK_U_CT_HPP__ -#define __HERK_U_CT_HPP__ - -/// \file herk_u_ct.hpp -/// \brief Sparse hermitian rank one update on given sparse patterns. -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -#include "herk_u_ct_for_factor_blocked.hpp" - -#endif diff --git a/lib/kokkos/example/ichol/src/herk_u_ct_for_factor_blocked.hpp b/lib/kokkos/example/ichol/src/herk_u_ct_for_factor_blocked.hpp deleted file mode 100644 index 58bba2be3c9c5fba07a3a36a77545bca917778c3..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/herk_u_ct_for_factor_blocked.hpp +++ /dev/null @@ -1,103 +0,0 @@ -#pragma once -#ifndef __HERK_U_CT_FOR_FACTOR_BLOCKED_HPP__ -#define __HERK_U_CT_FOR_FACTOR_BLOCKED_HPP__ - -/// \file herk_u_ct_for_factor_blocked.hpp -/// \brief Sparse hermitian rank one update on given sparse patterns. 
-/// \author Kyungjoo Kim (kyukim@sandia.gov) - -namespace Tacho { - - using namespace std; - - - // Herk used in the factorization phase - // ==================================== - template<> - template<typename ScalarType, - typename CrsExecViewTypeA, - typename CrsExecViewTypeC> - KOKKOS_INLINE_FUNCTION - int - Herk<Uplo::Upper,Trans::ConjTranspose, - AlgoHerk::ForFactorBlocked> - ::invoke(typename CrsExecViewTypeA::policy_type &policy, - const typename CrsExecViewTypeA::policy_type::member_type &member, - const ScalarType alpha, - typename CrsExecViewTypeA::matrix_type &A, - const ScalarType beta, - typename CrsExecViewTypeC::matrix_type &C) { - typedef typename CrsExecViewTypeA::ordinal_type ordinal_type; - typedef typename CrsExecViewTypeA::value_type value_type; - typedef typename CrsExecViewTypeA::row_view_type row_view_type; - - -if ( false && member.team_rank() == 0 ) { - printf("Herk [%d +%d)x[%d +%d)\n" - , C.OffsetRows() - , C.NumRows() - , C.OffsetCols() - , C.NumCols() - ); -} - - // scale the matrix C with beta - scaleCrsMatrix<ScalarType,CrsExecViewTypeC>(member, beta, C); - - // C(i,j) += alpha*A'(i,k)*A(k,j) - for (ordinal_type k=0;k<A.NumRows();++k) { - row_view_type &a = A.RowView(k); - const ordinal_type nnz = a.NumNonZeros(); - - if (nnz > 0) { - -#if 0 - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, nnz), - [&](const ordinal_type i) { - const ordinal_type row_at_i = a.Col(i); - // const value_type val_at_ik = conj(a.Value(i)); - const value_type val_at_ik = a.Value(i); - - row_view_type &c = C.RowView(row_at_i); - - ordinal_type idx = 0; - for (ordinal_type j=i;j<nnz && (idx > -2);++j) { - const ordinal_type col_at_j = a.Col(j); - const value_type val_at_kj = a.Value(j); - - idx = c.Index(col_at_j, idx); - if (idx >= 0) - c.Value(idx) += alpha*val_at_ik*val_at_kj; - } - }); -#else - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, nnz*nnz), - [&](const ordinal_type ii) { - const ordinal_type i = ii / nnz ; - const 
ordinal_type j = ii % nnz ; - - row_view_type &c = C.RowView( a.Col(i) ); - - const ordinal_type idx = c.Index( a.Col(j) ); - - if (idx >= 0) { - c.Value(idx) += alpha* a.Value(i) * a.Value(j); - } - }); - -#endif - - member.team_barrier(); - } - } - - return 0; - } - -} - -#endif diff --git a/lib/kokkos/example/ichol/src/norm.hpp b/lib/kokkos/example/ichol/src/norm.hpp deleted file mode 100644 index be77ee0dcf2b27f6a7e50fb8eeacb45dc9d50e82..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/norm.hpp +++ /dev/null @@ -1,82 +0,0 @@ -#pragma once -#ifndef __NORM_HPP__ -#define __NORM_HPP__ - -/// \file norm.hpp -/// \brief Compute norm of sparse or dense matrices. -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -namespace Tacho { - - using namespace std; - - template<typename DenseExecViewType> - KOKKOS_INLINE_FUNCTION - auto - normOneDenseMatrix(DenseExecViewType &A) -> decltype(real(typename DenseExecViewType::value_type())) { - typedef typename DenseExecViewType::ordinal_type ordinal_type; - typedef typename DenseExecViewType::value_type value_type; - typedef decltype(real(value_type())) norm_type; - - const ordinal_type mA = A.NumRows(); - const ordinal_type nA = A.NumCols(); - - norm_type r_val = 0.0; - - for (ordinal_type j=0;j<nA;++j) { - norm_type col_sum_at_j = 0.0; - for (ordinal_type i=0;i<mA;++i) - col_sum_at_j += abs(A.Value(i,j)); - r_val = max(r_val, col_sum_at_j); - } - return r_val; - } - - template<typename DenseExecViewType> - KOKKOS_INLINE_FUNCTION - auto - normInfDenseMatrix(DenseExecViewType &A) -> decltype(real(typename DenseExecViewType::value_type())) { - typedef typename DenseExecViewType::ordinal_type ordinal_type; - typedef typename DenseExecViewType::value_type value_type; - typedef decltype(real(value_type())) norm_type; - - const ordinal_type mA = A.NumRows(); - const ordinal_type nA = A.NumCols(); - - norm_type r_val = 0.0; - - for (ordinal_type i=0;i<mA;++i) { - norm_type row_sum_at_i = 0.0; - for 
(ordinal_type j=0;j<nA;++j) - row_sum_at_i += abs(A.Value(i,j)); - r_val = max(r_val, row_sum_at_i); - } - return r_val; - } - - template<typename DenseExecViewType> - KOKKOS_INLINE_FUNCTION - auto - normFrobeniusDenseMatrix(DenseExecViewType &A) -> decltype(real(typename DenseExecViewType::value_type())) { - typedef typename DenseExecViewType::ordinal_type ordinal_type; - typedef typename DenseExecViewType::value_type value_type; - typedef decltype(real(value_type())) norm_type; - - const ordinal_type mA = A.NumRows(); - const ordinal_type nA = A.NumCols(); - - norm_type r_val = 0.0; - - for (ordinal_type i=0;i<mA;++i) - for (ordinal_type j=0;j<nA;++j) { - value_type val = A.Value(i,j); - // r_val += conj(val)*val; - r_val += val*val; - } - return sqrt(r_val); - } - -} - -#endif - diff --git a/lib/kokkos/example/ichol/src/partition.hpp b/lib/kokkos/example/ichol/src/partition.hpp deleted file mode 100644 index a3e9f7095a6b82b62e6c27bc5f91db0e253b0451..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/partition.hpp +++ /dev/null @@ -1,381 +0,0 @@ - -#ifndef __PARTITION_HPP__ -#define __PARTITION_HPP__ - -/// \file partition.hpp -/// \brief Matrix partitioning utilities. 
-/// \author Kyungjoo Kim (kyukim@sandia.gov) - -namespace Tacho { - - using namespace std; - - template<typename MatView> - KOKKOS_INLINE_FUNCTION - void - Part_2x2(const MatView A, MatView &ATL, MatView &ATR, - /**************/ MatView &ABL, MatView &ABR, - const typename MatView::ordinal_type bm, - const typename MatView::ordinal_type bn, - const int quadrant) { - typename MatView::ordinal_type bmm, bnn; - - switch (quadrant) { - case Partition::TopLeft: - bmm = min(bm, A.NumRows()); - bnn = min(bn, A.NumCols()); - - ATL.setView(A.BaseObject(), - A.OffsetRows(), bmm, - A.OffsetCols(), bnn); - break; - case Partition::TopRight: - case Partition::BottomLeft: - Kokkos::abort("Tacho::Part_2x2 Not yet implemented"); - break; - case Partition::BottomRight: - bmm = A.NumRows() - min(bm, A.NumRows()); - bnn = A.NumCols() - min(bn, A.NumCols()); - - ATL.setView(A.BaseObject(), - A.OffsetRows(), bmm, - A.OffsetCols(), bnn); - break; - default: - Kokkos::abort("Tacho::Part_2x2 Invalid Input"); - break; - } - - ATR.setView(A.BaseObject(), - A.OffsetRows(), ATL.NumRows(), - A.OffsetCols() + ATL.NumCols(), A.NumCols() - ATL.NumCols()); - - ABL.setView(A.BaseObject(), - A.OffsetRows() + ATL.NumRows(), A.NumRows() - ATL.NumRows(), - A.OffsetCols(), ATL.NumCols()); - - ABR.setView(A.BaseObject(), - A.OffsetRows() + ATL.NumRows(), A.NumRows() - ATL.NumRows(), - A.OffsetCols() + ATL.NumCols(), A.NumCols() - ATL.NumCols()); - } - - template<typename MatView> - KOKKOS_INLINE_FUNCTION - void - Part_1x2(const MatView A, MatView &AL, MatView &AR, - const typename MatView::ordinal_type bn, - const int side) { - typename MatView::ordinal_type bmm, bnn; - - switch (side) { - case Partition::Left: - bmm = A.NumRows(); - bnn = min(bn, A.NumCols()); - - AL.setView(A.BaseObject(), - A.OffsetRows(), bmm, - A.OffsetCols(), bnn); - break; - case Partition::Right: - bmm = A.NumRows(); - bnn = A.NumCols() - min(bn, A.NumCols()); - - AL.setView(A.BaseObject(), - A.OffsetRows(), bmm, - 
A.OffsetCols(), bnn); - break; - default: - Kokkos::abort("Tacho::Part_1x2 Invalid Input"); - break; - } - - AR.setView(A.BaseObject(), - A.OffsetRows(), A.NumRows(), - A.OffsetCols() + AL.NumCols(), A.NumCols() - AL.NumCols()); - } - - template<typename MatView> - KOKKOS_INLINE_FUNCTION - void - Part_2x1(const MatView A, MatView &AT, - /*************/ MatView &AB, - const typename MatView::ordinal_type bm, - const int side) { - typename MatView::ordinal_type bmm, bnn; - - switch (side) { - case Partition::Top: - bmm = min(bm, A.NumRows()); - bnn = A.NumCols(); - - AT.setView(A.BaseObject(), - A.OffsetRows(), bmm, - A.OffsetCols(), bnn); - break; - case Partition::Bottom: - bmm = A.NumRows() - min(bm, A.NumRows()); - bnn = A.NumCols(); - - AT.setView(A.BaseObject(), - A.OffsetRows(), bmm, - A.OffsetCols(), bnn); - break; - default: - Kokkos::abort("Tacho::Part_2x1 Invalid Input"); - break; - } - - AB.setView(A.BaseObject(), - A.OffsetRows() + AT.NumRows(), A.NumRows() - AT.NumRows(), - A.OffsetCols(), A.NumCols()); - } - - template<typename MatView> - KOKKOS_INLINE_FUNCTION - void - Part_2x2_to_3x3(const MatView ATL, const MatView ATR, MatView &A00, MatView &A01, MatView &A02, - /***********************************/ MatView &A10, MatView &A11, MatView &A12, - const MatView ABL, const MatView ABR, MatView &A20, MatView &A21, MatView &A22, - const typename MatView::ordinal_type bm, - const typename MatView::ordinal_type bn, - const int quadrant) { - switch (quadrant) { - case Partition::TopLeft: - Part_2x2(ATL, A00, A01, - /**/ A10, A11, - bm, bn, Partition::BottomRight); - - Part_2x1(ATR, A02, - /**/ A12, - bm, Partition::Bottom); - - Part_1x2(ABL, A20, A21, - bn, Partition::Right); - - A22.setView(ABR.BaseObject(), - ABR.OffsetRows(), ABR.NumRows(), - ABR.OffsetCols(), ABR.NumCols()); - break; - case Partition::TopRight: - case Partition::BottomLeft: - Kokkos::abort("Tacho::Part_???"); - break; - case Partition::BottomRight: - A00.setView(ATL.BaseObject(), - 
ATL.OffsetRows(), ATL.NumRows(), - ATL.OffsetCols(), ATL.NumCols()); - - Part_1x2(ATR, A01, A02, - bn, Partition::Left); - - Part_2x1(ABL, A10, - /**/ A20, - bm, Partition::Top); - - Part_2x2(ABR, A11, A12, - /**/ A21, A22, - bm, bn, Partition::TopLeft); - break; - default: - Kokkos::abort("Tacho::Part_???"); - break; - } - } - - template<typename MatView> - KOKKOS_INLINE_FUNCTION - void - Part_2x1_to_3x1(const MatView AT, MatView &A0, - /***************/ MatView &A1, - const MatView AB, MatView &A2, - const typename MatView::ordinal_type bm, - const int side) { - switch (side) { - case Partition::Top: - Part_2x1(AT, A0, - /**/ A1, - bm, Partition::Bottom); - - A2.setView(AB.BaseObject(), - AB.OffsetRows(), AB.NumRows(), - AB.OffsetCols(), AB.NumCols()); - break; - case Partition::Bottom: - A0.setView(AT.BaseObject(), - AT.OffsetRows(), AT.NumRows(), - AT.OffsetCols(), AT.NumCols()); - - Part_2x1(AB, A1, - /**/ A2, - bm, Partition::Top); - break; - default: - Kokkos::abort("Tacho::Part_???"); - break; - } - } - - template<typename MatView> - KOKKOS_INLINE_FUNCTION - void - Part_1x2_to_1x3(const MatView AL, const MatView AR, - MatView &A0, MatView &A1, MatView &A2, - const typename MatView::ordinal_type bn, - const int side) { - switch (side) { - case Partition::Left: - Part_1x2(AL, A0, A1, - bn, Partition::Right); - - A2.setView(AR.BaseObaject(), - AR.OffsetRows(), AR.NumRows(), - AR.OffsetCols(), AR.NumCols()); - break; - case Partition::Right: - A0.setView(AL.BaseObject(), - AL.OffsetRows(), AL.NumRows(), - AL.OffsetCols(), AL.NumCols()); - - Part_1x2(AR, A1, A2, - bn, Partition::Left); - break; - default: - Kokkos::abort("Tacho::Part_???"); - break; - } - } - - template<typename MatView> - KOKKOS_INLINE_FUNCTION - void - Merge_2x2(const MatView ATL, const MatView ATR, - const MatView ABL, const MatView ABR, MatView &A) { - A.setView(ATL.BaseObject(), - ATL.OffsetRows(), ATL.NumRows() + ABR.NumRows(), - ATL.OffsetCols(), ATL.NumCols() + ABR.NumCols()); - } - - 
template<typename MatView> - KOKKOS_INLINE_FUNCTION - void - Merge_1x2(const MatView AL, const MatView AR, MatView &A) { - A.setView(AL.BaseObject(), - AL.OffsetRows(), AL.NumRows(), - AL.OffsetCols(), AL.NumCols() + AR.NumCols()); - } - - template<typename MatView> - KOKKOS_INLINE_FUNCTION - void - Merge_2x1(const MatView AT, - const MatView AB, MatView &A) { - A.setView(AT.BaseObject(), - AT.OffsetRows(), AT.NumRows() + AB.NumRows(), - AT.OffsetCols(), AT.NumCols()); - } - - template<typename MatView> - KOKKOS_INLINE_FUNCTION - void - Merge_3x3_to_2x2(const MatView A00, const MatView A01, const MatView A02, MatView &ATL, MatView &ATR, - const MatView A10, const MatView A11, const MatView A12, - const MatView A20, const MatView A21, const MatView A22, MatView &ABL, MatView &ABR, - const int quadrant) { - switch (quadrant) { - case Partition::TopLeft: - Merge_2x2(A00, A01, - A10, A11, ATL); - - Merge_2x1(A02, - A12, ATR); - - Merge_1x2(A20, A21, ABL); - - ABR.setView(A22.BaseObject(), - A22.OffsetRows(), A22.NumRows(), - A22.OffsetCols(), A22.NumCols()); - break; - case Partition::TopRight: - case Partition::BottomLeft: - Kokkos::abort("Tacho::Part_???"); - break; - case Partition::BottomRight: - ATL.setView(A00.BaseObject(), - A00.OffsetRows(), A00.NumRows(), - A00.OffsetCols(), A00.NumCols()); - - Merge_1x2(A01, A02, ATR); - - Merge_2x1(A10, - A20, ABL); - - Merge_2x2(A11, A12, - A21, A22, ABR); - break; - default: - Kokkos::abort("Tacho::Part_???"); - break; - } - } - - template<typename MatView> - KOKKOS_INLINE_FUNCTION - void - Merge_3x1_to_2x1(const MatView A0, MatView &AT, - const MatView A1, - const MatView A2, MatView &AB, - const int side) { - switch (side) { - case Partition::Top: - Merge_2x1(A0, - A1, AT); - - AB.setView(A2.BaseObject(), - A2.OffsetRows(), A2.NumRows(), - A2.OffsetCols(), A2.NumCols()); - break; - case Partition::Bottom: - AT.setView(A0.BaseObject(), - A0.OffsetRows(), A0.NumRows(), - A0.OffsetCols(), A0.NumCols()); - - Merge_2x1(A1, - 
A2, AB); - break; - default: - Kokkos::abort("Tacho::Part_???"); - break; - } - } - - template<typename MatView> - KOKKOS_INLINE_FUNCTION - void - Merge_1x3_to_1x2(const MatView A0, const MatView A1, const MatView A2, - MatView &AL, MatView &AR, - const int side) { - switch (side) { - case Partition::Left: - Merge_1x2(A0, A1, AL); - - AR.setView(A2.BaseObject(), - A2.OffsetRows(), A2.NumRows(), - A2.OffsetCols(), A2.NumCols()); - break; - case Partition::Right: - AL.setView(A0.BaseObject(), - A0.OffsetRows(), A0.NumRows(), - A0.OffsetCols(), A0.NumCols()); - - Merge_1x2(A1, A2, AR); - break; - default: - Kokkos::abort("Tacho::Part_???"); - break; - } - } - - -} - -#endif diff --git a/lib/kokkos/example/ichol/src/scale.hpp b/lib/kokkos/example/ichol/src/scale.hpp deleted file mode 100644 index 3152520966d88caeaede7d81c8a9bf826400d610..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/scale.hpp +++ /dev/null @@ -1,92 +0,0 @@ -#pragma once -#ifndef __SCALE_HPP__ -#define __SCALE_HPP__ - -/// \file scale.hpp -/// \brief Scaling sparse matrix. 
-/// \author Kyungjoo Kim (kyukim@sandia.gov) - -namespace Tacho { - - using namespace std; - - template<typename T> struct ScaleTraits { - typedef T scale_type; - // assume built-in types have appropriate type conversion - static constexpr T one = 1 ; - static constexpr T zero = 0 ; - }; - - - template<typename ScalarType, - typename CrsExecViewType> - KOKKOS_INLINE_FUNCTION - int - scaleCrsMatrix(const typename CrsExecViewType::policy_type::member_type &member, - const ScalarType alpha, - typename CrsExecViewType::matrix_type &A) { - typedef typename CrsExecViewType::ordinal_type ordinal_type; - typedef typename CrsExecViewType::value_type value_type; - typedef typename CrsExecViewType::row_view_type row_view_type; - - if (alpha == ScaleTraits<value_type>::one) { - // do nothing - } else { - const ordinal_type mA = A.NumRows(); - if (mA > 0) { - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, mA), - [&](const ordinal_type i) { - row_view_type &row = A.RowView(i); - for (ordinal_type j=0;j<row.NumNonZeros();++j) - row.Value(j) *= alpha; - }); - member.team_barrier(); - } - } - - return 0; - } - - template<typename ScalarType, - typename DenseExecViewType> - KOKKOS_INLINE_FUNCTION - int - scaleDenseMatrix(const typename DenseExecViewType::policy_type::member_type &member, - const ScalarType alpha, - DenseExecViewType &A) { - typedef typename DenseExecViewType::ordinal_type ordinal_type; - typedef typename DenseExecViewType::value_type value_type; - - if (alpha == ScaleTraits<value_type>::one) { - // do nothing - } else { - if (A.BaseObject().ColStride() > A.BaseObject().RowStride()) { - const ordinal_type nA = A.NumCols(); - if (nA > 0) { - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, nA), - [&](const ordinal_type j) { - for (ordinal_type i=0;i<A.NumRows();++i) - A.Value(i, j) *= alpha; - }); - member.team_barrier(); - } - } else { - const ordinal_type mA = A.NumRows(); - if (mA > 0) { - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, 
mA), - [&](const ordinal_type i) { - for (ordinal_type j=0;j<A.NumCols();++j) - A.Value(i, j) *= alpha; - }); - member.team_barrier(); - } - } - } - - return 0; - } - -} - -#endif - diff --git a/lib/kokkos/example/ichol/src/symbolic_factor_helper.hpp b/lib/kokkos/example/ichol/src/symbolic_factor_helper.hpp deleted file mode 100644 index f6c381a99817ca5254ef3563fe48941410870ad7..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/symbolic_factor_helper.hpp +++ /dev/null @@ -1,379 +0,0 @@ -#pragma once -#ifndef __SYMBOLIC_FACTOR_HELPER_HPP__ -#define __SYMBOLIC_FACTOR_HELPER_HPP__ - -/// \file symbolic_factor_helper.hpp -/// \brief The class compute a nonzero pattern with a given level of fills -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -#include "util.hpp" - -namespace Tacho { - - using namespace std; - - template<class CrsMatrixType> - class SymbolicFactorHelper : public Disp { - public: - typedef typename CrsMatrixType::ordinal_type ordinal_type; - typedef typename CrsMatrixType::size_type size_type; - - typedef typename Kokkos::HostSpace::execution_space host_exec_space ; - - typedef typename CrsMatrixType::ordinal_type_array ordinal_type_array; - typedef typename CrsMatrixType::size_type_array size_type_array; - typedef typename CrsMatrixType::value_type_array value_type_array; - - private: - string _label; // name of this class - - // matrix index base - CrsMatrixType _A; // input matrix - ordinal_type _m, _n; // matrix dimension - - struct crs_graph { - size_type_array _ap; // row ptr array - ordinal_type_array _aj; // col index array - size_type _nnz; // # of nonzeros - }; - typedef struct crs_graph crs_graph_type; - crs_graph_type _in, _out; - - typedef Kokkos::View<ordinal_type**, Kokkos::LayoutLeft, host_exec_space> league_specific_ordinal_type_array; - typedef typename league_specific_ordinal_type_array::value_type* league_specific_ordinal_type_array_ptr; - - int _lsize; - league_specific_ordinal_type_array _queue, _visited, 
_distance; - - void createInternalWorkSpace() { - _queue = league_specific_ordinal_type_array(_label+"::QueueArray", _m, _lsize); - _visited = league_specific_ordinal_type_array(_label+"::VisitedArray", _m, _lsize); - _distance = league_specific_ordinal_type_array(_label+"::DistanceArray", _m, _lsize); - } - - void freeInternalWorkSpace() { - _queue = league_specific_ordinal_type_array(); - _visited = league_specific_ordinal_type_array(); - _distance = league_specific_ordinal_type_array(); - } - - public: - - void setLabel(string label) { _label = label; } - string Label() const { return _label; } - - SymbolicFactorHelper(const CrsMatrixType &A, - const int lsize = (host_exec_space::thread_pool_size(0)/ - host_exec_space::thread_pool_size(2))) { - - _label = "SymbolicFactorHelper::" ; - - // matrix index base and the number of rows - _A = A; - - _m = _A.NumRows(); - _n = _A.NumCols(); - - // allocate memory for input crs matrix - _in._nnz = _A.NumNonZeros(); - _in._ap = size_type_array(_label+"::Input::RowPtrArray", _m+1); - _in._aj = ordinal_type_array(_label+"::Input::ColIndexArray", _in._nnz); - - // adjust graph structure; A is assumed to have a graph without its diagonal - A.convertGraph(_in._ap, _in._aj); - _in._nnz = _in._ap[_m]; - - // league size - _lsize = lsize; - - // create workspace per league - createInternalWorkSpace(); - } - virtual~SymbolicFactorHelper() { - freeInternalWorkSpace(); - } - - class Queue { - private: - league_specific_ordinal_type_array_ptr _q; - ordinal_type _begin, _end; - - public: - Queue(league_specific_ordinal_type_array_ptr q) - : _q(q),_begin(0),_end(0) { } - - ordinal_type size() const { return _end - _begin; } - bool empty() const { return !size(); } - - void push(const ordinal_type val) { _q[_end++] = val; } - ordinal_type pop() { return _q[_begin++]; } - ordinal_type end() { return _end; } - void reset() { _begin = 0; _end = 0; } - }; - - class FunctorComputeNonZeroPatternInRow { - public: - typedef 
Kokkos::TeamPolicy<host_exec_space> policy_type; - - private: - ordinal_type _level, _m; - crs_graph_type _graph; - - league_specific_ordinal_type_array _queue; - league_specific_ordinal_type_array _visited; - league_specific_ordinal_type_array _distance; - - size_type_array _ap; - ordinal_type_array _aj; - - ordinal_type _phase; - - public: - FunctorComputeNonZeroPatternInRow(const ordinal_type level, - const ordinal_type m, - const crs_graph_type &graph, - league_specific_ordinal_type_array &queue, - league_specific_ordinal_type_array &visited, - league_specific_ordinal_type_array &distance, - size_type_array &ap, - ordinal_type_array &aj) - : _level(level), _m(m), _graph(graph), - _queue(queue), _visited(visited), _distance(distance), - _ap(ap), _aj(aj), _phase(0) - { } - - void setPhaseCountNumNonZeros() { _phase = 0; } - void setPhaseComputeColIndex() { _phase = 1; } - - inline - void operator()(const typename policy_type::member_type &member) const { - const int lrank = member.league_rank(); - const int lsize = member.league_size(); - - league_specific_ordinal_type_array_ptr queue = &_queue(0, lrank); - league_specific_ordinal_type_array_ptr distance = &_distance(0, lrank); - league_specific_ordinal_type_array_ptr visited = &_visited(0, lrank); - - for (ordinal_type i=0;i<_m;++i) - visited[i] = 0; - - // shuffle rows to get better load balance; - // for instance, if ND is applied, more fills are generated in the last seperator. 
- for (ordinal_type i=lrank;i<_m;i+=lsize) { - - size_type cnt = 0; - - // account for the diagonal - switch (_phase) { - case 0: - cnt = 1; - break; - case 1: - cnt = _ap[i]; - _aj[cnt++] = i; - break; - } - - { - Queue q(queue); // fixed size queue - - // initialize work space - q.push(i); - distance[i] = 0; - - const ordinal_type id = (i+1); - visited[i] = id; - - // breath first search for i - while (!q.empty()) { - const ordinal_type h = q.pop(); - // loop over j adjancy - const ordinal_type jbegin = _graph._ap[h], jend = _graph._ap[h+1]; - for (ordinal_type j=jbegin;j<jend;++j) { - const ordinal_type t = _graph._aj[j]; - if (visited[t] != id) { - visited[t] = id; - - if (t < i && (_level < 0 || distance[h] < _level)) { - q.push(t); - distance[t] = distance[h] + 1; - } - if (t > i) { - switch (_phase) { - case 0: - ++cnt; - break; - case 1: - _aj[cnt++] = t; - break; - } - } - } - } - } - - // clear work space - for (ordinal_type j=0;j<q.end();++j) { - const ordinal_type jj = queue[j]; - distance[jj] = 0; - } - q.reset(); - } - switch (_phase) { - case 0: - _ap[i+1] = cnt; - break; - case 1: - sort(_aj.data() + _ap[i] , _aj.data() + _ap[i+1]); - break; - } - } - } - }; - - class FunctorCountOffsetsInRow { - public: - typedef Kokkos::RangePolicy<host_exec_space> policy_type; - typedef size_type value_type; - - private: - size_type_array _off_in_rows; - - public: - FunctorCountOffsetsInRow(size_type_array &off_in_rows) - : _off_in_rows(off_in_rows) - { } - - KOKKOS_INLINE_FUNCTION - void init(value_type &update) const { - update = 0; - } - - KOKKOS_INLINE_FUNCTION - void operator()(const typename policy_type::member_type &i, value_type &update, const bool final) const { - update += _off_in_rows(i); - if (final) - _off_in_rows(i) = update; - } - - KOKKOS_INLINE_FUNCTION - void join(volatile value_type &update, - volatile const value_type &input) const { - update += input; - } - }; - - int createNonZeroPattern(const ordinal_type level, - const int uplo, - 
CrsMatrixType &F) { - // all output array should be local and rcp in Kokkos::View manage memory (de)allocation - size_type_array ap = size_type_array(_label+"::Output::RowPtrArray", _m+1); - - // later determined - ordinal_type_array aj; - value_type_array ax; - size_type nnz = 0; - - { - FunctorComputeNonZeroPatternInRow functor(level, _m, _in, - _queue, - _visited, - _distance, - ap, - aj); - - functor.setPhaseCountNumNonZeros(); - Kokkos::parallel_for(typename FunctorComputeNonZeroPatternInRow::policy_type(_lsize, 1), functor); - } - { - FunctorCountOffsetsInRow functor(ap); - Kokkos::parallel_scan(typename FunctorCountOffsetsInRow::policy_type(0, _m+1), functor); - } - - nnz = ap[_m]; - aj = ordinal_type_array(_label+"::Output::ColIndexArray", nnz); - ax = value_type_array(_label+"::Output::ValueArray", nnz); - - { - FunctorComputeNonZeroPatternInRow functor(level, _m, _in, - _queue, - _visited, - _distance, - ap, - aj); - - functor.setPhaseComputeColIndex(); - Kokkos::parallel_for(typename FunctorComputeNonZeroPatternInRow::policy_type(_lsize, 1), functor); - } - - { - F = CrsMatrixType("dummy", _m, _n, nnz, ap, aj, ax); - F.add(_A); - } - - // record the symbolic factors - _out._nnz = nnz; - _out._ap = ap; - _out._aj = aj; - - return 0; - } - - int createNonZeroPattern(const int uplo, - CrsMatrixType &F) { - return createNonZeroPattern(-1, uplo, F); - } - - ostream& showMe(ostream &os) const { - streamsize prec = os.precision(); - os.precision(15); - os << scientific; - - const int w = 6; - - os << " -- Matrix Dimension -- " << endl - << " # of Rows = " << _m << endl - << " # of Cols = " << _n << endl; - - os << endl; - - os << " -- Input Graph Without Diagonals -- " << endl - << " # of NonZeros = " << _in._nnz << endl ; - - os << " -- Input Graph :: RowPtr -- " << endl; - { - const ordinal_type n0 = _in._ap.dimension_0(); - for (ordinal_type i=0;i<n0;++i) - os << setw(w) << i - << setw(w) << _in._ap[i] - << endl; - } - - os << endl; - - os << " -- Output 
Graph With Diagonals-- " << endl - << " # of NonZeros = " << _out._nnz << endl ; - - os << " -- Output Graph :: RowPtr -- " << endl; - { - const ordinal_type n0 = _out._ap.dimension_0(); - for (ordinal_type i=0;i<n0;++i) - os << setw(w) << i - << setw(w) << _out._ap[i] - << endl; - } - - os.unsetf(ios::scientific); - os.precision(prec); - - return os; - } - - }; - -} - -#endif - - - diff --git a/lib/kokkos/example/ichol/src/symbolic_task.hpp b/lib/kokkos/example/ichol/src/symbolic_task.hpp deleted file mode 100644 index f6cdc28ab133d123803fff40d5906cfaa58371ea..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/symbolic_task.hpp +++ /dev/null @@ -1,118 +0,0 @@ -#pragma once -#ifndef __SYMBOLIC_TASK_HPP__ -#define __SYMBOLIC_TASK_HPP__ - -/// \file symbolic_task.hpp -/// \brief Provides tasking interface with graphviz output. -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -namespace Tacho { - - using namespace std; - - /// \brief Graphviz color mapping for the generated tasks. 
- static map<string,string> g_graphviz_color = { - { "chol/scalar", "indianred2"}, - { "chol/trsm", "orange2" }, - { "chol/gemm", "lightblue2"} }; - - class SymbolicTaskQueue; - - class SymbolicTask { - private: - string _name; - set<SymbolicTask*> _dep_tasks; - - public: - // at this moment, make the queue global - // but this should be local and work with - // multiple queues with separate thread teams - typedef SymbolicTaskQueue queue; - - SymbolicTask() - : _name("no-name") - { } - - SymbolicTask(const SymbolicTask &b) - : _name(b._name) - { } - - SymbolicTask(const string name) - : _name(name) - { } - - int addDependence(SymbolicTask *b) { - if (b != NULL) - _dep_tasks.insert(b); - return 0; - } - - int clearDependence() { - _dep_tasks.clear(); - return 0; - } - - ostream& showMe(ostream &os) const { - os << " uid = " << this << " , name = " << _name << ", # of deps = " << _dep_tasks.size() << endl; - if (_dep_tasks.size()) { - for (auto it=_dep_tasks.begin();it!=_dep_tasks.end();++it) - os << " " << (*it) << " , name = " << (*it)->_name << endl; - } - return os; - } - - ostream& graphviz(ostream &os) const { - os << (long)(this) - << " [label=\"" << _name ; - auto it = g_graphviz_color.find(_name); - if (it != g_graphviz_color.end()) - os << "\" ,style=filled,color=\"" << it->second << "\" "; - os << "];"; - for (auto it=_dep_tasks.begin();it!=_dep_tasks.end();++it) - os << (long)(*it) << " -> " << (long)this << ";"; - return (os << endl); - } - - }; - - static vector<SymbolicTask*> g_queue; - - class SymbolicTaskQueue { - public: - static SymbolicTask* push(SymbolicTask *task) { - g_queue.push_back(task); - return g_queue.back(); - } - - static int clear() { - for (auto it=g_queue.begin();it!=g_queue.end();++it) - delete (*it); - g_queue.clear(); - return 0; - } - - static ostream& showMe(ostream &os) { - if (g_queue.size()) { - os << " -- Symbolic Task Queue -- " << endl; - for (auto it=g_queue.begin();it!=g_queue.end();++it) - (*it)->showMe(os); - } else { 
- os << " -- Symbolic Task Queue is empty -- " << endl; - } - return os; - } - - static ostream& graphviz(ostream &os, - const double width = 7.5, - const double length = 10.0) { - os << "digraph TaskGraph {" << endl; - os << "size=\"" << width << "," << length << "\";" << endl; - for (auto it=g_queue.begin();it!=g_queue.end();++it) - (*it)->graphviz(os); - os << "}" << endl; - return (os << endl); - } - }; - -} -#endif diff --git a/lib/kokkos/example/ichol/src/task_factory.hpp b/lib/kokkos/example/ichol/src/task_factory.hpp deleted file mode 100644 index b829da6737dfa3423b800aa6021b2c33e94b2c78..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/task_factory.hpp +++ /dev/null @@ -1,77 +0,0 @@ -#pragma once -#ifndef __TASK_FACTORY_HPP__ -#define __TASK_FACTORY_HPP__ - -/// \file task_factory.hpp -/// \brief A wrapper for task policy and future with a provided space type. -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -namespace Tacho { - - using namespace std; - - /// \class TaskFactory - /// \brief Minimal interface to Kokkos tasking. - /// - /// TaskFactory is attached to blocks as a template argument in order to - /// create and manage tasking future objects. Note that policy (shared - /// pointer to the task generator) is not a member object in this class. - /// This class includes minimum interface for tasking with type decralation - /// of the task policy and template alias of future so that future objects - /// generated in this class will match to their policy and its execution space. 
- /// - template<typename PolicyType, - typename FutureType> - class TaskFactory { - private: - static constexpr int _max_task_dependence = 10 ; - - public: - typedef PolicyType policy_type; - typedef FutureType future_type; - - template<typename TaskFunctorType> - static KOKKOS_INLINE_FUNCTION - future_type create(policy_type &policy, const TaskFunctorType &func) { - - future_type f ; - // while ( f.is_null() ) { - f = policy.task_create_team(func, _max_task_dependence); - // } - if ( f.is_null() ) Kokkos::abort("task_create_team FAILED, out of memory"); - return f ; - } - - static KOKKOS_INLINE_FUNCTION - void spawn(policy_type &policy, const future_type &obj, bool priority = false ) { - policy.spawn(obj,priority); - } - - static KOKKOS_INLINE_FUNCTION - void addDependence(policy_type &policy, - const future_type &after, const future_type &before) { - policy.add_dependence(after, before); - } - - template<typename TaskFunctorType> - static KOKKOS_INLINE_FUNCTION - void addDependence(policy_type &policy, - TaskFunctorType *after, const future_type &before) { - policy.add_dependence(after, before); - } - - template<typename TaskFunctorType> - static KOKKOS_INLINE_FUNCTION - void clearDependence(policy_type &policy, TaskFunctorType *func) { - policy.clear_dependence(func); - } - - template<typename TaskFunctorType> - static KOKKOS_INLINE_FUNCTION - void respawn(policy_type &policy, TaskFunctorType *func) { - policy.respawn(func); - } - }; -} - -#endif diff --git a/lib/kokkos/example/ichol/src/task_view.hpp b/lib/kokkos/example/ichol/src/task_view.hpp deleted file mode 100644 index ce280a325fd6a460c687f15e0a69c4aa6dd0e8b5..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/task_view.hpp +++ /dev/null @@ -1,104 +0,0 @@ -#pragma once -#ifndef __TASK_VIEW_HPP__ -#define __TASK_VIEW_HPP__ - -/// \file task_view.hpp -/// \brief Task view is inherited from matrix view and have a member for the task handler. 
-/// \author Kyungjoo Kim (kyukim@sandia.gov) - -namespace Tacho { - - using namespace std; - - template<typename MatrixViewType, - typename TaskFactoryType> - class TaskView : public MatrixViewType { - public: - typedef MatrixViewType matrix_type ; - typedef typename MatrixViewType::value_type value_type; - typedef typename MatrixViewType::ordinal_type ordinal_type; - - typedef TaskFactoryType task_factory_type; - typedef typename task_factory_type::policy_type policy_type; - typedef typename task_factory_type::future_type future_type; - - private: - future_type _f; - - public: - KOKKOS_INLINE_FUNCTION - void setFuture(const future_type &f) - { _f = f; } - - KOKKOS_INLINE_FUNCTION - future_type Future() const { return _f; } - - KOKKOS_INLINE_FUNCTION - ~TaskView() = default ; - - KOKKOS_INLINE_FUNCTION - TaskView() - : MatrixViewType(), _f() - { } - - TaskView(const TaskView &b) = delete ; - - KOKKOS_INLINE_FUNCTION - TaskView(typename MatrixViewType::mat_base_type const & b) - : MatrixViewType(b), _f() - { } - - KOKKOS_INLINE_FUNCTION - TaskView(typename MatrixViewType::mat_base_type const & b, - const ordinal_type offm, const ordinal_type m, - const ordinal_type offn, const ordinal_type n) - : MatrixViewType(b, offm, m, offn, n), _f() - { } - - }; -} - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#if ! KOKKOS_USING_EXP_VIEW - -namespace Kokkos { - namespace Impl { - - // The Kokkos::View allocation will by default assign each allocated datum to zero. - // This is not the required initialization behavior when - // non-trivial objects are used within a Kokkos::View. - // Create a partial specialization of the Kokkos::Impl::AViewDefaultConstruct - // to replace the assignment initialization with placement new initialization. - // - // This work-around is necessary until a TBD design refactorization of Kokkos::View. 
- - template< class ExecSpace , typename T1, typename T2 > - struct ViewDefaultConstruct< ExecSpace , Tacho::TaskView<T1,T2> , true > - { - typedef Tacho::TaskView<T1,T2> type ; - type * const m_ptr ; - - KOKKOS_FORCEINLINE_FUNCTION - void operator()( const typename ExecSpace::size_type& i ) const - { new(m_ptr+i) type(); } - - ViewDefaultConstruct( type * pointer , size_t capacity ) - : m_ptr( pointer ) - { - Kokkos::RangePolicy< ExecSpace > range( 0 , capacity ); - parallel_for( range , *this ); - ExecSpace::fence(); - } - }; - - } // namespace Impl -} // namespace Kokkos - -#endif /* #if ! KOKKOS_USING_EXP_VIEW */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif diff --git a/lib/kokkos/example/ichol/src/trsm.hpp b/lib/kokkos/example/ichol/src/trsm.hpp deleted file mode 100644 index b4a6a7df48967257f824ae73680bf918d457be76..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/trsm.hpp +++ /dev/null @@ -1,92 +0,0 @@ -#pragma once -#ifndef __TRSM_HPP__ -#define __TRSM_HPP__ - -/// \file trsm.hpp -/// \brief Sparse triangular solve on given sparse patterns and multiple rhs. 
-/// \author Kyungjoo Kim (kyukim@sandia.gov) - -#include "util.hpp" -#include "control.hpp" -#include "partition.hpp" - -namespace Tacho { - - using namespace std; - - template<int ArgSide,int ArgUplo, int ArgTrans, int ArgAlgo, - int ArgVariant = Variant::One, - template<int,int> class ControlType = Control> - struct Trsm { - - // data-parallel interface - // ======================= - template<typename ScalarType, - typename ExecViewTypeA, - typename ExecViewTypeB> - KOKKOS_INLINE_FUNCTION - static int invoke(typename ExecViewTypeA::policy_type &policy, - const typename ExecViewTypeA::policy_type::member_type &member, - const int diagA, - const ScalarType alpha, - typename ExecViewTypeA::matrix_type &A, - typename ExecViewTypeB::matrix_type &B); - - // task-data parallel interface - // ============================ - template<typename ScalarType, - typename ExecViewTypeA, - typename ExecViewTypeB> - class TaskFunctor { - public: - typedef typename ExecViewTypeA::policy_type policy_type; - typedef typename policy_type::member_type member_type; - typedef int value_type; - - private: - int _diagA; - ScalarType _alpha; - typename ExecViewTypeA::matrix_type _A; - typename ExecViewTypeB::matrix_type _B; - - policy_type _policy; - - public: - KOKKOS_INLINE_FUNCTION - TaskFunctor(const policy_type & P, - const int diagA, - const ScalarType alpha, - const ExecViewTypeA & A, - const ExecViewTypeB & B) - : _diagA(diagA), - _alpha(alpha), - _A(A), - _B(B), - _policy(P) - { } - - string Label() const { return "Trsm"; } - - // task execution - KOKKOS_INLINE_FUNCTION - void apply(value_type &r_val) { - r_val = Trsm::invoke<ScalarType,ExecViewTypeA,ExecViewTypeB>(_policy, _policy.member_single(), - _diagA, _alpha, _A, _B); - } - - // task-data execution - KOKKOS_INLINE_FUNCTION - void apply(const member_type &member, value_type &r_val) { - r_val = Trsm::invoke<ScalarType,ExecViewTypeA,ExecViewTypeB>(_policy, member, - _diagA, _alpha, _A, _B); - } - - }; - }; - -} - -// #include 
"trsm_l_u_nt.hpp" -#include "trsm_l_u_ct.hpp" - -#endif diff --git a/lib/kokkos/example/ichol/src/trsm_l_u_ct.hpp b/lib/kokkos/example/ichol/src/trsm_l_u_ct.hpp deleted file mode 100644 index b6f3289474518bd88e55db198e4d2ad8efa7e435..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/trsm_l_u_ct.hpp +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once -#ifndef __TRSM_L_U_CT_HPP__ -#define __TRSM_L_U_CT_HPP__ - -/// \file trsm_l_u_ct.hpp -/// \brief Sparse triangular solve on given sparse patterns and multiple rhs. -/// \author Kyungjoo Kim (kyukim@sandia.gov) -/// -#include "gemm.hpp" - -#include "trsm_l_u_ct_for_factor_blocked.hpp" -// #include "trsm_l_u_ct_for_tri_solve_blocked.hpp" - -#endif diff --git a/lib/kokkos/example/ichol/src/trsm_l_u_ct_for_factor_blocked.hpp b/lib/kokkos/example/ichol/src/trsm_l_u_ct_for_factor_blocked.hpp deleted file mode 100644 index 7414e5d80f07f895a8cd4e5182acb3fc9976be58..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/trsm_l_u_ct_for_factor_blocked.hpp +++ /dev/null @@ -1,185 +0,0 @@ -#pragma once -#ifndef __TRSM_L_U_CT_FOR_FACTOR_BLOCKED_HPP__ -#define __TRSM_L_U_CT_FOR_FACTOR_BLOCKED_HPP__ - -/// \file trsm_l_u_ct_for_factor_blocked.hpp -/// \brief Sparse triangular solve on given sparse patterns and multiple rhs. 
-/// \author Kyungjoo Kim (kyukim@sandia.gov) -/// - -namespace Tacho { - - using namespace std; - - // Trsm used in the factorization phase: data parallel on b1t - // ========================================================== - template<> - template<typename ScalarType, - typename CrsExecViewTypeA, - typename CrsExecViewTypeB> - KOKKOS_INLINE_FUNCTION - int - Trsm<Side::Left,Uplo::Upper,Trans::ConjTranspose, - AlgoTrsm::ForFactorBlocked,Variant::One> - ::invoke(typename CrsExecViewTypeA::policy_type &policy, - const typename CrsExecViewTypeA::policy_type::member_type &member, - const int diagA, - const ScalarType alpha, - typename CrsExecViewTypeA::matrix_type &A, - typename CrsExecViewTypeB::matrix_type &B) { - typedef typename CrsExecViewTypeA::ordinal_type ordinal_type; - typedef typename CrsExecViewTypeA::value_type value_type; - typedef typename CrsExecViewTypeA::row_view_type row_view_type; - - -if ( false && member.team_rank() == 0 ) { - printf("Trsm [%d +%d)x[%d +%d)\n" - , B.OffsetRows() - , B.NumRows() - , B.OffsetCols() - , B.NumCols() - ); -} - - // scale the matrix B with alpha - scaleCrsMatrix<ScalarType,CrsExecViewTypeB>(member, alpha, B); - - // Solve a system: AX = B -> B := inv(A) B - const ordinal_type mA = A.NumRows(); - const ordinal_type nB = B.NumCols(); - - if (nB > 0) { - for (ordinal_type k=0;k<mA;++k) { - row_view_type &a = A.RowView(k); - // const value_type cdiag = std::conj(a.Value(0)); // for complex<T> - const value_type cdiag = a.Value(0); - - // invert - row_view_type &b1 = B.RowView(k); - const ordinal_type nnz_b1 = b1.NumNonZeros(); - - if (diagA != Diag::Unit && nnz_b1 > 0) { - // b1t = b1t / conj(diag) - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, nnz_b1), - [&](const ordinal_type j) { - b1.Value(j) /= cdiag; - }); - } - - // update - const ordinal_type nnz_a = a.NumNonZeros(); - if (nnz_a > 0) { - // B2 = B2 - trans(conj(a12t)) b1t - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, nnz_b1), - [&](const 
ordinal_type j) { - // grab b1 - const ordinal_type col_at_j = b1.Col(j); - const value_type val_at_j = b1.Value(j); - - for (ordinal_type i=1;i<nnz_a;++i) { - // grab a12t - const ordinal_type row_at_i = a.Col(i); - // const value_type val_at_i = conj(a.Value(i)); - const value_type val_at_i = a.Value(i); - - // grab b2 - row_view_type &b2 = B.RowView(row_at_i); - - // check and update - ordinal_type idx = 0; - idx = b2.Index(col_at_j, idx); - if (idx >= 0) - b2.Value(idx) -= val_at_i*val_at_j; - } - }); - } - member.team_barrier(); - } - } - - return 0; - } - - // Trsm used in the factorization phase: data parallel on a1t - // ========================================================== - template<> - template<typename ScalarType, - typename CrsExecViewTypeA, - typename CrsExecViewTypeB> - KOKKOS_INLINE_FUNCTION - int - Trsm<Side::Left,Uplo::Upper,Trans::ConjTranspose, - AlgoTrsm::ForFactorBlocked,Variant::Two> - ::invoke(typename CrsExecViewTypeA::policy_type &policy, - const typename CrsExecViewTypeA::policy_type::member_type &member, - const int diagA, - const ScalarType alpha, - typename CrsExecViewTypeA::matrix_type &A, - typename CrsExecViewTypeB::matrix_type &B) { - typedef typename CrsExecViewTypeA::ordinal_type ordinal_type; - typedef typename CrsExecViewTypeA::value_type value_type; - typedef typename CrsExecViewTypeA::row_view_type row_view_type; - - // scale the matrix B with alpha - scaleCrsMatrix<ScalarType,CrsExecViewTypeB>(member, alpha, B); - - // Solve a system: AX = B -> B := inv(A) B - const ordinal_type mA = A.NumRows(); - const ordinal_type nB = B.NumCols(); - - if (nB > 0) { - for (ordinal_type k=0;k<mA;++k) { - row_view_type &a = A.RowView(k); - // const value_type cdiag = conj(a.Value(0)); - const value_type cdiag = a.Value(0); - - // invert - row_view_type &b1 = B.RowView(k); - const ordinal_type nnz_b1 = b1.NumNonZeros(); - - if (diagA != Diag::Unit && nnz_b1 > 0) { - // b1t = b1t / conj(diag) - 
Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, nnz_b1), - [&](const ordinal_type j) { - b1.Value(j) /= cdiag; - }); - member.team_barrier(); - } - - // update - const ordinal_type nnz_a = a.NumNonZeros(); - if (nnz_a > 0) { - // B2 = B2 - trans(conj(a12t)) b1t - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 1, nnz_a), - [&](const ordinal_type i) { - // grab a12t - const ordinal_type row_at_i = a.Col(i); - // const value_type val_at_i = conj(a.Value(i)); - const value_type val_at_i = a.Value(i); - - // grab b2 - row_view_type &b2 = B.RowView(row_at_i); - - ordinal_type idx = 0; - for (ordinal_type j=0;j<nnz_b1 && (idx > -2);++j) { - // grab b1 - const ordinal_type col_at_j = b1.Col(j); - const value_type val_at_j = b1.Value(j); - - // check and update - idx = b2.Index(col_at_j, idx); - if (idx >= 0) - b2.Value(idx) -= val_at_i*val_at_j; - } - }); - member.team_barrier(); - } - } - } - - return 0; - } - -} - -#endif diff --git a/lib/kokkos/example/ichol/src/util.cpp b/lib/kokkos/example/ichol/src/util.cpp deleted file mode 100644 index ef220c48c1b7d58af2289dde4f226a7a102d63ee..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/util.cpp +++ /dev/null @@ -1,4 +0,0 @@ - - -static int dummy = 1; - diff --git a/lib/kokkos/example/ichol/src/util.hpp b/lib/kokkos/example/ichol/src/util.hpp deleted file mode 100644 index 020475bc52daad5c864d7caa8ed34d03157a0046..0000000000000000000000000000000000000000 --- a/lib/kokkos/example/ichol/src/util.hpp +++ /dev/null @@ -1,237 +0,0 @@ -#pragma once -#ifndef __UTIL_HPP__ -#define __UTIL_HPP__ - -#include <stdio.h> -#include <string.h> - -#include <string> -#include <iostream> -#include <iomanip> -#include <fstream> -#include <vector> -#include <set> -#include <map> -#include <algorithm> -#include <memory> - -#include <cmath> -#include <complex> - -#include <limits> - -/// \file util.hpp -/// \brief Utility functions and constant integer class like an enum class. 
-/// \author Kyungjoo Kim (kyukim@sandia.gov) -/// -/// This provides utility functions for implementing mini-app for incomplete -/// sparse matrix factorization with task-data parallelism e.g., parameter -/// classes, error handling, ostream << overloading. -/// -/// Note: The reference of the "static const int" members in the enum-like -/// classes should not be used as function arguments but their values only. - - -using namespace std; - -namespace Tacho { - -#undef CHKERR -#define CHKERR(ierr) \ - if (ierr != 0) { cout << endl << ">> Error in " << __FILE__ << ", " << __LINE__ << " : " << ierr << endl; } - -#define MSG_NOT_YET_IMPLEMENTED ">> Not yet implemented" -#define MSG_INVALID_INPUT(what) ">> Invaid input argument: " #what -#define MSG_INVALID_TEMPLATE_ARGS ">> Invaid template arguments" -#define ERROR(msg) \ - { cout << endl << ">> Error in " << __FILE__ << ", " << __LINE__ << endl << msg << endl; } - - // control id -#undef Ctrl -#define Ctrl(name,algo,variant) name<algo,variant> - - // control leaf -#undef CtrlComponent -#define CtrlComponent(name,algo,variant,component,id) \ - Ctrl(name,algo,variant)::component[id] - - // control recursion -#undef CtrlDetail -#define CtrlDetail(name,algo,variant,component) \ - CtrlComponent(name,algo,variant,component,0),CtrlComponent(name,algo,variant,component,1),name - - /// \class GraphHelper - class GraphHelper { - public: - static const int DefaultRandomSeed = -1; - }; - - - /// \class Partition - /// \brief Matrix partition parameters. - class Partition { - public: - static const int Top = 101; - static const int Bottom = 102; - - static const int Left = 201; - static const int Right = 202; - - static const int TopLeft = 401; - static const int TopRight = 402; - static const int BottomLeft = 403; - static const int BottomRight = 404; - }; - - /// \class Uplo - /// \brief Matrix upper/lower parameters. 
- class Uplo { - public: - static const int Upper = 501; - static const int Lower = 502; - }; - - /// \class Side - /// \brief Matrix left/right parameters. - class Side { - public: - static const int Left = 601; - static const int Right = 602; - }; - - /// \class Diag - /// \brief Matrix unit/non-unit diag parameters. - class Diag { - public: - static const int Unit = 701; - static const int NonUnit = 702; - }; - - /// \class Trans - /// \brief Matrix upper/lower parameters. - class Trans { - public: - static const int Transpose = 801; - static const int ConjTranspose = 802; - static const int NoTranspose = 803; - }; - - /// \class Loop - /// \brief outer/innner parameters - class Loop { - public: - static const int Outer = 901; - static const int Inner = 902; - static const int Fused = 903; - }; - - class Variant { - public: - static const int One = 1; - static const int Two = 2; - static const int Three = 3; - static const int Four = 4; - }; - - /// \class AlgoChol - /// \brief Algorithmic variants in sparse factorization and sparse BLAS operations. 
- class AlgoChol { - public: - // One side factorization on flat matrices - static const int Dummy = 1000; - static const int Unblocked = 1001; - static const int UnblockedOpt = 1002; - static const int Blocked = 1101; // testing only - - static const int RightLookByBlocks = 1201; // backbone structure is right looking - static const int ByBlocks = RightLookByBlocks; - - static const int NestedDenseBlock = 1211; - static const int NestedDenseByBlocks = 1212; - - static const int RightLookDenseByBlocks = 1221; - static const int DenseByBlocks = RightLookDenseByBlocks; - - static const int ExternalLapack = 1231; - static const int ExternalPardiso = 1232; - }; - - // aliasing name space - typedef AlgoChol AlgoTriSolve; - - class AlgoBlasLeaf { - public: - // One side factorization on flat matrices - static const int ForFactorBlocked = 2001; - - // B and C are dense matrices and used for solve phase - static const int ForTriSolveBlocked = 2011; - - static const int ExternalBlas = 2021; - }; - - class AlgoGemm : public AlgoBlasLeaf { - public: - static const int DenseByBlocks = 2101; - }; - - class AlgoTrsm : public AlgoBlasLeaf { - public: - static const int DenseByBlocks = 2201; - }; - - class AlgoHerk : public AlgoBlasLeaf { - public: - static const int DenseByBlocks = 2301; - }; - - /// \brief Interface for overloaded stream operators. - template<typename T> - inline - ostream& operator<<(ostream &os, const unique_ptr<T> &p) { - return p->showMe(os); - } - - /// \class Disp - /// \brief Interface for the stream operator. - class Disp { - friend ostream& operator<<(ostream &os, const Disp &disp); - public: - Disp() { } - virtual ostream& showMe(ostream &os) const { - return os; - } - }; - - /// \brief Implementation of the overloaded stream operator. 
- inline - ostream& operator<<(ostream &os, const Disp &disp) { - return disp.showMe(os); - } - - template<typename T> struct NumericTraits {}; - - template<> - struct NumericTraits<float> { - typedef float real_type; - static real_type epsilon() { return numeric_limits<float>::epsilon(); } - }; - template<> - struct NumericTraits<double> { - typedef double real_type; - static real_type epsilon() { return numeric_limits<double>::epsilon(); } - }; - template<> - struct NumericTraits<complex<float> > { - typedef float real_type; - static real_type epsilon() { return numeric_limits<float>::epsilon(); } - }; - template<> - struct NumericTraits<complex<double> > { - typedef double real_type; - static real_type epsilon() { return numeric_limits<double>::epsilon(); } - }; - -} - -#endif diff --git a/lib/kokkos/example/md_skeleton/Makefile b/lib/kokkos/example/md_skeleton/Makefile index bf8fbea3e09a5d71f900de85ff2100cf41bd5738..42b376ec7c5cf73537bf2d49340ce1ca963e3ad1 100644 --- a/lib/kokkos/example/md_skeleton/Makefile +++ b/lib/kokkos/example/md_skeleton/Makefile @@ -12,27 +12,20 @@ OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o) default: build echo "Start Build" -# use installed Makefile.kokkos -include $(KOKKOS_PATH)/Makefile.kokkos - ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = $(NVCC_WRAPPER) -CXXFLAGS = -I$(SRC_DIR) -O3 -LINK = $(CXX) -LINKFLAGS = -EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR))) -#KOKKOS_DEVICES = "Cuda,OpenMP" -#KOKKOS_ARCH = "SNB,Kepler35" + CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper + EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR))) else -CXX = g++ -CXXFLAGS = -I$(SRC_DIR) -O3 -LINK = $(CXX) -LINKFLAGS = -EXE = $(addsuffix .host, $(shell basename $(SRC_DIR))) -#KOKKOS_DEVICES = "OpenMP" -#KOKKOS_ARCH = "SNB" + CXX = g++ + EXE = $(addsuffix .host, $(shell basename $(SRC_DIR))) endif +CXXFLAGS = -O3 -I$(SRC_DIR) +LINK ?= $(CXX) +LDFLAGS ?= + +include $(KOKKOS_PATH)/Makefile.kokkos + DEPFLAGS = -M LIB = diff --git 
a/lib/kokkos/example/multi_fem/Makefile b/lib/kokkos/example/multi_fem/Makefile index 72e1768fcb9b446f94400a3e783767923779f6bf..4b114b56255f152206adee8dbc8979ae9015050f 100644 --- a/lib/kokkos/example/multi_fem/Makefile +++ b/lib/kokkos/example/multi_fem/Makefile @@ -12,27 +12,23 @@ OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o) default: build echo "Start Build" -# use installed Makefile.kokkos -include $(KOKKOS_PATH)/Makefile.kokkos +CXXFLAGS = -O3 -I$(SRC_DIR) +LDFLAGS ?= ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = $(NVCC_WRAPPER) -CXXFLAGS = -I$(SRC_DIR) -I$(CUDA_PATH) -O3 -LINK = $(CXX) -LINKFLAGS = -L$(CUDA_PATH)/lib64 -lcusparse -EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR))) -#KOKKOS_DEVICES = "Cuda,OpenMP" -#KOKKOS_ARCH = "SNB,Kepler35" + CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper + EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR))) + CXXFLAGS += -I$(SRC_DIR) -I$(CUDA_PATH) -O3 + LDFLAGS += -L$(CUDA_PATH)/lib64 -lcusparse else -CXX = g++ -CXXFLAGS = -I$(SRC_DIR) -O3 -LINK = $(CXX) -LINKFLAGS = -EXE = $(addsuffix .host, $(shell basename $(SRC_DIR))) -#KOKKOS_DEVICES = "OpenMP" -#KOKKOS_ARCH = "SNB" + CXX = g++ + EXE = $(addsuffix .host, $(shell basename $(SRC_DIR))) endif +LINK ?= $(CXX) + +include $(KOKKOS_PATH)/Makefile.kokkos + DEPFLAGS = -M LIB = diff --git a/lib/kokkos/example/query_device/Makefile b/lib/kokkos/example/query_device/Makefile index bf8fbea3e09a5d71f900de85ff2100cf41bd5738..42b376ec7c5cf73537bf2d49340ce1ca963e3ad1 100644 --- a/lib/kokkos/example/query_device/Makefile +++ b/lib/kokkos/example/query_device/Makefile @@ -12,27 +12,20 @@ OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o) default: build echo "Start Build" -# use installed Makefile.kokkos -include $(KOKKOS_PATH)/Makefile.kokkos - ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = $(NVCC_WRAPPER) -CXXFLAGS = -I$(SRC_DIR) -O3 -LINK = $(CXX) -LINKFLAGS = -EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR))) -#KOKKOS_DEVICES = "Cuda,OpenMP" -#KOKKOS_ARCH = "SNB,Kepler35" + CXX = 
$(KOKKOS_PATH)/bin/nvcc_wrapper + EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR))) else -CXX = g++ -CXXFLAGS = -I$(SRC_DIR) -O3 -LINK = $(CXX) -LINKFLAGS = -EXE = $(addsuffix .host, $(shell basename $(SRC_DIR))) -#KOKKOS_DEVICES = "OpenMP" -#KOKKOS_ARCH = "SNB" + CXX = g++ + EXE = $(addsuffix .host, $(shell basename $(SRC_DIR))) endif +CXXFLAGS = -O3 -I$(SRC_DIR) +LINK ?= $(CXX) +LDFLAGS ?= + +include $(KOKKOS_PATH)/Makefile.kokkos + DEPFLAGS = -M LIB = diff --git a/lib/kokkos/example/sort_array/Makefile b/lib/kokkos/example/sort_array/Makefile index bf8fbea3e09a5d71f900de85ff2100cf41bd5738..42b376ec7c5cf73537bf2d49340ce1ca963e3ad1 100644 --- a/lib/kokkos/example/sort_array/Makefile +++ b/lib/kokkos/example/sort_array/Makefile @@ -12,27 +12,20 @@ OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o) default: build echo "Start Build" -# use installed Makefile.kokkos -include $(KOKKOS_PATH)/Makefile.kokkos - ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = $(NVCC_WRAPPER) -CXXFLAGS = -I$(SRC_DIR) -O3 -LINK = $(CXX) -LINKFLAGS = -EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR))) -#KOKKOS_DEVICES = "Cuda,OpenMP" -#KOKKOS_ARCH = "SNB,Kepler35" + CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper + EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR))) else -CXX = g++ -CXXFLAGS = -I$(SRC_DIR) -O3 -LINK = $(CXX) -LINKFLAGS = -EXE = $(addsuffix .host, $(shell basename $(SRC_DIR))) -#KOKKOS_DEVICES = "OpenMP" -#KOKKOS_ARCH = "SNB" + CXX = g++ + EXE = $(addsuffix .host, $(shell basename $(SRC_DIR))) endif +CXXFLAGS = -O3 -I$(SRC_DIR) +LINK ?= $(CXX) +LDFLAGS ?= + +include $(KOKKOS_PATH)/Makefile.kokkos + DEPFLAGS = -M LIB = diff --git a/lib/kokkos/example/sort_array/sort_array.hpp b/lib/kokkos/example/sort_array/sort_array.hpp index d21f9989582c7be28e7c5c1c0f325330cc340e78..ae17cb7ac7dc44cbbb4287b3a47c7fd0021de85a 100644 --- a/lib/kokkos/example/sort_array/sort_array.hpp +++ b/lib/kokkos/example/sort_array/sort_array.hpp @@ -105,7 +105,7 @@ void sort_array( const size_t array_length /* length 
of spans of array to sort * #if defined( KOKKOS_HAVE_CUDA ) typedef typename - Kokkos::Impl::if_c< Kokkos::Impl::is_same< Device , Kokkos::Cuda >::value + Kokkos::Impl::if_c< std::is_same< Device , Kokkos::Cuda >::value , Kokkos::View<int*,Kokkos::Cuda::array_layout,Kokkos::CudaHostPinnedSpace> , typename device_array_type::HostMirror >::type host_array_type ; diff --git a/lib/kokkos/example/tutorial/01_hello_world/Makefile b/lib/kokkos/example/tutorial/01_hello_world/Makefile index 78a9fed0cce641b48c85f4d67a1d0ab6c5a63388..62ab22f17eb561b4ffcdd38a91115627b8460821 100644 --- a/lib/kokkos/example/tutorial/01_hello_world/Makefile +++ b/lib/kokkos/example/tutorial/01_hello_world/Makefile @@ -1,15 +1,17 @@ KOKKOS_PATH = ../../.. -SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/01_hello_world/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 01_hello_world.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" else @@ -17,20 +19,23 @@ CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 01_hello_world.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -40,4 +45,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git 
a/lib/kokkos/example/tutorial/01_hello_world_lambda/Makefile b/lib/kokkos/example/tutorial/01_hello_world_lambda/Makefile index 95ee2c47feacf363f99052173a28596144a75734..52d5fb07c481bf7357a6acbfc7ff547f3621f180 100644 --- a/lib/kokkos/example/tutorial/01_hello_world_lambda/Makefile +++ b/lib/kokkos/example/tutorial/01_hello_world_lambda/Makefile @@ -1,37 +1,42 @@ KOKKOS_PATH = ../../.. -SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/01_hello_world_lambda/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 01_hello_world_lambda.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" -KOKKOS_CUDA_OPTIONS = "enable_lambda" +KOKKOS_CUDA_OPTIONS += "enable_lambda" else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 01_hello_world_lambda.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -41,4 +46,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp b/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp index b6c9cc5e4380d4ea8b825c9305f2e7cea6316a10..4b8b9db621106417ba2e73c1e00a7b0a4088552f 100644 --- 
a/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp +++ b/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp @@ -98,11 +98,14 @@ int main (int argc, char* argv[]) { // // You may notice that the printed numbers do not print out in // order. Parallel for loops may execute in any order. + // We also need to protect the usage of a lambda against compiling + // with a backend which doesn't support it (i.e. Cuda 6.5/7.0). +#if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) Kokkos::parallel_for (15, KOKKOS_LAMBDA (const int i) { // printf works in a CUDA parallel kernel; std::ostream does not. printf ("Hello from i = %i\n", i); }); - +#endif // You must call finalize() after you are done using Kokkos. Kokkos::finalize (); } diff --git a/lib/kokkos/example/tutorial/02_simple_reduce/Makefile b/lib/kokkos/example/tutorial/02_simple_reduce/Makefile index 78a9fed0cce641b48c85f4d67a1d0ab6c5a63388..d102af5151c3eb65bd470665371a1a50dec339f8 100644 --- a/lib/kokkos/example/tutorial/02_simple_reduce/Makefile +++ b/lib/kokkos/example/tutorial/02_simple_reduce/Makefile @@ -1,15 +1,17 @@ KOKKOS_PATH = ../../.. 
-SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/02_simple_reduce/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 02_simple_reduce.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" else @@ -17,20 +19,23 @@ CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 02_simple_reduce.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -40,4 +45,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/Makefile b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/Makefile index 95ee2c47feacf363f99052173a28596144a75734..4545668b77ba2e36a3b6412dce2c4836e1db29cc 100644 --- a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/Makefile +++ b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/Makefile @@ -1,37 +1,42 @@ KOKKOS_PATH = ../../.. 
-SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/02_simple_reduce_lambda/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 02_simple_reduce_lambda.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" -KOKKOS_CUDA_OPTIONS = "enable_lambda" +KOKKOS_CUDA_OPTIONS += "enable_lambda" else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 02_simple_reduce_lambda.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -41,4 +46,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp index a403633a8a898375f2f5c0d4015fc3930570ef0d..f44ddce309de519d7109fb7a4212f7533c73d57c 100644 --- a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp +++ b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp @@ -67,9 +67,13 @@ int main (int argc, char* argv[]) { int sum = 0; // The KOKKOS_LAMBDA macro replaces the capture-by-value clause [=]. // It also handles any other syntax needed for CUDA. 
+ // We also need to protect the usage of a lambda against compiling + // with a backend which doesn't support it (i.e. Cuda 6.5/7.0). + #if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) Kokkos::parallel_reduce (n, KOKKOS_LAMBDA (const int i, int& lsum) { lsum += i*i; }, sum); + #endif printf ("Sum of squares of integers from 0 to %i, " "computed in parallel, is %i\n", n - 1, sum); @@ -81,6 +85,10 @@ int main (int argc, char* argv[]) { printf ("Sum of squares of integers from 0 to %i, " "computed sequentially, is %i\n", n - 1, seqSum); Kokkos::finalize (); +#if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) return (sum == seqSum) ? 0 : -1; +#else + return 0; +#endif } diff --git a/lib/kokkos/example/tutorial/03_simple_view/Makefile b/lib/kokkos/example/tutorial/03_simple_view/Makefile index 78a9fed0cce641b48c85f4d67a1d0ab6c5a63388..e716b765e7f1778d839f2dcd603d258d2287c8fe 100644 --- a/lib/kokkos/example/tutorial/03_simple_view/Makefile +++ b/lib/kokkos/example/tutorial/03_simple_view/Makefile @@ -1,15 +1,17 @@ KOKKOS_PATH = ../../.. 
-SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/03_simple_view/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 03_simple_view.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" else @@ -17,20 +19,23 @@ CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 03_simple_view.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -40,4 +45,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/03_simple_view_lambda/Makefile b/lib/kokkos/example/tutorial/03_simple_view_lambda/Makefile index 95ee2c47feacf363f99052173a28596144a75734..b93c14910e40ce57936d71cedc869e3dc79182aa 100644 --- a/lib/kokkos/example/tutorial/03_simple_view_lambda/Makefile +++ b/lib/kokkos/example/tutorial/03_simple_view_lambda/Makefile @@ -1,37 +1,42 @@ KOKKOS_PATH = ../../.. 
-SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/03_simple_view_lambda/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 03_simple_view_lambda.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" -KOKKOS_CUDA_OPTIONS = "enable_lambda" +KOKKOS_CUDA_OPTIONS += "enable_lambda" else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 03_simple_view_lambda.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -41,4 +46,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp b/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp index 974af747763bfba23a2f6d3dfeefe68fb9ec4e25..e9e7c2370b3d4f99c5d0998cb5520ce3cca2221b 100644 --- a/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp +++ b/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp @@ -97,6 +97,9 @@ int main (int argc, char* argv[]) { // pointers, not like std::vector. Passing them by value does a // shallow copy. A deep copy never happens unless you explicitly // ask for one. 
+ // We also need to protect the usage of a lambda against compiling + // with a backend which doesn't support it (i.e. Cuda 6.5/7.0). + #if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) Kokkos::parallel_for (10, KOKKOS_LAMBDA (const int i) { // Acesss the View just like a Fortran array. The layout depends // on the View's memory space, so don't rely on the View's @@ -111,6 +114,7 @@ int main (int argc, char* argv[]) { lsum += a(i,0)*a(i,1)/(a(i,2)+0.1); }, sum); printf ("Result: %f\n", sum); + #endif Kokkos::finalize (); } diff --git a/lib/kokkos/example/tutorial/04_simple_memoryspaces/Makefile b/lib/kokkos/example/tutorial/04_simple_memoryspaces/Makefile index 78a9fed0cce641b48c85f4d67a1d0ab6c5a63388..8dd7598f03664eb610e6bd4376697dc801b80609 100644 --- a/lib/kokkos/example/tutorial/04_simple_memoryspaces/Makefile +++ b/lib/kokkos/example/tutorial/04_simple_memoryspaces/Makefile @@ -1,15 +1,17 @@ KOKKOS_PATH = ../../.. -SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/04_simple_memoryspaces/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 04_simple_memoryspaces.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" else @@ -17,20 +19,23 @@ CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 04_simple_memoryspaces.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -40,4 +45,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) 
$(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/05_simple_atomics/Makefile b/lib/kokkos/example/tutorial/05_simple_atomics/Makefile index 78a9fed0cce641b48c85f4d67a1d0ab6c5a63388..d297d45576b73a6f622b4d7f9ed84b9dddf4c481 100644 --- a/lib/kokkos/example/tutorial/05_simple_atomics/Makefile +++ b/lib/kokkos/example/tutorial/05_simple_atomics/Makefile @@ -1,15 +1,17 @@ KOKKOS_PATH = ../../.. -SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/05_simple_atomics/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 05_simple_atomics.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" else @@ -17,20 +19,23 @@ CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 05_simple_atomics.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -40,4 +45,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/Makefile index 12ad36b31e458d155aa6dc653ab8188a7773bd18..956a4d1798f2318a14ec2a3a758a6e0bca5a047d 
100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/Makefile +++ b/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/Makefile @@ -1,15 +1,17 @@ KOKKOS_PATH = ../../../.. -SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Advanced_Views/01_data_layouts/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 01_data_layouts.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" else @@ -17,20 +19,23 @@ CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 01_data_layouts.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -40,4 +45,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/Makefile index 12ad36b31e458d155aa6dc653ab8188a7773bd18..41697b0731e2934c94133ec0876fa8f963a299fd 100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/Makefile +++ b/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/Makefile @@ -1,15 +1,17 @@ KOKKOS_PATH = ../../../.. 
-SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Advanced_Views/02_memory_traits/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 02_memory_traits.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" else @@ -17,20 +19,23 @@ CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 02_memory_traits.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -40,4 +45,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/Makefile index 12ad36b31e458d155aa6dc653ab8188a7773bd18..8d0697aa2115c79c5749c23195dd917543ea8928 100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/Makefile +++ b/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/Makefile @@ -1,15 +1,17 @@ KOKKOS_PATH = ../../../.. 
-SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Advanced_Views/03_subviews/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 03_subviews.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" else @@ -17,20 +19,23 @@ CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 03_subviews.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -40,4 +45,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/Makefile index 12ad36b31e458d155aa6dc653ab8188a7773bd18..0a3acd984f2ac88e25e034c747790cb01853018f 100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/Makefile +++ b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/Makefile @@ -1,15 +1,17 @@ KOKKOS_PATH = ../../../.. 
-SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Advanced_Views/04_dualviews/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 04_dualviews.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" else @@ -17,20 +19,23 @@ CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 04_dualviews.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -40,4 +45,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/dual_view.cpp b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/dual_view.cpp index 4905e4bf88485c70527d9080844940a61c60365c..26b55eae7886f146699dd527a69a97a974d2dc6d 100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/dual_view.cpp +++ b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/dual_view.cpp @@ -76,7 +76,7 @@ struct localsum { // overrides Kokkos' default execution space. 
typedef ExecutionSpace execution_space; - typedef typename Kokkos::Impl::if_c<Kokkos::Impl::is_same<ExecutionSpace,Kokkos::DefaultExecutionSpace>::value , + typedef typename Kokkos::Impl::if_c<std::is_same<ExecutionSpace,Kokkos::DefaultExecutionSpace>::value , idx_type::memory_space, idx_type::host_mirror_space>::type memory_space; // Get the view types on the particular device for which the functor diff --git a/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile index 12ad36b31e458d155aa6dc653ab8188a7773bd18..615ee2887a800829d055bf03e126e411ae438669 100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile +++ b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile @@ -1,15 +1,17 @@ KOKKOS_PATH = ../../../.. -SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Advanced_Views/05_NVIDIA_UVM/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 05_NVIDIA_UVM.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" else @@ -17,20 +19,23 @@ CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 05_NVIDIA_UVM.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -40,4 +45,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) 
$(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/uvm_example.cpp b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/uvm_example.cpp index cf5326b687199ff8c5c14580b18a9e406279cd11..72fd444abfe72f77de348291b0b4480a370e2dc1 100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/uvm_example.cpp +++ b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/uvm_example.cpp @@ -47,9 +47,13 @@ #include <cstdio> #include <cstdlib> -typedef Kokkos::View<double*> view_type; -typedef Kokkos::View<int**> idx_type; - +#ifdef KOKKOS_HAVE_CUDA +typedef Kokkos::View<double*, Kokkos::CudaUVMSpace> view_type; +typedef Kokkos::View<int**, Kokkos::CudaUVMSpace> idx_type; +#else +typedef Kokkos::View<double*,Kokkos::HostSpace> view_type; +typedef Kokkos::View<int**,Kokkos::HostSpace> idx_type; +#endif template<class Device> struct localsum { @@ -59,7 +63,7 @@ struct localsum { // Get the view types on the particular device the functor is instantiated for idx_type::const_type idx; view_type dest; - Kokkos::View<view_type::const_data_type, view_type::array_layout, view_type::execution_space, Kokkos::MemoryRandomAccess > src; + Kokkos::View<view_type::const_data_type, view_type::array_layout, view_type::device_type, Kokkos::MemoryRandomAccess > src; localsum(idx_type idx_, view_type dest_, view_type src_):idx(idx_),dest(dest_),src(src_) { @@ -68,7 +72,7 @@ struct localsum { KOKKOS_INLINE_FUNCTION void operator() (int i) const { double tmp = 0.0; - for(int j = 0; j < idx.dimension_1(); j++) { + for(int j = 0; j < int(idx.dimension_1()); j++) { const double val = src(idx(i,j)); tmp += val*val + 0.5*(idx.dimension_0()*val -idx.dimension_1()*val); } @@ -88,9 +92,11 @@ int main(int narg, char* arg[]) { srand(134231); + Kokkos::fence(); + // When using UVM Cuda views can be accessed on the Host directly for(int i=0; i<size; i++) { - for(int j=0; j<idx.dimension_1(); j++) + for(int j=0; 
j<int(idx.dimension_1()); j++) idx(i,j) = (size + i + (rand()%500 - 250))%size; } @@ -126,8 +132,8 @@ int main(int narg, char* arg[]) { - printf("Device Time with Sync: %lf without Sync: %lf \n",sec1_dev,sec2_dev); - printf("Host Time with Sync: %lf without Sync: %lf \n",sec1_host,sec2_host); + printf("Device Time with Sync: %e without Sync: %e \n",sec1_dev,sec2_dev); + printf("Host Time with Sync: %e without Sync: %e \n",sec1_host,sec2_host); Kokkos::finalize(); } diff --git a/lib/kokkos/example/tutorial/Advanced_Views/06_AtomicViews/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/06_AtomicViews/Makefile index 12ad36b31e458d155aa6dc653ab8188a7773bd18..dfb7d6df641f13de25e8f84f7038d9a86a905094 100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/06_AtomicViews/Makefile +++ b/lib/kokkos/example/tutorial/Advanced_Views/06_AtomicViews/Makefile @@ -1,15 +1,17 @@ KOKKOS_PATH = ../../../.. -SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Advanced_Views/06_AtomicViews/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 06_AtomicViews.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" else @@ -17,20 +19,23 @@ CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 06_AtomicViews.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -40,4 +45,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) 
$(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile index 60a514f4d50ccf3e36fa2a8233de90c46f3bbe5d..432a90126d6fbb0bacafcef1679125eea93088e6 100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile +++ b/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile @@ -1,15 +1,17 @@ KOKKOS_PATH = ../../../.. -SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 --default-stream per-thread LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 07_Overlapping_DeepCopy.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" else @@ -17,20 +19,23 @@ CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 07_Overlapping_DeepCopy.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -40,4 +45,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/Advanced_Views/Makefile 
b/lib/kokkos/example/tutorial/Advanced_Views/Makefile index 19053b61b037f6a21f1be0874b1c23cbbb02a234..bc4012f68cfa22fcf0c9ac074391f26bd7a149d8 100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/Makefile +++ b/lib/kokkos/example/tutorial/Advanced_Views/Makefile @@ -1,84 +1,121 @@ -default: +ifndef KOKKOS_PATH + MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) + KOKKOS_PATH = $(subst Makefile,,$(MAKEFILE_PATH))../../.. +endif + +ifndef KOKKOS_SETTINGS + KOKKOS_SETTINGS = "KOKKOS_PATH=${KOKKOS_PATH}" + ifdef KOKKOS_ARCH + KOKKOS_SETTINGS += "KOKKOS_ARCH=${KOKKOS_ARCH}" + endif + ifdef KOKKOS_DEVICES + KOKKOS_SETTINGS += "KOKKOS_DEVICES=${KOKKOS_DEVICES}" + endif + ifdef KOKKOS_OPTIONS + KOKKOS_SETTINGS += "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" + endif + ifdef KOKKOS_CUDA_OPTIONS + KOKKOS_SETTINGS += "KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPTIONS}" + endif +endif + +build: + mkdir -p 01_data_layouts cd ./01_data_layouts; \ - make -j 4 + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/01_data_layouts/Makefile ${KOKKOS_SETTINGS} + mkdir -p 02_memory_traits cd ./02_memory_traits; \ - make -j 4 + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/02_memory_traits/Makefile ${KOKKOS_SETTINGS} + mkdir -p 03_subviews cd ./03_subviews; \ - make -j 4 + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/03_subviews/Makefile ${KOKKOS_SETTINGS} + mkdir -p 04_dualviews cd ./04_dualviews; \ - make -j 4 + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/04_dualviews/Makefile ${KOKKOS_SETTINGS} + mkdir -p 05_NVIDIA_UVM cd ./05_NVIDIA_UVM; \ - make -j 4 - cd ./06_AtomicViews; \ - make -j 4 + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile ${KOKKOS_SETTINGS} + #mkdir -p 06_AtomicViews + #cd ./06_AtomicViews; \ + #make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/06_AtomicViews/Makefile ${KOKKOS_SETTINGS} + #mkdir -p 07_Overlapping_DeepCopy + #cd 
./07_Overlapping_DeepCopy; \ + #make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile ${KOKKOS_SETTINGS} -openmp: +build-insource: cd ./01_data_layouts; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build -j 4 ${KOKKOS_SETTINGS} cd ./02_memory_traits; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build -j 4 ${KOKKOS_SETTINGS} cd ./03_subviews; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build -j 4 ${KOKKOS_SETTINGS} cd ./04_dualviews; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build -j 4 ${KOKKOS_SETTINGS} cd ./05_NVIDIA_UVM; \ - make -j 4 KOKKOS_DEVICES=OpenMP - cd ./06_AtomicViews; \ - make -j 4 KOKKOS_DEVICES=OpenMP - -pthreads: + make build -j 4 ${KOKKOS_SETTINGS} + #cd ./06_AtomicViews; \ + #make build -j 4 ${KOKKOS_SETTINGS} + #cd ./07_Overlapping_DeepCopy; \ + #make build -j 4 ${KOKKOS_SETTINGS} +test: cd ./01_data_layouts; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/01_data_layouts/Makefile ${KOKKOS_SETTINGS} cd ./02_memory_traits; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/02_memory_traits/Makefile ${KOKKOS_SETTINGS} cd ./03_subviews; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/03_subviews/Makefile ${KOKKOS_SETTINGS} cd ./04_dualviews; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/04_dualviews/Makefile ${KOKKOS_SETTINGS} cd ./05_NVIDIA_UVM; \ - make -j 4 KOKKOS_DEVICES=Pthreads - cd ./06_AtomicViews; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile ${KOKKOS_SETTINGS} + #cd ./06_AtomicViews; \ + #make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/06_AtomicViews/Makefile ${KOKKOS_SETTINGS} + #cd ./07_Overlapping_DeepCopy; \ + #make test -j 4 -f 
${KOKKOS_PATH}/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile ${KOKKOS_SETTINGS} -serial: +test-insource: cd ./01_data_layouts; \ - make -j 4 KOKKOS_DEVICES=Serial + make test -j 4 ${KOKKOS_SETTINGS} cd ./02_memory_traits; \ - make -j 4 KOKKOS_DEVICES=Serial + make test -j 4 ${KOKKOS_SETTINGS} cd ./03_subviews; \ - make -j 4 KOKKOS_DEVICES=Serial + make test -j 4 ${KOKKOS_SETTINGS} cd ./04_dualviews; \ - make -j 4 KOKKOS_DEVICES=Serial + make test -j 4 ${KOKKOS_SETTINGS} cd ./05_NVIDIA_UVM; \ - make -j 4 KOKKOS_DEVICES=Serial - cd ./06_AtomicViews; \ - make -j 4 KOKKOS_DEVICES=Serial - -cuda: + make test -j 4 ${KOKKOS_SETTINGS} + #cd ./06_AtomicViews; \ + #make test -j 4 ${KOKKOS_SETTINGS} + #cd ./07_Overlapping_DeepCopy; \ + #make test -j 4 ${KOKKOS_SETTINGS} +clean: cd ./01_data_layouts; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/01_data_layouts/Makefile ${KOKKOS_SETTINGS} cd ./02_memory_traits; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/02_memory_traits/Makefile ${KOKKOS_SETTINGS} cd ./03_subviews; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/03_subviews/Makefile ${KOKKOS_SETTINGS} cd ./04_dualviews; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/04_dualviews/Makefile ${KOKKOS_SETTINGS} cd ./05_NVIDIA_UVM; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial - cd ./06_AtomicViews; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile ${KOKKOS_SETTINGS} + #cd ./06_AtomicViews; \ + #make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/06_AtomicViews/Makefile ${KOKKOS_SETTINGS} + #cd ./07_Overlapping_DeepCopy; \ + #make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile ${KOKKOS_SETTINGS} -clean: 
+clean-insource: cd ./01_data_layouts; \ - make clean + make clean ${KOKKOS_SETTINGS} cd ./02_memory_traits; \ - make clean + make clean ${KOKKOS_SETTINGS} cd ./03_subviews; \ - make clean + make clean ${KOKKOS_SETTINGS} cd ./04_dualviews; \ - make clean + make clean ${KOKKOS_SETTINGS} cd ./05_NVIDIA_UVM; \ - make clean - cd ./06_AtomicViews; \ - make clean - + make clean ${KOKKOS_SETTINGS} + #cd ./06_AtomicViews; \ + #make clean ${KOKKOS_SETTINGS} + #cd ./07_Overlapping_DeepCopy; \ + #make clean ${KOKKOS_SETTINGS} diff --git a/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/Makefile b/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/Makefile index 12ad36b31e458d155aa6dc653ab8188a7773bd18..60f6f94cdf30b2753903e9242065f3e610ff9e17 100644 --- a/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/Makefile +++ b/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/Makefile @@ -1,15 +1,17 @@ KOKKOS_PATH = ../../../.. -SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Algorithms/01_random_numbers/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 01_random_numbers.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" else @@ -17,20 +19,23 @@ CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 01_random_numbers.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -40,4 +45,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp 
$(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/random_numbers.cpp b/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/random_numbers.cpp index 3e6175a75652d54af1f0ad3c3c818485ccc59b07..a5cf40cedc172f7cc6e94e01e3c1d7e4202a9bd8 100644 --- a/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/random_numbers.cpp +++ b/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/random_numbers.cpp @@ -71,11 +71,13 @@ typedef Kokkos::HostSpace::execution_space DefaultHostType; template<class GeneratorPool> struct generate_random { - // The GeneratorPool - GeneratorPool rand_pool; // Output View for the random numbers Kokkos::View<uint64_t*> vals; + + // The GeneratorPool + GeneratorPool rand_pool; + int samples; // Initialize all members @@ -139,8 +141,8 @@ int main(int argc, char* args[]) { Kokkos::fence(); double time_1024 = timer.seconds(); - printf("#Time XorShift64*: %lf %lf\n",time_64,1.0e-9*samples*size/time_64 ); - printf("#Time XorShift1024*: %lf %lf\n",time_1024,1.0e-9*samples*size/time_1024 ); + printf("#Time XorShift64*: %e %e\n",time_64,1.0e-9*samples*size/time_64 ); + printf("#Time XorShift1024*: %e %e\n",time_1024,1.0e-9*samples*size/time_1024 ); Kokkos::deep_copy(vals.h_view,vals.d_view); diff --git a/lib/kokkos/example/tutorial/Algorithms/Makefile b/lib/kokkos/example/tutorial/Algorithms/Makefile index edc2a36024fc24a791a27064e4f36febfec81c1a..ad0b76f9d66f4e3f35f5f1dc329b976c2603353e 100644 --- a/lib/kokkos/example/tutorial/Algorithms/Makefile +++ b/lib/kokkos/example/tutorial/Algorithms/Makefile @@ -1,24 +1,43 @@ -default: - cd ./01_random_numbers; \ - make -j 4 +ifndef KOKKOS_PATH + MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) + KOKKOS_PATH = $(subst Makefile,,$(MAKEFILE_PATH))../../.. 
+endif -openmp: - cd ./01_random_numbers; \ - make -j 4 KOKKOS_DEVICES=OpenMP +ifndef KOKKOS_SETTINGS + KOKKOS_SETTINGS = "KOKKOS_PATH=${KOKKOS_PATH}" + ifdef KOKKOS_ARCH + KOKKOS_SETTINGS += "KOKKOS_ARCH=${KOKKOS_ARCH}" + endif + ifdef KOKKOS_DEVICES + KOKKOS_SETTINGS += "KOKKOS_DEVICES=${KOKKOS_DEVICES}" + endif + ifdef KOKKOS_OPTIONS + KOKKOS_SETTINGS += "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" + endif + ifdef KOKKOS_CUDA_OPTIONS + KOKKOS_SETTINGS += "KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPTIONS}" + endif +endif -pthreads: +build: + mkdir -p 01_random_numbers cd ./01_random_numbers; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Algorithms/01_random_numbers/Makefile ${KOKKOS_SETTINGS} -serial: +build-insource: cd ./01_random_numbers; \ - make -j 4 KOKKOS_DEVICES=Serial - -cuda: + make build -j 4 ${KOKKOS_SETTINGS} +test: cd ./01_random_numbers; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Algorithms/01_random_numbers/Makefile ${KOKKOS_SETTINGS} +test-insource: + cd ./01_random_numbers; \ + make test -j 4 ${KOKKOS_SETTINGS} clean: cd ./01_random_numbers; \ - make clean + make clean -f ${KOKKOS_PATH}/example/tutorial/Algorithms/01_random_numbers/Makefile ${KOKKOS_SETTINGS} +clean-insource: + cd ./01_random_numbers; \ + make clean ${KOKKOS_SETTINGS} diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile index 12ad36b31e458d155aa6dc653ab8188a7773bd18..8c50430c3000b06509efc0e90ea56981ee7ed655 100644 --- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile @@ -1,15 +1,17 @@ KOKKOS_PATH = ../../../.. 
-SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 01_thread_teams.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" else @@ -17,20 +19,23 @@ CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 01_thread_teams.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -40,4 +45,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile index 965b72b4e9a7aac83f1a748d3f0c4fe611aafabb..b9b017bf1b26d0109260e59c1c0847089989c3d9 100644 --- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile @@ -1,37 +1,42 @@ KOKKOS_PATH = ../../../.. 
-SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 01_thread_teams_lambda.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" -KOKKOS_CUDA_OPTIONS = "enable_lambda" +KOKKOS_CUDA_OPTIONS += "enable_lambda" else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 01_thread_teams_lambda.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -41,4 +46,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp index 565dd22e82849fde2fe527f25179ae49346222f9..c0865cfa65336be08f717ebc989f1d994e1faba2 100644 --- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp @@ -77,6 +77,9 @@ int main (int narg, char* args[]) { // region." That is, every team member is active and will execute // the body of the lambda. 
int sum = 0; + // We also need to protect the usage of a lambda against compiling + // with a backend which doesn't support it (i.e. Cuda 6.5/7.0). + #if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) parallel_reduce (policy, KOKKOS_LAMBDA (const team_member& thread, int& lsum) { lsum += 1; // TeamPolicy<>::member_type provides functions to query the @@ -85,7 +88,7 @@ int main (int narg, char* args[]) { printf ("Hello World: %i %i // %i %i\n", thread.league_rank (), thread.team_rank (), thread.league_size (), thread.team_size ()); }, sum); - + #endif // The result will be 12*team_policy::team_size_max([=]{}) printf ("Result %i\n",sum); diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile index 12ad36b31e458d155aa6dc653ab8188a7773bd18..bae9351229856fec2da7a0e8943d06fb6ce68f1f 100644 --- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile @@ -1,15 +1,17 @@ KOKKOS_PATH = ../../../.. 
-SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 02_nested_parallel_for.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" else @@ -17,20 +19,23 @@ CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 02_nested_parallel_for.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -40,4 +45,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile index 12ad36b31e458d155aa6dc653ab8188a7773bd18..a041b69b560feb9dc8cb459b56a542e9d3249830 100644 --- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile @@ -1,15 +1,17 @@ KOKKOS_PATH = ../../../.. 
-SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Hierarchical_Parallelism/03_vectorization/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 03_vectorization.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" else @@ -17,20 +19,23 @@ CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 03_vectorization.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -40,4 +45,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile index 12ad36b31e458d155aa6dc653ab8188a7773bd18..6418875c9e2a041f301793c99c048b4a868f8ae5 100644 --- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile @@ -1,15 +1,17 @@ KOKKOS_PATH = ../../../.. 
-SRC = $(wildcard *.cpp) +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/Hierarchical_Parallelism/04_team_scan/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ../../../../config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.cuda) +EXE = 04_team_scan.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" else @@ -17,20 +19,23 @@ CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} LINKFLAGS = -EXE = $(SRC:.cpp=.host) +EXE = 04_team_scan.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" endif DEPFLAGS = -M -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +test: $(EXE) + ./$(EXE) + $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) @@ -40,4 +45,4 @@ clean: kokkos-clean # Compilation rules %.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/team_scan.cpp b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/team_scan.cpp index c12b11d04ddc99957ec4be93c3928b9c3558cb92..ebc8578f0bce9f728670d594a968b5d289eb68da 100644 --- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/team_scan.cpp +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/team_scan.cpp @@ -96,7 +96,10 @@ struct find_2_tuples { } dev.team_barrier(); } - size_t team_shmem_size( int team_size ) const { return sizeof(int)*(chunk_size+2 + team_size * team_size ); } + size_t team_shmem_size( int team_size ) const { + return Kokkos::View<int**,Kokkos::MemoryUnmanaged>::shmem_size(TEAM_SIZE,TEAM_SIZE) + + 
Kokkos::View<int*,Kokkos::MemoryUnmanaged>::shmem_size(chunk_size+1); + } }; int main(int narg, char* args[]) { diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/Makefile index 9d6fff7981806a6d28d7704f9d4a0e6c776c8ed0..44fdf90f8a837da174b96fcb9032b3e47920390f 100644 --- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/Makefile +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/Makefile @@ -1,72 +1,95 @@ -default: +ifndef KOKKOS_PATH + MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) + KOKKOS_PATH = $(subst Makefile,,$(MAKEFILE_PATH))../../.. +endif + +ifndef KOKKOS_SETTINGS + KOKKOS_SETTINGS = "KOKKOS_PATH=${KOKKOS_PATH}" + ifdef KOKKOS_ARCH + KOKKOS_SETTINGS += "KOKKOS_ARCH=${KOKKOS_ARCH}" + endif + ifdef KOKKOS_DEVICES + KOKKOS_SETTINGS += "KOKKOS_DEVICES=${KOKKOS_DEVICES}" + endif + ifdef KOKKOS_OPTIONS + KOKKOS_SETTINGS += "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" + endif + ifdef KOKKOS_CUDA_OPTIONS + KOKKOS_SETTINGS += "KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPTIONS}" + endif +endif + +build: + mkdir -p 01_thread_teams cd ./01_thread_teams; \ - make -j 4 + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile ${KOKKOS_SETTINGS} + mkdir -p 01_thread_teams_lambda cd ./01_thread_teams_lambda; \ - make -j 4 + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile ${KOKKOS_SETTINGS} + mkdir -p 02_nested_parallel_for cd ./02_nested_parallel_for; \ - make -j 4 + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile ${KOKKOS_SETTINGS} + mkdir -p 03_vectorization cd ./03_vectorization; \ - make -j 4 + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile ${KOKKOS_SETTINGS} + mkdir -p 04_team_scan cd ./04_team_scan; \ - make -j 4 + make build -j 4 -f 
${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile ${KOKKOS_SETTINGS} -openmp: +build-insource: cd ./01_thread_teams; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build -j 4 ${KOKKOS_SETTINGS} cd ./01_thread_teams_lambda; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build -j 4 ${KOKKOS_SETTINGS} cd ./02_nested_parallel_for; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build -j 4 ${KOKKOS_SETTINGS} cd ./03_vectorization; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build -j 4 ${KOKKOS_SETTINGS} cd ./04_team_scan; \ - make -j 4 KOKKOS_DEVICES=OpenMP - -pthreads: + make build -j 4 ${KOKKOS_SETTINGS} +test: cd ./01_thread_teams; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile ${KOKKOS_SETTINGS} cd ./01_thread_teams_lambda; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile ${KOKKOS_SETTINGS} cd ./02_nested_parallel_for; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile ${KOKKOS_SETTINGS} cd ./03_vectorization; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile ${KOKKOS_SETTINGS} cd ./04_team_scan; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile ${KOKKOS_SETTINGS} -serial: +test-insource: cd ./01_thread_teams; \ - make -j 4 KOKKOS_DEVICES=Serial + make test -j 4 ${KOKKOS_SETTINGS} cd ./01_thread_teams_lambda; \ - make -j 4 KOKKOS_DEVICES=Serial + make test -j 4 ${KOKKOS_SETTINGS} cd ./02_nested_parallel_for; \ - make -j 4 KOKKOS_DEVICES=Serial + make test -j 4 ${KOKKOS_SETTINGS} cd ./03_vectorization; \ - make -j 4 KOKKOS_DEVICES=Serial + make test -j 4 ${KOKKOS_SETTINGS} cd ./04_team_scan; \ - 
make -j 4 KOKKOS_DEVICES=Serial - -cuda: + make test -j 4 ${KOKKOS_SETTINGS} +clean: cd ./01_thread_teams; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile ${KOKKOS_SETTINGS} cd ./01_thread_teams_lambda; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile ${KOKKOS_SETTINGS} cd ./02_nested_parallel_for; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile ${KOKKOS_SETTINGS} cd ./03_vectorization; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile ${KOKKOS_SETTINGS} cd ./04_team_scan; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile ${KOKKOS_SETTINGS} -clean: +clean-insource: cd ./01_thread_teams; \ - make clean + make clean ${KOKKOS_SETTINGS} cd ./01_thread_teams_lambda; \ - make clean + make clean ${KOKKOS_SETTINGS} cd ./02_nested_parallel_for; \ - make clean + make clean ${KOKKOS_SETTINGS} cd ./03_vectorization; \ - make clean + make clean ${KOKKOS_SETTINGS} cd ./04_team_scan; \ - make clean - + make clean ${KOKKOS_SETTINGS} diff --git a/lib/kokkos/example/tutorial/Makefile b/lib/kokkos/example/tutorial/Makefile index 300d98ab44340404b31dfb8690ce2a5577b55636..063ace8aabbe6017611ac17a54d12a47cb7e3196 100644 --- a/lib/kokkos/example/tutorial/Makefile +++ b/lib/kokkos/example/tutorial/Makefile @@ -1,144 +1,174 @@ -default: + +ifndef KOKKOS_PATH + MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) + KOKKOS_PATH = $(subst Makefile,,$(MAKEFILE_PATH))../.. 
+endif + +ifndef KOKKOS_SETTINGS + KOKKOS_SETTINGS = "KOKKOS_PATH=${KOKKOS_PATH}" + ifdef KOKKOS_ARCH + KOKKOS_SETTINGS += "KOKKOS_ARCH=${KOKKOS_ARCH}" + endif + ifdef KOKKOS_DEVICES + KOKKOS_SETTINGS += "KOKKOS_DEVICES=${KOKKOS_DEVICES}" + endif + ifdef KOKKOS_OPTIONS + KOKKOS_SETTINGS += "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" + endif + ifdef KOKKOS_CUDA_OPTIONS + KOKKOS_SETTINGS += "KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPTIONS}" + endif +endif + +build: + mkdir -p 01_hello_world cd ./01_hello_world; \ - make -j 4 + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/01_hello_world/Makefile ${KOKKOS_SETTINGS} + mkdir -p 01_hello_world_lambda cd ./01_hello_world_lambda; \ - make -j 4 + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/01_hello_world_lambda/Makefile ${KOKKOS_SETTINGS} + mkdir -p 02_simple_reduce cd ./02_simple_reduce; \ - make -j 4 + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce/Makefile ${KOKKOS_SETTINGS} + mkdir -p 02_simple_reduce_lambda cd ./02_simple_reduce_lambda; \ - make -j 4 + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce_lambda/Makefile ${KOKKOS_SETTINGS} + mkdir -p 03_simple_view cd ./03_simple_view; \ - make -j 4 + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/03_simple_view/Makefile ${KOKKOS_SETTINGS} + mkdir -p 03_simple_view_lambda cd ./03_simple_view_lambda; \ - make -j 4 + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/03_simple_view_lambda/Makefile ${KOKKOS_SETTINGS} + mkdir -p 04_simple_memoryspaces cd ./04_simple_memoryspaces; \ - make -j 4 + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/04_simple_memoryspaces/Makefile ${KOKKOS_SETTINGS} + mkdir -p 05_simple_atomics cd ./05_simple_atomics; \ - make -j 4 + make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/05_simple_atomics/Makefile ${KOKKOS_SETTINGS} + mkdir -p Advanced_Views cd ./Advanced_Views; \ - make -j 4 + make build -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + mkdir -p 
Algorithms cd ./Algorithms; \ - make -j 4 + make build -f ${KOKKOS_PATH}/example/tutorial/Algorithms/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + mkdir -p Hierarchical_Parallelism cd ./Hierarchical_Parallelism; \ - make -j 4 + make build -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' -openmp: +build-insource: cd ./01_hello_world; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build -j 4 ${KOKKOS_SETTINGS} cd ./01_hello_world_lambda; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build -j 4 ${KOKKOS_SETTINGS} cd ./02_simple_reduce; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build -j 4 ${KOKKOS_SETTINGS} cd ./02_simple_reduce_lambda; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build -j 4 ${KOKKOS_SETTINGS} cd ./03_simple_view; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build -j 4 ${KOKKOS_SETTINGS} cd ./03_simple_view_lambda; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build -j 4 ${KOKKOS_SETTINGS} cd ./04_simple_memoryspaces; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build -j 4 ${KOKKOS_SETTINGS} cd ./05_simple_atomics; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build -j 4 ${KOKKOS_SETTINGS} cd ./Advanced_Views; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Algorithms; \ - make -j 4 KOKKOS_DEVICES=OpenMP + make build KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Hierarchical_Parallelism; \ - make -j 4 KOKKOS_DEVICES=OpenMP - -pthreads: + make build KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' +test: cd ./01_hello_world; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/01_hello_world/Makefile ${KOKKOS_SETTINGS} cd ./01_hello_world_lambda; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/01_hello_world_lambda/Makefile ${KOKKOS_SETTINGS} cd ./02_simple_reduce; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce/Makefile ${KOKKOS_SETTINGS} cd 
./02_simple_reduce_lambda; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce_lambda/Makefile ${KOKKOS_SETTINGS} cd ./03_simple_view; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/03_simple_view/Makefile ${KOKKOS_SETTINGS} cd ./03_simple_view_lambda; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/03_simple_view_lambda/Makefile ${KOKKOS_SETTINGS} cd ./04_simple_memoryspaces; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/04_simple_memoryspaces/Makefile ${KOKKOS_SETTINGS} cd ./05_simple_atomics; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/05_simple_atomics/Makefile ${KOKKOS_SETTINGS} cd ./Advanced_Views; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Algorithms; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -f ${KOKKOS_PATH}/example/tutorial/Algorithms/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Hierarchical_Parallelism; \ - make -j 4 KOKKOS_DEVICES=Pthreads + make test -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' -serial: +test-insource: cd ./01_hello_world; \ - make -j 4 KOKKOS_DEVICES=Serial + make test -j 4 ${KOKKOS_SETTINGS} cd ./01_hello_world_lambda; \ - make -j 4 KOKKOS_DEVICES=Serial + make test -j 4 ${KOKKOS_SETTINGS} cd ./02_simple_reduce; \ - make -j 4 KOKKOS_DEVICES=Serial + make test -j 4 ${KOKKOS_SETTINGS} cd ./02_simple_reduce_lambda; \ - make -j 4 KOKKOS_DEVICES=Serial + make test -j 4 ${KOKKOS_SETTINGS} cd ./03_simple_view; \ - make -j 4 KOKKOS_DEVICES=Serial + make test -j 4 ${KOKKOS_SETTINGS} cd ./03_simple_view_lambda; \ - make -j 4 KOKKOS_DEVICES=Serial + make test -j 4 ${KOKKOS_SETTINGS} cd ./04_simple_memoryspaces; \ - make -j 4 
KOKKOS_DEVICES=Serial + make test -j 4 ${KOKKOS_SETTINGS} cd ./05_simple_atomics; \ - make -j 4 KOKKOS_DEVICES=Serial + make test -j 4 ${KOKKOS_SETTINGS} cd ./Advanced_Views; \ - make -j 4 KOKKOS_DEVICES=Serial + make test KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Algorithms; \ - make -j 4 KOKKOS_DEVICES=Serial + make test KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Hierarchical_Parallelism; \ - make -j 4 KOKKOS_DEVICES=Serial - -cuda: + make test KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' +clean: cd ./01_hello_world; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/01_hello_world/Makefile ${KOKKOS_SETTINGS} cd ./01_hello_world_lambda; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/01_hello_world_lambda/Makefile ${KOKKOS_SETTINGS} cd ./02_simple_reduce; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce/Makefile ${KOKKOS_SETTINGS} cd ./02_simple_reduce_lambda; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce_lambda/Makefile ${KOKKOS_SETTINGS} cd ./03_simple_view; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/03_simple_view/Makefile ${KOKKOS_SETTINGS} cd ./03_simple_view_lambda; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/03_simple_view_lambda/Makefile ${KOKKOS_SETTINGS} cd ./04_simple_memoryspaces; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/04_simple_memoryspaces/Makefile ${KOKKOS_SETTINGS} cd ./05_simple_atomics; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/05_simple_atomics/Makefile ${KOKKOS_SETTINGS} cd ./Advanced_Views; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Algorithms; \ - make -j 4 
KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/Algorithms/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Hierarchical_Parallelism; \ - make -j 4 KOKKOS_DEVICES=Cuda,Serial + make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' -clean: +clean-insource: cd ./01_hello_world; \ - make clean + make clean ${KOKKOS_SETTINGS} cd ./01_hello_world_lambda; \ - make clean + make clean ${KOKKOS_SETTINGS} cd ./02_simple_reduce; \ - make clean + make clean ${KOKKOS_SETTINGS} cd ./02_simple_reduce_lambda; \ - make clean + make clean ${KOKKOS_SETTINGS} cd ./03_simple_view; \ - make clean + make clean ${KOKKOS_SETTINGS} cd ./03_simple_view_lambda; \ - make clean + make clean ${KOKKOS_SETTINGS} cd ./04_simple_memoryspaces; \ - make clean + make clean ${KOKKOS_SETTINGS} cd ./05_simple_atomics; \ - make clean + make clean ${KOKKOS_SETTINGS} cd ./Advanced_Views; \ - make clean + make clean KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Algorithms; \ - make clean + make clean KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Hierarchical_Parallelism; \ - make clean - + make clean KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' diff --git a/lib/kokkos/generate_makefile.bash b/lib/kokkos/generate_makefile.bash index 86f136da96ed10e0a2f23c0cb2752eaaa5287d90..6fa03ebb34aa4d8253bcb81f0388a3bdceff2e26 100755 --- a/lib/kokkos/generate_makefile.bash +++ b/lib/kokkos/generate_makefile.bash @@ -63,6 +63,20 @@ case $key in ;; --compiler*) COMPILER="${key#*=}" + CNUM=`which ${COMPILER} 2>&1 >/dev/null | grep "no ${COMPILER}" | wc -l` + if [ ${CNUM} -gt 0 ]; then + echo "Invalid compiler by --compiler command: '${COMPILER}'" + exit + fi + if [[ ! -n ${COMPILER} ]]; then + echo "Empty compiler specified by --compiler command." 
+ exit + fi + CNUM=`which ${COMPILER} | grep ${COMPILER} | wc -l` + if [ ${CNUM} -eq 0 ]; then + echo "Invalid compiler by --compiler command: '${COMPILER}'" + exit + fi ;; --with-options*) KOKKOS_OPT="${key#*=}" @@ -80,15 +94,22 @@ case $key in echo "--with-devices: explicitly add a set of backends" echo "" echo "--arch=[OPTIONS]: set target architectures. Options are:" - echo " SNB = Intel Sandy/Ivy Bridge CPUs" - echo " HSW = Intel Haswell CPUs" - echo " KNC = Intel Knights Corner Xeon Phi" - echo " KNL = Intel Knights Landing Xeon Phi" - echo " Kepler30 = NVIDIA Kepler generation CC 3.0" - echo " Kepler35 = NVIDIA Kepler generation CC 3.5" - echo " Kepler37 = NVIDIA Kepler generation CC 3.7" - echo " Maxwell50 = NVIDIA Maxwell generation CC 5.0" - echo " Power8 = IBM Power 8 CPUs" + echo " ARMv80 = ARMv8.0 Compatible CPU" + echo " ARMv81 = ARMv8.1 Compatible CPU" + echo " ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU" + echo " SNB = Intel Sandy/Ivy Bridge CPUs" + echo " HSW = Intel Haswell CPUs" + echo " BDW = Intel Broadwell Xeon E-class CPUs" + echo " SKX = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)" + echo " KNC = Intel Knights Corner Xeon Phi" + echo " KNL = Intel Knights Landing Xeon Phi" + echo " Kepler30 = NVIDIA Kepler generation CC 3.0" + echo " Kepler35 = NVIDIA Kepler generation CC 3.5" + echo " Kepler37 = NVIDIA Kepler generation CC 3.7" + echo " Pascal60 = NVIDIA Pascal generation CC 6.0" + echo " Pascal61 = NVIDIA Pascal generation CC 6.1" + echo " Maxwell50 = NVIDIA Maxwell generation CC 5.0" + echo " Power8 = IBM POWER8 CPUs" echo "" echo "--compiler=/Path/To/Compiler set the compiler" echo "--debug,-dbg: enable Debugging" @@ -127,50 +148,65 @@ echo "Running generate_makefile.sh in the Kokkos root directory is not allowed" exit fi -KOKKOS_OPTIONS="KOKKOS_PATH=${KOKKOS_PATH}" +KOKKOS_SRC_PATH=${KOKKOS_PATH} + +KOKKOS_SETTINGS="KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH}" +#KOKKOS_SETTINGS="KOKKOS_PATH=${KOKKOS_PATH}" if [ ${#COMPILER} -gt 0 ]; then 
-KOKKOS_OPTIONS="${KOKKOS_OPTIONS} CXX=${COMPILER}" -fi -if [ ${#PREFIX} -gt 0 ]; then -KOKKOS_OPTIONS="${KOKKOS_OPTIONS} PREFIX=${PREFIX}" +KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXX=${COMPILER}" fi if [ ${#KOKKOS_DEVICES} -gt 0 ]; then -KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_DEVICES=${KOKKOS_DEVICES}" +KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_DEVICES=${KOKKOS_DEVICES}" fi if [ ${#KOKKOS_ARCH} -gt 0 ]; then -KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_ARCH=${KOKKOS_ARCH}" +KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_ARCH=${KOKKOS_ARCH}" fi if [ ${#KOKKOS_DEBUG} -gt 0 ]; then -KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_DEBUG=${KOKKOS_DEBUG}" +KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_DEBUG=${KOKKOS_DEBUG}" fi if [ ${#CUDA_PATH} -gt 0 ]; then -KOKKOS_OPTIONS="${KOKKOS_OPTIONS} CUDA_PATH=${CUDA_PATH}" +KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CUDA_PATH=${CUDA_PATH}" fi if [ ${#CXXFLAGS} -gt 0 ]; then -KOKKOS_OPTIONS="${KOKKOS_OPTIONS} CXXFLAGS=\"${CXXFLAGS}\"" +KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXXFLAGS=\"${CXXFLAGS}\"" fi if [ ${#LDFLAGS} -gt 0 ]; then -KOKKOS_OPTIONS="${KOKKOS_OPTIONS} LDFLAGS=\"${LDFLAGS}\"" +KOKKOS_SETTINGS="${KOKKOS_SETTINGS} LDFLAGS=\"${LDFLAGS}\"" fi if [ ${#GTEST_PATH} -gt 0 ]; then -KOKKOS_OPTIONS="${KOKKOS_OPTIONS} GTEST_PATH=${GTEST_PATH}" +KOKKOS_SETTINGS="${KOKKOS_SETTINGS} GTEST_PATH=${GTEST_PATH}" else GTEST_PATH=${KOKKOS_PATH}/tpls/gtest -KOKKOS_OPTIONS="${KOKKOS_OPTIONS} GTEST_PATH=${GTEST_PATH}" +KOKKOS_SETTINGS="${KOKKOS_SETTINGS} GTEST_PATH=${GTEST_PATH}" fi if [ ${#HWLOC_PATH} -gt 0 ]; then -KOKKOS_OPTIONS="${KOKKOS_OPTIONS} HWLOC_PATH=${HWLOC_PATH} KOKKOS_USE_TPLS=hwloc" +KOKKOS_SETTINGS="${KOKKOS_SETTINGS} HWLOC_PATH=${HWLOC_PATH} KOKKOS_USE_TPLS=hwloc" fi if [ ${#QTHREAD_PATH} -gt 0 ]; then -KOKKOS_OPTIONS="${KOKKOS_OPTIONS} QTHREAD_PATH=${QTHREAD_PATH}" +KOKKOS_SETTINGS="${KOKKOS_SETTINGS} QTHREAD_PATH=${QTHREAD_PATH}" fi if [ ${#KOKKOS_OPT} -gt 0 ]; then -KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_OPTIONS=${KOKKOS_OPT}" 
+KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_OPTIONS=${KOKKOS_OPT}" fi if [ ${#KOKKOS_CUDA_OPT} -gt 0 ]; then -KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPT}" +KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPT}" +fi + +KOKKOS_SETTINGS_NO_KOKKOS_PATH="${KOKKOS_SETTINGS}" + +KOKKOS_TEST_INSTALL_PATH="${PWD}/install" +if [ ${#PREFIX} -gt 0 ]; then +KOKKOS_INSTALL_PATH="${PREFIX}" +else +KOKKOS_INSTALL_PATH=${KOKKOS_TEST_INSTALL_PATH} fi + + +mkdir install +echo "#Makefile to satisfy existens of target kokkos-clean before installing the library" > install/Makefile.kokkos +echo "kokkos-clean:" >> install/Makefile.kokkos +echo "" >> install/Makefile.kokkos mkdir core mkdir core/unit_test mkdir core/perf_test @@ -184,126 +220,153 @@ mkdir example mkdir example/fixture mkdir example/feint mkdir example/fenl +mkdir example/tutorial if [ ${#KOKKOS_ENABLE_EXAMPLE_ICHOL} -gt 0 ]; then mkdir example/ichol fi +KOKKOS_SETTINGS="${KOKKOS_SETTINGS_NO_KOKKOS_PATH} KOKKOS_PATH=${KOKKOS_PATH}" + # Generate subdirectory makefiles. 
-echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > core/unit_test/Makefile +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > core/unit_test/Makefile echo "" >> core/unit_test/Makefile echo "all:" >> core/unit_test/Makefile -echo -e "\tmake -j -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_OPTIONS}" >> core/unit_test/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS}" >> core/unit_test/Makefile echo "" >> core/unit_test/Makefile echo "test: all" >> core/unit_test/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_OPTIONS} test" >> core/unit_test/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS} test" >> core/unit_test/Makefile echo "" >> core/unit_test/Makefile echo "clean:" >> core/unit_test/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_OPTIONS} clean" >> core/unit_test/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS} clean" >> core/unit_test/Makefile -echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > core/perf_test/Makefile +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > core/perf_test/Makefile echo "" >> core/perf_test/Makefile echo "all:" >> core/perf_test/Makefile -echo -e "\tmake -j -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_OPTIONS}" >> core/perf_test/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_SETTINGS}" >> core/perf_test/Makefile echo "" >> core/perf_test/Makefile echo "test: all" >> core/perf_test/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_OPTIONS} test" >> core/perf_test/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_SETTINGS} test" >> core/perf_test/Makefile echo "" >> core/perf_test/Makefile echo "clean:" >> core/perf_test/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_OPTIONS} clean" >> core/perf_test/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/core/perf_test/Makefile 
${KOKKOS_SETTINGS} clean" >> core/perf_test/Makefile -echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > containers/unit_tests/Makefile +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > containers/unit_tests/Makefile echo "" >> containers/unit_tests/Makefile echo "all:" >> containers/unit_tests/Makefile -echo -e "\tmake -j -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_OPTIONS}" >> containers/unit_tests/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS}" >> containers/unit_tests/Makefile echo "" >> containers/unit_tests/Makefile echo "test: all" >> containers/unit_tests/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_OPTIONS} test" >> containers/unit_tests/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS} test" >> containers/unit_tests/Makefile echo "" >> containers/unit_tests/Makefile echo "clean:" >> containers/unit_tests/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_OPTIONS} clean" >> containers/unit_tests/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS} clean" >> containers/unit_tests/Makefile -echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > containers/performance_tests/Makefile +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > containers/performance_tests/Makefile echo "" >> containers/performance_tests/Makefile echo "all:" >> containers/performance_tests/Makefile -echo -e "\tmake -j -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_OPTIONS}" >> containers/performance_tests/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS}" >> containers/performance_tests/Makefile echo "" >> containers/performance_tests/Makefile echo "test: all" >> containers/performance_tests/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_OPTIONS} test" >> 
containers/performance_tests/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS} test" >> containers/performance_tests/Makefile echo "" >> containers/performance_tests/Makefile echo "clean:" >> containers/performance_tests/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_OPTIONS} clean" >> containers/performance_tests/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS} clean" >> containers/performance_tests/Makefile -echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > algorithms/unit_tests/Makefile +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > algorithms/unit_tests/Makefile echo "" >> algorithms/unit_tests/Makefile echo "all:" >> algorithms/unit_tests/Makefile -echo -e "\tmake -j -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_OPTIONS}" >> algorithms/unit_tests/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS}" >> algorithms/unit_tests/Makefile echo "" >> algorithms/unit_tests/Makefile echo "test: all" >> algorithms/unit_tests/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_OPTIONS} test" >> algorithms/unit_tests/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS} test" >> algorithms/unit_tests/Makefile echo "" >> algorithms/unit_tests/Makefile echo "clean:" >> algorithms/unit_tests/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_OPTIONS} clean" >> algorithms/unit_tests/Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS} clean" >> algorithms/unit_tests/Makefile + +KOKKOS_SETTINGS="${KOKKOS_SETTINGS_NO_KOKKOS_PATH} KOKKOS_PATH=${KOKKOS_TEST_INSTALL_PATH}" -echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > example/fixture/Makefile +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/fixture/Makefile echo "" >> example/fixture/Makefile echo 
"all:" >> example/fixture/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_OPTIONS}" >> example/fixture/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS}" >> example/fixture/Makefile echo "" >> example/fixture/Makefile echo "test: all" >> example/fixture/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_OPTIONS} test" >> example/fixture/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS} test" >> example/fixture/Makefile echo "" >> example/fixture/Makefile echo "clean:" >> example/fixture/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_OPTIONS} clean" >> example/fixture/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS} clean" >> example/fixture/Makefile -echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > example/feint/Makefile +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/feint/Makefile echo "" >> example/feint/Makefile echo "all:" >> example/feint/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_OPTIONS}" >> example/feint/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS}" >> example/feint/Makefile echo "" >> example/feint/Makefile echo "test: all" >> example/feint/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_OPTIONS} test" >> example/feint/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS} test" >> example/feint/Makefile echo "" >> example/feint/Makefile echo "clean:" >> example/feint/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_OPTIONS} clean" >> example/feint/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS} clean" >> example/feint/Makefile -echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > example/fenl/Makefile +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > 
example/fenl/Makefile echo "" >> example/fenl/Makefile echo "all:" >> example/fenl/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_OPTIONS}" >> example/fenl/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS}" >> example/fenl/Makefile echo "" >> example/fenl/Makefile echo "test: all" >> example/fenl/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_OPTIONS} test" >> example/fenl/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS} test" >> example/fenl/Makefile echo "" >> example/fenl/Makefile echo "clean:" >> example/fenl/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_OPTIONS} clean" >> example/fenl/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS} clean" >> example/fenl/Makefile + +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/tutorial/Makefile +echo "" >> example/tutorial/Makefile +echo "build:" >> example/tutorial/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} build">> example/tutorial/Makefile +echo "" >> example/tutorial/Makefile +echo "test: build" >> example/tutorial/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} test" >> example/tutorial/Makefile +echo "" >> example/tutorial/Makefile +echo "clean:" >> example/tutorial/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} clean" >> example/tutorial/Makefile + if [ ${#KOKKOS_ENABLE_EXAMPLE_ICHOL} -gt 0 ]; then -echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > example/ichol/Makefile +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/ichol/Makefile echo "" >> example/ichol/Makefile echo "all:" >> example/ichol/Makefile -echo -e "\tmake -f 
${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_OPTIONS}" >> example/ichol/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_SETTINGS}" >> example/ichol/Makefile echo "" >> example/ichol/Makefile echo "test: all" >> example/ichol/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_OPTIONS} test" >> example/ichol/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_SETTINGS} test" >> example/ichol/Makefile echo "" >> example/ichol/Makefile echo "clean:" >> example/ichol/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_OPTIONS} clean" >> example/ichol/Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_SETTINGS} clean" >> example/ichol/Makefile fi +KOKKOS_SETTINGS="${KOKKOS_SETTINGS_NO_KOKKOS_PATH} KOKKOS_PATH=${KOKKOS_PATH}" + # Generate top level directory makefile. -echo "Generating Makefiles with options " ${KOKKOS_OPTIONS} -echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > Makefile +echo "Generating Makefiles with options " ${KOKKOS_SETTINGS} +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > Makefile echo "" >> Makefile -echo "lib:" >> Makefile +echo "kokkoslib:" >> Makefile echo -e "\tcd core; \\" >> Makefile -echo -e "\tmake -j -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_OPTIONS}" >> Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} PREFIX=${KOKKOS_INSTALL_PATH} build-lib" >> Makefile echo "" >> Makefile -echo "install: lib" >> Makefile +echo "install: kokkoslib" >> Makefile echo -e "\tcd core; \\" >> Makefile -echo -e "\tmake -j -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_OPTIONS} install" >> Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} PREFIX=${KOKKOS_INSTALL_PATH} install" >> Makefile echo "" >> Makefile -echo "build-test:" >> Makefile +echo "kokkoslib-test:" >> Makefile +echo -e "\tcd core; \\" >> Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/core/src/Makefile 
${KOKKOS_SETTINGS} PREFIX=${KOKKOS_TEST_INSTALL_PATH} build-lib" >> Makefile +echo "" >> Makefile +echo "install-test: kokkoslib-test" >> Makefile +echo -e "\tcd core; \\" >> Makefile +echo -e "\tmake -j -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} PREFIX=${KOKKOS_TEST_INSTALL_PATH} install" >> Makefile +echo "" >> Makefile +echo "build-test: install-test" >> Makefile echo -e "\tmake -C core/unit_test" >> Makefile echo -e "\tmake -C core/perf_test" >> Makefile echo -e "\tmake -C containers/unit_tests" >> Makefile @@ -312,6 +375,7 @@ echo -e "\tmake -C algorithms/unit_tests" >> Makefile echo -e "\tmake -C example/fixture" >> Makefile echo -e "\tmake -C example/feint" >> Makefile echo -e "\tmake -C example/fenl" >> Makefile +echo -e "\tmake -C example/tutorial build" >> Makefile echo "" >> Makefile echo "test: build-test" >> Makefile echo -e "\tmake -C core/unit_test test" >> Makefile @@ -322,6 +386,12 @@ echo -e "\tmake -C algorithms/unit_tests test" >> Makefile echo -e "\tmake -C example/fixture test" >> Makefile echo -e "\tmake -C example/feint test" >> Makefile echo -e "\tmake -C example/fenl test" >> Makefile +echo -e "\tmake -C example/tutorial test" >> Makefile +echo "" >> Makefile +echo "unit-tests-only:" >> Makefile +echo -e "\tmake -C core/unit_test test" >> Makefile +echo -e "\tmake -C containers/unit_tests test" >> Makefile +echo -e "\tmake -C algorithms/unit_tests test" >> Makefile echo "" >> Makefile echo "clean:" >> Makefile echo -e "\tmake -C core/unit_test clean" >> Makefile @@ -332,5 +402,6 @@ echo -e "\tmake -C algorithms/unit_tests clean" >> Makefile echo -e "\tmake -C example/fixture clean" >> Makefile echo -e "\tmake -C example/feint clean" >> Makefile echo -e "\tmake -C example/fenl clean" >> Makefile +echo -e "\tmake -C example/tutorial clean" >> Makefile echo -e "\tcd core; \\" >> Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_OPTIONS} clean" >> Makefile +echo -e "\tmake -f ${KOKKOS_PATH}/core/src/Makefile 
${KOKKOS_SETTINGS} clean" >> Makefile diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index cc096058ecc1d368b5d645bffac718648388ac92..5b53b8ed05d5fb6fbe8a6b1a1001a70f83bfe726 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -915,6 +915,14 @@ void memset_kokkos (ViewType &view) { ViewType::execution_space::fence(); } +struct params_lj_coul { + KOKKOS_INLINE_FUNCTION + params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; + KOKKOS_INLINE_FUNCTION + params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; + F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset; +}; + #if defined(KOKKOS_HAVE_CXX11) #undef ISFINITE #define ISFINITE(x) std::isfinite(x) diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h index 3c0b7d46aea065fb877723ed04f3e4ef9fb7e05e..048a7dab60cc343d1e30ec2315dfba60855d3a73 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h +++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h @@ -44,11 +44,6 @@ class PairLJCharmmCoulCharmmImplicitKokkos : public PairLJCharmmCoulCharmmImplic void init_style(); double init_one(int, int); - struct params_lj_coul{ - params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset; - }; protected: void cleanup_copy(); diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h index 202cda68b31d4fc3dd90f75de2cab0dfcffbfafd..db0b14a84ec368ea0ae4832c39d83a931d18809b 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h +++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h @@ -44,11 +44,6 @@ class PairLJCharmmCoulCharmmKokkos : public PairLJCharmmCoulCharmm { void init_style(); double init_one(int, int); - struct params_lj_coul{ - 
params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset; - }; protected: void cleanup_copy(); diff --git a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h index fcdab7ddb626971587f47cafae79214ca6563cdf..0969d11b0ec3f8460a05fb1929a81f33f6b37d1d 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h +++ b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h @@ -43,12 +43,6 @@ class PairLJCharmmCoulLongKokkos : public PairLJCharmmCoulLong { void init_style(); double init_one(int, int); - struct params_lj_coul{ - params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset; - }; - protected: void cleanup_copy(); diff --git a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h index 1ea5bc69bf4c9032af67573b80d6a980891f7750..c3492666dece0f786014fe0990a9004773578315 100644 --- a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h +++ b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h @@ -43,11 +43,6 @@ class PairLJClass2CoulCutKokkos : public PairLJClass2CoulCut { void init_style(); double init_one(int, int); - struct params_lj_coul{ - params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset; - }; protected: void cleanup_copy(); diff --git a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h index 0b1b2dc90b64cc18164eb1d4e4ccfab50773fb48..c5c46ed2d52c31db795f883622165117a7745f5e 100644 --- a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h +++ b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h @@ -44,12 +44,6 @@ class 
PairLJClass2CoulLongKokkos : public PairLJClass2CoulLong { void init_style(); double init_one(int, int); - struct params_lj_coul{ - params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset; - }; - protected: void cleanup_copy(); diff --git a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h index 36f31d176c0365640c0bddd31f25a180c51ac8ba..5891371d140285e1e8f6fe0a5697b15cd263dc80 100644 --- a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h +++ b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h @@ -43,14 +43,6 @@ class PairLJCutCoulCutKokkos : public PairLJCutCoulCut { void init_style(); double init_one(int, int); - struct params_lj_coul{ - KOKKOS_INLINE_FUNCTION - params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - KOKKOS_INLINE_FUNCTION - params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset; - }; - protected: void cleanup_copy(); diff --git a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h index 9e1e30abaddf6f43ad575921228c9d8e61ac9918..d507f76a3a94adaedfcc6faa70217c5f42097d16 100644 --- a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h +++ b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h @@ -43,12 +43,6 @@ class PairLJCutCoulDebyeKokkos : public PairLJCutCoulDebye { void init_style(); double init_one(int, int); - struct params_lj_coul{ - params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset; - }; - protected: void cleanup_copy(); diff --git a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h index 
b1f578ec0bf109d1b4fc435410d574b3cdd1ebd3..3e378757c0f7c01d9dc15558b6381c9a482878f1 100644 --- a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h +++ b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h @@ -42,12 +42,6 @@ class PairLJCutCoulDSFKokkos : public PairLJCutCoulDSF { void init_style(); double init_one(int, int); - struct params_lj_coul{ - params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset; - }; - protected: void cleanup_copy(); diff --git a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h index 5bdaaf96ca60f625ee93e4ae1531c2e98dc61e65..73283292323e967950394c0ffaef29b630aa4539 100644 --- a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h +++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h @@ -44,12 +44,6 @@ class PairLJCutCoulLongKokkos : public PairLJCutCoulLong { void init_style(); double init_one(int, int); - struct params_lj_coul{ - params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset; - }; - protected: void cleanup_copy(); diff --git a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp index 499a826676b06137ec5630c0f218d786d3d2b77f..b636f36499a8ffd5c765615634af49218acb448b 100644 --- a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp +++ b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp @@ -286,7 +286,7 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::allocate() memory->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); d_cut_coulsq = k_cut_coulsq.template view<DeviceType>(); - k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJGromacsCoulGromacs::params",n+1,n+1); + k_params = 
Kokkos::DualView<params_lj_coul_gromacs**,Kokkos::LayoutRight,DeviceType>("PairLJGromacsCoulGromacs::params",n+1,n+1); params = k_params.d_view; } diff --git a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h index 8b10eb71a36f6dee7a3dbca271bee661244db386..bbf5c50a624148060206a4ebff87e3e8a2b466a3 100644 --- a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h +++ b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h @@ -44,9 +44,11 @@ class PairLJGromacsCoulGromacsKokkos : public PairLJGromacsCoulGromacs { void init_style(); double init_one(int, int); - struct params_lj_coul{ - params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;ljsw1=0;ljsw2=0;ljsw3=0;ljsw4=0;ljsw5=0;}; - params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;ljsw1=0;ljsw2=0;ljsw3=0;ljsw4=0;ljsw5=0;}; + struct params_lj_coul_gromacs{ + KOKKOS_INLINE_FUNCTION + params_lj_coul_gromacs(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;ljsw1=0;ljsw2=0;ljsw3=0;ljsw4=0;ljsw5=0;}; + KOKKOS_INLINE_FUNCTION + params_lj_coul_gromacs(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;ljsw1=0;ljsw2=0;ljsw3=0;ljsw4=0;ljsw5=0;}; F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset,ljsw1,ljsw2,ljsw3,ljsw4,ljsw5; }; @@ -73,11 +75,11 @@ class PairLJGromacsCoulGromacsKokkos : public PairLJGromacsCoulGromacs { F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const; - Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType> k_params; - typename Kokkos::DualView<params_lj_coul**, + Kokkos::DualView<params_lj_coul_gromacs**,Kokkos::LayoutRight,DeviceType> k_params; + typename Kokkos::DualView<params_lj_coul_gromacs**, Kokkos::LayoutRight,DeviceType>::t_dev_const_um params; // hardwired to space for 12 atom types - params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + 
params_lj_coul_gromacs m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; diff --git a/src/MAKE/MACHINES/Makefile.white b/src/MAKE/MACHINES/Makefile.white index ae31664b0dde69d357aae9ce826421cf14a30fa4..53de76e7362b252aea3404bbad66562ab70f9f6d 100644 --- a/src/MAKE/MACHINES/Makefile.white +++ b/src/MAKE/MACHINES/Makefile.white @@ -23,7 +23,6 @@ ARFLAGS = -rc SHLIBFLAGS = -shared KOKKOS_DEVICES = Cuda, OpenMP KOKKOS_ARCH = Kepler35 -KOKKOS_CUDA_OPTIONS = enable_lambda # --------------------------------------------------------------------- # LAMMPS-specific settings, all OPTIONAL diff --git a/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpich b/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpich index efdc728bdf01ae6ec6fa5853053bb25fb8def07f..be0c2d1913bc1aef371ee89d040368578dd2c170 100644 --- a/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpich +++ b/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpich @@ -7,13 +7,14 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) -CC = mpicxx -cxx=$(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper +export MPICH_CXX = $(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper +CC = mpicxx CCFLAGS = -g -O3 SHFLAGS = -fPIC DEPFLAGS = -M -LINK = mpicxx -cxx=$(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper -LINKFLAGS = -g -O +LINK = mpicxx +LINKFLAGS = -g -O3 LIB = SIZE = size